# **Python script to download latest DOME Registry contents, related full text papers & provide DOME Registry entries metadata read out (20241202)**
1. DOME Registry contents will be downloaded via an API call that provides the JSON file of DOME Registry data
2. DOME Registry data json will be flattened and converted into CSV for working with entries data (row based data)
3. DOME Registry CSV will be checked and used to produce a metadata readout file (+ graphs)
4. DOME Registry DOIs of articles will be converted to PMCIDs for full text retrieval
5. DOME Registry entries will be downloaded as full XML files using PMCIDs list and NCBI Entrez service (Replace with EPMC when API works [^1] )

[^1]: EPMC full text XML API module issues on 20241204

## 1. DOME Registry contents will be downloaded by API call providing the json file of DOME Registry data

In [105]:
# 1. Use the DOME API to download all entries of the DOME Registry and store this in a json file 
import requests
import os
from datetime import datetime

# Define the URL for the API call: check the API documentation for the correct URL on the DOME Registry website
# NOTE(review): the query asks for `skip=0&limit=250`, which presumably caps a single
# call at 250 entries - confirm against the API docs if the registry grows beyond that.
url = "https://registry.dome-ml.org/api/review?skip=0&limit=250&text=%20&public=true&sort=publication.year&asc=true"

# Make an API request to the URL.
# The timeout (seconds) stops the cell from hanging forever if the server never answers.
response = requests.get(url, headers={'accept': '*/*'}, timeout=60)

# Check if the request was successful
if response.status_code == 200:
    # Get the current date in ISO format for file naming.
    # Increase the timestamp granularity here if the download ever needs to run more
    # often than daily (one file per day is assumed to be enough for the registry).
    current_date = datetime.now().strftime('%Y-%m-%d')

    # Create the output file name
    file_name = f"DOME_Registry_Contents_{current_date}.json"

    # Ask before clobbering a download that was already made today
    if os.path.exists(file_name):
        print("File already exists for today's date, do you want to overwrite? (y/n)")
        # Normalise the answer so that 'Y', ' y ' etc. are accepted as well
        overwrite = input('Do you want to overwrite the file? (y/n): ').strip().lower()
        if overwrite == 'n':
            print('Exiting without overwriting file')
            # `raise SystemExit` instead of `exit()`: `exit` is a helper injected by
            # the `site` module for interactive shells and is not meant for programs.
            raise SystemExit
        elif overwrite == 'y':
            print('Overwriting file')
        else:
            print('Invalid input, exiting')
            raise SystemExit

    # Save the raw JSON response body to the date-stamped file
    with open(file_name, 'w', encoding='utf-8') as file:
        file.write(response.text)

    print(f"DOME Registry data downloaded and saved to '{file_name}'")
else:
    # NOTE: `file_name` is not defined on this path, so the later cells cannot run
    print(f"Failed to retrieve the data. Status code: {response.status_code}")



File already exists for today's date, do you want to overwrite? (y/n)
Overwriting file
DOME Registry data downloaded and saved to 'DOME_Registry_Contents_2024-12-05.json'


## 2. DOME Registry data json will be flattened and converted into CSV for working with entries data (row based data)

In [106]:
# 2. Produce DOME Registry contents metadata .csv file and data visualisation
import json

# 2.1 Pretty print DOME Registry contents JSON file for inspection to ensure all looks as expected

# Function to read and pretty-print the JSON file sample entry
def pretty_print_json(file_name):
    """Print the contents of a JSON file with 4-space indentation.

    Args:
        file_name: Path of the JSON file to display.

    Returns:
        None. Any error while opening, parsing or printing is reported on
        stdout instead of being raised.
    """
    try:
        with open(file_name, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)

        # Re-serialise with indentation so the structure is easy to inspect
        rendered = json.dumps(parsed, indent=4)
        print(rendered)

    except Exception as err:
        print(f"Error reading the JSON file: {err}")

# Call the function to pretty-print the JSON file
# pretty_print_json(file_name)


# 2.2 Flatten the JSON for easier data processing and write to a new .json file 
# Function to read JSON data
def read_json(file_name):
    """Load a JSON file and return the parsed data.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The parsed JSON content, or ``None`` when the file could not be read
        or parsed (the error is printed, not raised).
    """
    try:
        with open(file_name, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except Exception as err:
        print(f"Error reading the JSON file: {err}")
    return None

# Function to flatten JSON
def flatten_json(y):
    """Flatten nested dicts/lists into a single-level dict.

    Keys of nested dicts and positions of list items are joined with ``_``,
    e.g. ``{"a": {"b": [10, 20]}}`` becomes ``{"a_b_0": 10, "a_b_1": 20}``.

    Args:
        y: Parsed JSON value (typically a dict) to flatten.

    Returns:
        A new dict mapping each joined key path to its leaf value.

    Notes:
        * Empty dicts and empty lists contribute no key at all.
        * A bare (non-container) top-level value is stored under the key ``''``.
        * Different paths can join to the same key (``{"a_b": 1}`` versus
          ``{"a": {"b": 2}}``); the value visited last wins.
    """
    out = {}

    def flatten(x, name=''):
        # isinstance (rather than an exact type check) also flattens dict/list
        # subclasses such as OrderedDict instead of storing them as leaves.
        if isinstance(x, dict):
            for key, value in x.items():
                # f-string keeps this working even if a key is not a str
                flatten(value, f"{name}{key}_")
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, f"{name}{index}_")
        else:
            # Drop the trailing '_' that was appended for the last path part
            out[name[:-1]] = x

    flatten(y)
    return out

# Function to save flattened JSON to a file
def save_flattened_json(flattened_data, output_file_name):
    """Write the flattened entries to a JSON file (4-space indentation).

    Args:
        flattened_data: JSON-serialisable data to store.
        output_file_name: Path of the file to create or overwrite.

    Returns:
        None. Success and failure are both reported on stdout; errors are
        not raised.
    """
    try:
        with open(output_file_name, 'w', encoding='utf-8') as target:
            json.dump(flattened_data, target, indent=4)
        print(f"Flattened JSON data saved to '{output_file_name}'")
    except Exception as err:
        print(f"Error saving the flattened JSON file: {err}")

# Read JSON data
data = read_json(file_name)

# Flatten every downloaded entry and store the result in a "flattened_"-prefixed file
if not data:
    print("No data to process.")
else:
    flattened_data = list(map(flatten_json, data))
    flattened_file_name = "flattened_" + file_name
    save_flattened_json(flattened_data, flattened_file_name)

    # TODO: print the output file name together with a success statement



#2.3 Convert flattened json to csv 
# Function to read flattened JSON data
import json
import csv
import os

# Define the path to the flattened JSON file
#flattened_file_name = 'flattened_DOME_Registry_Contents.json'  # Replace with your actual file name

# Function to read flattened JSON data
def read_flattened_json(file_name):
    """Load the flattened JSON file and return its parsed content.

    Args:
        file_name: Path of the flattened JSON file.

    Returns:
        The parsed data, or ``None`` if reading or parsing failed (the error
        is printed rather than raised).
    """
    try:
        with open(file_name, 'r', encoding='utf-8') as source:
            parsed = json.load(source)
    except Exception as problem:
        print(f"Error reading the flattened JSON file: {problem}")
        return None
    else:
        return parsed

# Function to write JSON data to a CSV file
def write_json_to_csv(json_data, csv_file_name):
    """Write a list of flat dicts to a CSV file, one row per dict.

    The header is the union of the keys of all entries, in the order in which
    each key is first seen, so the column order is the same on every run.
    Entries that lack a key get an empty cell for that column.

    Args:
        json_data: Sequence of dicts (flattened entries).
        csv_file_name: Path of the CSV file to create or overwrite.

    Returns:
        None. Success and failure are both reported on stdout; errors are
        not raised.
    """
    try:
        # A dict is used as an ordered set: unlike set(), iteration order does
        # not depend on string hashing, which varies between interpreter runs.
        seen_keys = {}
        for entry in json_data:
            seen_keys.update(dict.fromkeys(entry.keys()))
        headers = list(seen_keys)

        # Write data to CSV file
        with open(csv_file_name, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()
            writer.writerows(json_data)

        print(f"JSON data written to '{csv_file_name}'")
    except Exception as e:
        # Best-effort by design: report the problem and let the notebook continue
        print(f"Error writing to the CSV file: {e}")

# Read flattened JSON data
flattened_data = read_flattened_json(flattened_file_name)

# Process JSON data into CSV
if flattened_data:
    # Swap the extension instead of slicing off a fixed five characters, so the
    # name is still right if the input file does not end in exactly ".json".
    csv_file_name = os.path.splitext(flattened_file_name)[0] + '.csv'
    write_json_to_csv(flattened_data, csv_file_name)
else:
    print("No data to process.")


Flattened JSON data saved to 'flattened_DOME_Registry_Contents_2024-12-05.json'
JSON data written to 'flattened_DOME_Registry_Contents_2024-12-05.csv'


## 3. DOME Registry CSV will be analysed for entry compliance and used to produce a metadata readout file (+ graphs - TBC)

In [107]:
'''
# Production of the DOME Registry fields validity data & subsequent metadata csv file 
import csv
import re
import os

# 3.1  Simple print of all DOME fields & simple explainer put into text file 
# for assisting with use of DOME Regsitry entry data
# for header in csv_file_name:
#    print(header)

# Define regexes to check various CSV header field entries
# Define the EPMC regex pattern for PMIDs
pmid_pattern = re.compile(r'^\d{8}$')

# Define the regex pattern for DOIs
doi_pattern = re.compile(r'^10.\d{4,9}/[-._;()/:A-Z0-9]+$', re.IGNORECASE)

# Function to read CSV data
def read_csv(file_name):
    try:
        with open(file_name, 'r', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            data = [row for row in reader]
        return data
    except Exception as e:
        print(f"Error reading the CSV file: {e}")
        return None

# Function to check PMIDs and DOIs and generate metadata
def check_pmids_and_dois_and_generate_metadata(data, csv_file_name):
    pmid_valid = 0
    pmid_invalid = 0
    doi_valid = 0
    doi_invalid = 0
    total_entries = len(data)
    
    for row in data:
        pmid = row.get('publication_pmid', '')
        doi = row.get('publication_doi', '')
        
        if pmid_pattern.match(pmid):
            pmid_valid += 1
        else:
            pmid_invalid += 1
        
        if doi_pattern.match(doi):
            doi_valid += 1
        else:
            doi_invalid += 1
    
    # Print the results
    print(f"{pmid_valid} of {total_entries} PMIDs valid")
    print(f"{pmid_invalid} of {total_entries} PMIDs invalid")
    print(f"{doi_valid} of {total_entries} DOIs valid")
    print(f"{doi_invalid} of {total_entries} DOIs invalid")
    
    # Create metadata CSV file
    metadata_file_name = f"Metadata_{os.path.basename(csv_file_name)}"
    with open(metadata_file_name, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Metric', 'Value'])
        writer.writerow(['Total Entries', total_entries])
        writer.writerow(['Valid PMIDs', pmid_valid])
        writer.writerow(['Invalid PMIDs', pmid_invalid])
        writer.writerow(['Valid DOIs', doi_valid])
        writer.writerow(['Invalid DOIs', doi_invalid])
    
    print(f"Metadata written to '{metadata_file_name}'")

# Read CSV data
csv_data = read_csv(csv_file_name)

# Check PMIDs and DOIs and generate metadata
if csv_data:
    check_pmids_and_dois_and_generate_metadata(csv_data, csv_file_name)
else:
    print("No data to process.")
'''

'\n# Production of the DOME Registry fields validity data & subsequent metadata csv file \nimport csv\nimport re\nimport os\n\n# 3.1  Simple print of all DOME fields & simple explainer put into text file \n# for assisting with use of DOME Regsitry entry data\n# for header in csv_file_name:\n#    print(header)\n\n# Define regexes to check various CSV header field entries\n# Define the EPMC regex pattern for PMIDs\npmid_pattern = re.compile(r\'^\\d{8}$\')\n\n# Define the regex pattern for DOIs\ndoi_pattern = re.compile(r\'^10.\\d{4,9}/[-._;()/:A-Z0-9]+$\', re.IGNORECASE)\n\n# Function to read CSV data\ndef read_csv(file_name):\n    try:\n        with open(file_name, \'r\', encoding=\'utf-8\') as csvfile:\n            reader = csv.DictReader(csvfile)\n            data = [row for row in reader]\n        return data\n    except Exception as e:\n        print(f"Error reading the CSV file: {e}")\n        return None\n\n# Function to check PMIDs and DOIs and generate metadata\ndef check_pmids_

In [108]:
'''
# 3.1  Simple print of all DOME fields & simple explainer put into text file 
# for assisting with use of DOME Registry entry data
# reformat using dataframe the DOME entries csv columns into more logical format
import csv
import pandas as pd #because who doesn't love a panda 

#Read in DOME Entries CSV as dataframe via pandas library functions
print(csv_file_name)
DOME_Entries_dataframe = pd.read_csv(csv_file_name)

# View data frame to inspect all data appears ok
DOME_Entries_dataframe.head()
DOME_Entries_dataframe.shape

#Get number of entries in header row correpsonding to DOME Registry entries fields 
# from a given entry related to its originating JSON file
i=0
header_entries_for_text_file = []
for header_entry in (DOME_Entries_dataframe.columns):
     i = i+1
     header_entries_for_text_file.append(header_entry)
print('Number of DOME Registry field entries: ' + str(i))
print(header_entries_for_text_file)

# to remove redundant fields and check over these - TBC
#df = DOME_Entries_dataframe.reindex(sorted(DOME_Entries_dataframe.columns), axis=1)
#df.head()
# to create more metadata and graph of entries (to work and think on)
# to add and choose main ID for rows header
# to do DOI checks and regex
# to get PMC full text from DOI and store in local folder

#row_names = df['uuid']
#print(row_names)

# Set row names as shortid whichh corresponds to DOME Registry unique short id 
df = pd.DataFrame(df).set_index('shortid')
df.to_csv(csv_file_name, sep=',', index=True, encoding='utf-8')
'''


"\n# 3.1  Simple print of all DOME fields & simple explainer put into text file \n# for assisting with use of DOME Registry entry data\n# reformat using dataframe the DOME entries csv columns into more logical format\nimport csv\nimport pandas as pd #because who doesn't love a panda \n\n#Read in DOME Entries CSV as dataframe via pandas library functions\nprint(csv_file_name)\nDOME_Entries_dataframe = pd.read_csv(csv_file_name)\n\n# View data frame to inspect all data appears ok\nDOME_Entries_dataframe.head()\nDOME_Entries_dataframe.shape\n\n#Get number of entries in header row correpsonding to DOME Registry entries fields \n# from a given entry related to its originating JSON file\ni=0\nheader_entries_for_text_file = []\nfor header_entry in (DOME_Entries_dataframe.columns):\n     i = i+1\n     header_entries_for_text_file.append(header_entry)\nprint('Number of DOME Registry field entries: ' + str(i))\nprint(header_entries_for_text_file)\n\n# to remove redundant fields and check over th

In [109]:
'''
# 3.2

# Reorder metadata to start of columns list
# Define the prefixes to match and group csv data
prefix_matches_cols = 'matches_'
prefix_publications_cols= 'matches_publication'
prefix_data_cols= 'matches_data'
prefix_optimization_cols= 'matches_optimization'
prefix_model_cols= 'matches_model'
prefix_evaluation_cols= 'matches_evaluation'

# Separate columns based on whether they start with the prefix
matches_publication_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]
matches_data_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]
matches_optimization_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]
matches_model_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]
matches_evaluation_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]
other_columns = [col for col in df.columns if not col.startswith(prefix_matches_cols)]

# Reorder columns
reordered_columns = other_columns + matches_data_columns + matches_optimization_columns + matches_model_columns + matches_evaluation_columns
df = df[reordered_columns]

print(df.head())

df.to_csv(csv_file_name, sep=',', index=True, encoding='utf-8')
'''

"\n# 3.2\n\n# Reorder metadata to start of columns list\n# Define the prefixes to match and group csv data\nprefix_matches_cols = 'matches_'\nprefix_publications_cols= 'matches_publication'\nprefix_data_cols= 'matches_data'\nprefix_optimization_cols= 'matches_optimization'\nprefix_model_cols= 'matches_model'\nprefix_evaluation_cols= 'matches_evaluation'\n\n# Separate columns based on whether they start with the prefix\nmatches_publication_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]\nmatches_data_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]\nmatches_optimization_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]\nmatches_model_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]\nmatches_evaluation_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]\nother_columns = [col for col in df.columns if not col.startswith(prefix_mat

In [110]:
''' 
# View data frame to inspect all data appears ok
DOME_Entries_dataframe.head()
DOME_Entries_dataframe.shape

#Get number of entries in header row correpsonding to DOME Registry entries fields 
# from a given entry related to its originating JSON file
i=0
header_entries_for_text_file = []
for header_entry in (DOME_Entries_dataframe.columns):
     i = i+1
     header_entries_for_text_file.append(header_entry)
print('Number of DOME Registry field entries: ' + str(i))
print(header_entries_for_text_file)

# to remove redundant fields and check over these - TBC
df = DOME_Entries_dataframe.reindex(sorted(DOME_Entries_dataframe.columns), axis=1)
df.head()
# to create more metadata and graph of entries (to work and think on)
# to add and choose main ID for rows header
# to do DOI checks and regex
# to get PMC full text from DOI and store in local folder

row_names = df['uuid']
#print(row_names)

# Set row names as shortid whichh corresponds to DOME Registry unique short id 
df = pd.DataFrame(df).set_index('shortid')
df.head()


'''

        

" \n# View data frame to inspect all data appears ok\nDOME_Entries_dataframe.head()\nDOME_Entries_dataframe.shape\n\n#Get number of entries in header row correpsonding to DOME Registry entries fields \n# from a given entry related to its originating JSON file\ni=0\nheader_entries_for_text_file = []\nfor header_entry in (DOME_Entries_dataframe.columns):\n     i = i+1\n     header_entries_for_text_file.append(header_entry)\nprint('Number of DOME Registry field entries: ' + str(i))\nprint(header_entries_for_text_file)\n\n# to remove redundant fields and check over these - TBC\ndf = DOME_Entries_dataframe.reindex(sorted(DOME_Entries_dataframe.columns), axis=1)\ndf.head()\n# to create more metadata and graph of entries (to work and think on)\n# to add and choose main ID for rows header\n# to do DOI checks and regex\n# to get PMC full text from DOI and store in local folder\n\nrow_names = df['uuid']\n#print(row_names)\n\n# Set row names as shortid whichh corresponds to DOME Registry unique s

In [111]:
#3.2 reorder data frame
import pandas as pd

# Load the flattened DOME entries CSV written by the previous step
df = pd.read_csv(csv_file_name)

# Prefixes used to group the flattened column names
prefix_publications_cols = 'publication_'
prefix_data_cols = 'matches_data'
prefix_optimization_cols = 'matches_optimization'
prefix_model_cols = 'matches_model'
prefix_evaluation_cols = 'matches_evaluation'

# Split the columns into groups by prefix; the existing order is kept inside each group
publication_columns = [col for col in df.columns if col.startswith(prefix_publications_cols)]
matches_data_columns = [col for col in df.columns if col.startswith(prefix_data_cols)]
matches_optimization_columns = [col for col in df.columns if col.startswith(prefix_optimization_cols)]
matches_model_columns = [col for col in df.columns if col.startswith(prefix_model_cols)]
matches_evaluation_columns = [col for col in df.columns if col.startswith(prefix_evaluation_cols)]
other_columns = [col for col in df.columns if not col.startswith(('matches_', 'publication_'))]

# New order: ungrouped columns first, then publication, data, optimization, model, evaluation
reordered_columns = (other_columns + publication_columns + matches_data_columns +
                     matches_optimization_columns + matches_model_columns + matches_evaluation_columns)

# Any `matches_*` column outside the four groups above is not part of
# `reordered_columns` and therefore disappears from the rewritten CSV.
# Report it instead of losing it silently.
# NOTE(review): confirm whether dropping these columns is intended.
kept_columns = set(reordered_columns)
dropped_columns = [col for col in df.columns if col not in kept_columns]
if dropped_columns:
    print(f"Warning: {len(dropped_columns)} column(s) match no group and are dropped: {dropped_columns}")

df = df[reordered_columns]

# Use the `shortid` column as the row index, then overwrite the input CSV in place
df = df.set_index('shortid')
df.to_csv(csv_file_name, sep=',', index=True, encoding='utf-8')

# Was `matches_publication_columns`, a name this cell never defines (NameError)
print(publication_columns)

In [None]:
#DOIs to PMCIDs
import pandas as pd
import requests

# Define the path to your CSV file
# Placeholder location of the DOME entries CSV - point it at the real file before running
csv_file_name = 'path_to_your_csv_file.csv'  # Replace with your actual file name

# Load the DOME entries table with pandas
df = pd.read_csv(csv_file_name)

# Distinct, non-missing DOIs that need a PMCID lookup
doi_column = df['publication_doi']
dois = doi_column.dropna().unique()

# Function to map DOIs to PMCIDs using NCBI E-utilities API
def map_dois_to_pmcids(dois):
    """Look up the PMCID of each DOI with the NCBI PMC ID converter service.

    One HTTP request is made per DOI. A failed lookup (network error, non-200
    status, unparsable body, no record, or a record without a ``pmcid``
    field) never aborts the run; the DOI is simply mapped to ``None``.

    Args:
        dois: Iterable of DOI strings.

    Returns:
        dict mapping every input DOI to its PMCID string, or to ``None`` when
        no PMCID could be obtained.
    """
    endpoint = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/"
    pmcid_mapping = {}
    for doi in dois:
        # Default for every failure path below
        pmcid_mapping[doi] = None

        # Passing the values via `params` lets requests URL-encode them; DOIs
        # may contain characters that are not safe inside a raw query string.
        # `tool` / `email` are placeholders - replace with real contact details.
        query = {
            'tool': 'my_tool',
            'email': 'my_email@example.com',
            'ids': doi,
            'format': 'json',
        }
        try:
            response = requests.get(endpoint, params=query, timeout=30)
            if response.status_code != 200:
                continue
            data = response.json()
        except (requests.RequestException, ValueError) as err:
            # ValueError covers a response body that is not valid JSON
            print(f"PMCID lookup failed for DOI {doi}: {err}")
            continue

        # With several records for one DOI, the last record wins
        for record in data.get('records', []):
            pmcid_mapping[doi] = record.get('pmcid')
    return pmcid_mapping

# Map DOIs to PMCIDs
doi_to_pmcid_mapping = map_dois_to_pmcids(dois)

# Attach the looked-up PMCIDs as a new column, keyed on each row's DOI
mapped_pmcids = df['publication_doi'].map(doi_to_pmcid_mapping)
df['mapped_pmcid'] = mapped_pmcids

# TODO: align this output file name with the date-stamped naming of the earlier steps

# Write the extended table to a new CSV file (row index not included)
output_csv_file_name = 'updated_DOME_Registry_Contents.csv'
df.to_csv(output_csv_file_name, index=False)

# Show the first rows of the updated table
preview = df.head()
print(preview)

# TODO: send the DOIs to the ID converter in batches instead of one request per DOI, for speed
# TODO: add the number of DOIs without a PMCID to the metadata readout file below

      shortid                       _id                   created  public  \
0  6i0xepuivt  63516fedb9c880af1f305b5c  2022-09-01T15:16:05.444Z    True   
1  nlj5x3dld8  63516fedb9c880af1f305b93  2022-09-01T15:16:05.445Z    True   
2  ysqyy92zyr  66030aaa1502715bfe53d65c  2024-03-26T17:49:30.048Z    True   
3  qx3ex71jye  66041e5d1502715bfe53d70a  2024-03-27T13:25:49.790Z    True   
4  v536tc3b5t  63516fedb9c880af1f305b1c  2022-09-01T15:16:05.443Z    True   

                                 publication_authors  publication_created  \
0               Wang H, Zheng H, Simpson D, Azuaje F                  NaN   
1               Al-Shahib A, Breitling R, Gilbert DR                  NaN   
2  Hui Lan, Rachel Carson , Nicholas J Provart an...                  NaN   
3  Blaise Gassend, Charles W O'Donnell, William T...                  NaN   
4                  Tsai RT, Dai HJ, Huang CH, Hsu WL                  NaN   

               publication_doi  publication_done publication_journal  \
0 

In [None]:
#Metadata file readout as a csv and text file to explain contents - TO ADD

In [116]:
# Download using epmc api the full text using pmcids into folder
 
import pandas as pd
import requests
import os

# Define the path to your CSV file
csv_file_name = 'updated_DOME_Registry_Contents.csv'  # Replace with your actual file name

# Load the DOME entries table (including the mapped PMCID column) with pandas
df = pd.read_csv(csv_file_name)

# Distinct, non-missing PMCIDs to download
pmcid_column = df['mapped_pmcid']
pmcids = pmcid_column.dropna().unique()

# Folder that receives one XML file per PMCID; created if it does not exist yet
output_folder = 'PMC_Full_Texts'
os.makedirs(output_folder, exist_ok=True)

# Function to download full text for each PMCID using Europe PMC API
def download_full_text(pmcids):
    """Download the full-text XML of each PMCID from the Europe PMC REST API.

    Each article is saved as ``<pmcid>.xml`` inside the module-level
    ``output_folder``. A failed download (network error or non-200 status) is
    reported and skipped, so one bad article does not abort the whole run.
    A summary line with the number of successes and failures is printed at
    the end.

    Args:
        pmcids: Iterable of PMCID strings (for example ``"PMC1421439"``).

    Returns:
        None.
    """
    succeeded = 0
    failed = 0
    for pmcid in pmcids:
        url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/fullTextXML"
        try:
            # The timeout stops one unresponsive request from stalling the loop
            response = requests.get(url, timeout=60)
        except requests.RequestException as err:
            print(f"Failed to retrieve full text for PMCID {pmcid}. Error: {err}")
            failed += 1
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve full text for PMCID {pmcid}. Status code: {response.status_code}")
            failed += 1
            continue

        output_file = os.path.join(output_folder, f"{pmcid}.xml")
        # Store the body bytes as received: `response.text` decodes with an
        # encoding requests may have to guess, which can garble characters and
        # disagree with the encoding declared inside the XML itself.
        with open(output_file, 'wb') as file:
            file.write(response.content)
        print(f"Full text for PMCID {pmcid} saved to '{output_file}'")
        succeeded += 1

    print(f"Full text download finished: {succeeded} succeeded, {failed} failed")

# Download full text for each PMCID
download_full_text(pmcids)

# print how many successfully downloaded and how many failed to download - TO ADD


Full text for PMCID PMC1421439 saved to 'PMC_Full_Texts/PMC1421439.xml'
Full text for PMCID PMC1847686 saved to 'PMC_Full_Texts/PMC1847686.xml'
Full text for PMCID PMC2213690 saved to 'PMC_Full_Texts/PMC2213690.xml'
Full text for PMCID PMC1892091 saved to 'PMC_Full_Texts/PMC1892091.xml'
Full text for PMCID PMC2638158 saved to 'PMC_Full_Texts/PMC2638158.xml'
Full text for PMCID PMC2665034 saved to 'PMC_Full_Texts/PMC2665034.xml'
Full text for PMCID PMC2275242 saved to 'PMC_Full_Texts/PMC2275242.xml'
Full text for PMCID PMC2561051 saved to 'PMC_Full_Texts/PMC2561051.xml'
Full text for PMCID PMC2660303 saved to 'PMC_Full_Texts/PMC2660303.xml'
Full text for PMCID PMC2752621 saved to 'PMC_Full_Texts/PMC2752621.xml'
Full text for PMCID PMC3009519 saved to 'PMC_Full_Texts/PMC3009519.xml'
Failed to retrieve full text for PMCID PMC3169429. Status code: 404
Full text for PMCID PMC3542245 saved to 'PMC_Full_Texts/PMC3542245.xml'
Full text for PMCID PMC3396452 saved to 'PMC_Full_Texts/PMC3396452.x

In [None]:
# Metadata file readout as CSV and text file to explain contents and graph visualisation of data validation 

'import csv\nimport requests\nimport os\n\n# Define the path to the "Valid DOME Registry" CSV file\nvalid_csv_file_name = \'valid_DOME_Registry_Contents.csv\'  # Replace with your actual file name\n\n# Define the output folder for the mapped identifiers\noutput_folder = \'Mapped_Identifiers\'\nos.makedirs(output_folder, exist_ok=True)\n\n# Function to read CSV data\ndef read_csv(file_name):\n    try:\n        with open(file_name, \'r\', encoding=\'utf-8\') as csvfile:\n            reader = csv.DictReader(csvfile)\n            data = [row for row in reader]\n        return data\n    except Exception as e:\n        print(f"Error reading the CSV file: {e}")\n        return None\n\n# Function to map PMIDs to PMCIDs using NCBI E-utilities API\ndef map_pmids_to_pmcids(pmids):\n    pmid_str = \',\'.join(pmids)\n    url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&db=pmc&id={pmid_str}&retmode=json"\n    response = requests.get(url)\n    if response.status_code == 