In [1]:
import pandas as pd
import os
import requests
import gzip #Combine test and train files
from xml.etree import ElementTree as ET



In [2]:
os.getcwd()

'C:\\Users\\devsa\\Documents\\Pubtator'

In [3]:
# Combining Gene-Disease test & train data files
# this is from: https://figshare.com/articles/dataset/CoCoScore_Supplementary_Data_v1_0/7198280/1
file1_path = 'dataset_9606_-26_test.tsv.gz'
file2_path = 'dataset_9606_-26_train.tsv.gz'
output_file_path = 'Gene_Disease.tsv.gz'

# Function to combine two TSV files
def combine_tsv_files(file1_path, file2_path, output_file_path):
    with gzip.open(file1_path, 'rb') as file1:
        data1 = file1.read()
    
    with gzip.open(file2_path, 'rb') as file2:
        data2 = file2.read()
    
    combined_data = data1 + data2

    with gzip.open(output_file_path, 'wb') as output_file:
        output_file.write(combined_data)

In [4]:
combine_tsv_files(file1_path, file2_path, output_file_path)
print("Files combined successfully and saved to", output_file_path)

Files combined successfully and saved to Gene_Disease.tsv.gz


In [5]:
# Specify the path to your TSV file
file_path = 'Gene_Disease.tsv.gz'

# Specify column names (replace with your actual column names)
column_names = ['PMID', 'Paragraph No', 'Sentence No', 'Diseas ID', 'Gene ID', 'Text', 'Association_label', 'H']

# Read the tab-separated TSV file into a DataFrame
df = pd.read_csv(file_path, sep='\t', names=column_names)

df['PMID'] = df['PMID'].astype(str)

# Display the first few rows of the DataFrame
print(df.head())

    PMID  Paragraph No  Sentence No   Diseas ID          Gene ID  \
0   1401             2            1    DOID:684  ENSP00000448059   
1  11906             2            1   DOID:2752  ENSP00000305692   
2  15739             2            1  DOID:14749  ENSP00000274813   
3  22474             2            4    DOID:684  ENSP00000357066   
4  36611             2            3  DOID:12800  ENSP00000264914   

                                                Text  Association_label    H  
0  Stationary-phase, minimal deviation UNKDISEASE...                  0   89  
1  We describe an improved method for detecting d...                  1    2  
2  We report a method for rapid prenatal detectio...                  1   52  
3  The UNKGENE (UNKGENE), UNKGENE (UNKGENE), and ...                  0  131  
4  Deficiency of UNKGENE (ARS(B)) is associated w...                  1   75  


In [6]:
len(df)

587208

In [7]:
def fetch_pubmed_concepts(df, max_pmid_count):
    
    pmid_first_n = df['PMID'][:max_pmid_count]
    querylist = pmid_first_n.tolist()
    pubtator_api_url = "https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml"
    concept_to_pmids = {}  # Dictionary to store concept names as keys and PubMed IDs as values

    # Query the list as per API states
    for pmid in querylist:
        url = f"{pubtator_api_url}?pmids={pmid}&concepts=gene"
        response = requests.get(url)

        print(f"Submitting request for PMID {pmid}: {url}")

        if response.status_code == 200:
            content = response.content
            root = ET.fromstring(content)
            for passage in root.iter('passage'):
                for annotation in passage.iter('annotation'):
                    concept_id = annotation.find("infon[@key='identifier']").text
                    concept_name = annotation.find("text").text
                    concept_type = annotation.find("infon[@key='type']").text

                    # Check if the concept name is already in the dictionary
                    if concept_name in concept_to_pmids:
                        # Check if the PubMed ID is not already in the list of values
                        if pmid not in concept_to_pmids[concept_name]:
                            concept_to_pmids[concept_name].append(pmid)
                    else:
                        concept_to_pmids[concept_name] = [pmid]
        else:
            print(f"Request for PMID {pmid} failed with status code {response.status_code}")

    return concept_to_pmids


In [8]:
# Usage
# df is the DataFrame, and you can specify the max number of PMIDs you want to process
concepts_foundin_pmids = fetch_pubmed_concepts(df, max_pmid_count=1000)

Submitting request for PMID 1401: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1401&concepts=gene
Submitting request for PMID 11906: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=11906&concepts=gene
Submitting request for PMID 15739: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=15739&concepts=gene
Submitting request for PMID 22474: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=22474&concepts=gene
Submitting request for PMID 36611: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=36611&concepts=gene
Submitting request for PMID 46753: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=46753&concepts=gene
Submitting request for PMID 48211: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=48211&concepts=gene
Submitting request for

Submitting request for PMID 180723: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=180723&concepts=gene
Submitting request for PMID 181398: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=181398&concepts=gene
Submitting request for PMID 182362: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=182362&concepts=gene
Submitting request for PMID 188519: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=188519&concepts=gene
Submitting request for PMID 188519: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=188519&concepts=gene
Submitting request for PMID 192899: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=192899&concepts=gene
Submitting request for PMID 192899: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=192899&concepts=gene
Submit

Submitting request for PMID 383332: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=383332&concepts=gene
Submitting request for PMID 384139: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=384139&concepts=gene
Submitting request for PMID 385195: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=385195&concepts=gene
Submitting request for PMID 400438: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=400438&concepts=gene
Submitting request for PMID 401675: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=401675&concepts=gene
Submitting request for PMID 401730: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=401730&concepts=gene
Submitting request for PMID 401730: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=401730&concepts=gene
Submit

Submitting request for PMID 583090: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=583090&concepts=gene
Submitting request for PMID 583403: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=583403&concepts=gene
Submitting request for PMID 583403: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=583403&concepts=gene
Submitting request for PMID 600965: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=600965&concepts=gene
Submitting request for PMID 604196: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=604196&concepts=gene
Submitting request for PMID 615178: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=615178&concepts=gene
Submitting request for PMID 616039: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=616039&concepts=gene
Submit

Submitting request for PMID 824017: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=824017&concepts=gene
Submitting request for PMID 824017: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=824017&concepts=gene
Submitting request for PMID 826357: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=826357&concepts=gene
Submitting request for PMID 826357: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=826357&concepts=gene
Submitting request for PMID 826357: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=826357&concepts=gene
Submitting request for PMID 826357: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=826357&concepts=gene
Submitting request for PMID 826588: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=826588&concepts=gene
Submit

Submitting request for PMID 1110865: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1110865&concepts=gene
Submitting request for PMID 1110865: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1110865&concepts=gene
Submitting request for PMID 1115512: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1115512&concepts=gene
Submitting request for PMID 1116148: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1116148&concepts=gene
Submitting request for PMID 1117133: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1117133&concepts=gene
Submitting request for PMID 1122632: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1122632&concepts=gene
Submitting request for PMID 1136541: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1136541&concep

Submitting request for PMID 1284333: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1284333&concepts=gene
Submitting request for PMID 1284333: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1284333&concepts=gene
Submitting request for PMID 1284477: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1284477&concepts=gene
Submitting request for PMID 1284534: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1284534&concepts=gene
Submitting request for PMID 1284535: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1284535&concepts=gene
Submitting request for PMID 1284535: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1284535&concepts=gene
Submitting request for PMID 1284539: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1284539&concep

Submitting request for PMID 1312896: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1312896&concepts=gene
Submitting request for PMID 1312896: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1312896&concepts=gene
Submitting request for PMID 1313984: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1313984&concepts=gene
Submitting request for PMID 1314136: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1314136&concepts=gene
Submitting request for PMID 1314165: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1314165&concepts=gene
Submitting request for PMID 1314761: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1314761&concepts=gene
Submitting request for PMID 1314761: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1314761&concep

Submitting request for PMID 1341477: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1341477&concepts=gene
Submitting request for PMID 1342477: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1342477&concepts=gene
Submitting request for PMID 1342640: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1342640&concepts=gene
Submitting request for PMID 1343225: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1343225&concepts=gene
Submitting request for PMID 1345166: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1345166&concepts=gene
Submitting request for PMID 1345744: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1345744&concepts=gene
Submitting request for PMID 1346083: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1346083&concep

Submitting request for PMID 1363589: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1363589&concepts=gene
Submitting request for PMID 1365889: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1365889&concepts=gene
Submitting request for PMID 1367855: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1367855&concepts=gene
Submitting request for PMID 1370301: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1370301&concepts=gene
Submitting request for PMID 1370306: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1370306&concepts=gene
Submitting request for PMID 1370353: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1370353&concepts=gene
Submitting request for PMID 1370365: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1370365&concep

Submitting request for PMID 1377820: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1377820&concepts=gene
Submitting request for PMID 1377893: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1377893&concepts=gene
Submitting request for PMID 1378089: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1378089&concepts=gene
Submitting request for PMID 1378393: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1378393&concepts=gene
Submitting request for PMID 1378801: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1378801&concepts=gene
Submitting request for PMID 1378835: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1378835&concepts=gene
Submitting request for PMID 1378862: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1378862&concep

Submitting request for PMID 1402895: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1402895&concepts=gene
Submitting request for PMID 1404883: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1404883&concepts=gene
Submitting request for PMID 1405404: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1405404&concepts=gene
Submitting request for PMID 1411288: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1411288&concepts=gene
Submitting request for PMID 1413493: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1413493&concepts=gene
Submitting request for PMID 1414829: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1414829&concepts=gene
Submitting request for PMID 1415186: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1415186&concep

Submitting request for PMID 1445744: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1445744&concepts=gene
Submitting request for PMID 1445805: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1445805&concepts=gene
Submitting request for PMID 1446625: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1446625&concepts=gene
Submitting request for PMID 1447574: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1447574&concepts=gene
Submitting request for PMID 1449258: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1449258&concepts=gene
Submitting request for PMID 1450460: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1450460&concepts=gene
Submitting request for PMID 1451338: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1451338&concep

Submitting request for PMID 1485950: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1485950&concepts=gene
Submitting request for PMID 1485950: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1485950&concepts=gene
Submitting request for PMID 1486085: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1486085&concepts=gene
Submitting request for PMID 1486691: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1486691&concepts=gene
Submitting request for PMID 1486691: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1486691&concepts=gene
Submitting request for PMID 1486864: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1486864&concepts=gene
Submitting request for PMID 1488222: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1488222&concep

Submitting request for PMID 1532632: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1532632&concepts=gene
Submitting request for PMID 1532910: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1532910&concepts=gene
Submitting request for PMID 1532910: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1532910&concepts=gene
Submitting request for PMID 1533481: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1533481&concepts=gene
Submitting request for PMID 1534143: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1534143&concepts=gene
Submitting request for PMID 1536179: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1536179&concepts=gene
Submitting request for PMID 1536955: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1536955&concep

Submitting request for PMID 1569192: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1569192&concepts=gene
Submitting request for PMID 1569604: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1569604&concepts=gene
Submitting request for PMID 1569604: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1569604&concepts=gene
Submitting request for PMID 1569604: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1569604&concepts=gene
Submitting request for PMID 1570151: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1570151&concepts=gene
Submitting request for PMID 1570151: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1570151&concepts=gene
Submitting request for PMID 1570151: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1570151&concep

Submitting request for PMID 1607712: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1607712&concepts=gene
Submitting request for PMID 1609264: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1609264&concepts=gene
Submitting request for PMID 1609826: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1609826&concepts=gene
Submitting request for PMID 1611773: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1611773&concepts=gene
Submitting request for PMID 1612429: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1612429&concepts=gene
Submitting request for PMID 1614874: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1614874&concepts=gene
Submitting request for PMID 1615942: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1615942&concep

Submitting request for PMID 1652893: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1652893&concepts=gene
Submitting request for PMID 1652893: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1652893&concepts=gene
Submitting request for PMID 1654964: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1654964&concepts=gene
Submitting request for PMID 1656362: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1656362&concepts=gene
Submitting request for PMID 1656362: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1656362&concepts=gene
Submitting request for PMID 1656362: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1656362&concepts=gene
Submitting request for PMID 1662049: https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/biocxml?pmids=1662049&concep

In [10]:
concepts_foundin_pmids

{'phenylalanine hydroxylase': ['1401', '489556'],
 'alpha-glucosidase': ['11906'],
 'insulin': ['22474',
  '182362',
  '323439',
  '353918',
  '739270',
  '842341',
  '1105371',
  '1110865',
  '1144451',
  '1147652',
  '1285360',
  '1307748',
  '1333648',
  '1339481',
  '1377520',
  '1442714',
  '1459319',
  '1550404',
  '1550446',
  '1592883',
  '1641148'],
 'arylsulfatase A': ['36611'],
 'ARS(A)': ['36611'],
 'ARS(B)': ['36611'],
 'mitochondrial aconitase': ['36611'],
 'alpha-1': ['48211'],
 'alpha-2': ['48211', '71842'],
 'Antithrombin': ['48766'],
 'IgA': ['52952', '651380', '926106', '1469522'],
 'alpha-1-antitrypsin': ['58261', '314560', '1441675', '1542282'],
 'alpha1-antitrypsin': ['58261'],
 'acid beta-galactosidase': ['62026'],
 'albumin': ['65455', '489556', '1498561', '1542282', '1577950'],
 'alpha-2-macroglobulin': ['71842'],
 'transferrin': ['71842', '1222826', '1498561'],
 'haptoglobin': ['71842', '1222826'],
 'HLA-A': ['77056', '480486', '677553', '1341477', '1349923', 

In [11]:
#Checking with CoCo Score - Assosiation Label (0) i.e., not valid here

In [13]:

# Changelog (after Micheal's code) 

# 1.  I merged test and train into one dataset
# 2.  Checked if we can give more that 100 through get method, I tried with 1000 worked for me. 
#     But It takes 3 mins for every 100 request. Did not time how long it took for 1000 - But definitely less than 30 mins.
# 3.  Created a dictionary to store output (#Key: Concept_name, Value: Pubmed_id's) - 
#     created a condition for the PubMed ID to be added to the list of values if it's not already present in the list for 
#     a given concept name.
# 4.  Encapsulated the code to "fetch_pubmed_concepts" function to be used recursively. 
#     Takes your DataFrame "df" and an optional parameter max_pmid_count to specify the maximum number of PMIDs you want to 
#     process. It returns the dictionary as the result.
 
#  - Siva
