In [None]:
import pickle
from known import find_known_results
import py2neo
import normalize_nodes
from lxml import etree
from collections import defaultdict
import requests
import time

from ars import retrieve_ars_results

utf8_parser = etree.XMLParser(encoding='utf-8')

def run_score(pk):
    """Compute the recency score for every response stored under an ARS query.

    Args:
        pk: Identifier handed to ``retrieve_ars_results``.

    Returns:
        dict: One entry per key of the mapping returned by
        ``retrieve_ars_results``; each value is the dictionary produced by
        ``calculate_recency`` for that response.
    """
    results = retrieve_ars_results(pk)
    recency_by_ars = {}
    for ars, response in results.items():
        known, unknown = find_known_results(response)
#        nn_distance = calculate_nn_distance(known, unknown, response)
        #Keep every score instead of overwriting it on the next iteration, so
        #the caller actually receives what was computed.
        recency_by_ars[ars] = calculate_recency(known, response,verbose=True)
    return recency_by_ars
#run_score(123)

In [None]:
def getPublistForPairs(disease_idx,drug_identifier_list):
    """Collect the ROBOKOP publications for each (disease, drug) pair.

    The drug identifiers are normalized in bulk, ROBOKOP is queried once with
    the normalized identifiers, and the answers are mapped back onto the
    identifiers the caller supplied.

    Args:
        disease_idx: Disease identifier passed through to the ROBOKOP query.
        drug_identifier_list: Drug identifiers as found by the caller (not
            necessarily normalized).

    Returns:
        tuple: ``(denormed_publications, all_pubs)``.
        ``denormed_publications`` maps ``(disease_idx, original_drug_id)`` to
        the publications found for that pair (an empty list when the drug
        could not be normalized). ``all_pubs`` is the set of every publication
        identifier seen across all pairs.
    """
    #Nothing to look up: avoid the normalizer and ROBOKOP round trips.
    if not drug_identifier_list:
        return {}, set()

    normalized_drug_dict = normalize_nodes.normalize_big_list(drug_identifier_list)

    #Map every queried identifier to its normalized identifier. None means the
    #identifier can't be normalized; we won't be able to find it in ROBOKOP either.
    normalized_for = {}
    for drug_key in drug_identifier_list:
        if drug_key not in normalized_drug_dict or normalized_drug_dict[drug_key] is None:
            normalized_for[drug_key] = None
        else:
            normalized_for[drug_key] = normalized_drug_dict[drug_key][0]

    normalized_drug_idxs = {idx for idx in normalized_for.values() if idx is not None}
    publications_from_rk = queryROBOKOPForPMIDsDiseaseDrugList(disease_idx,list(normalized_drug_idxs))

    #Denormalize the drug and make a dictionary for the publications.
    denormed_publications = {}
    all_pubs = set()
    for drug_key, drug_idx in normalized_for.items():
        if drug_idx is None:
            denormed_publications[(disease_idx,drug_key)] = []
            continue
        #ROBOKOP answers are keyed by the normalized identifier that was sent,
        #so the lookup must use drug_idx, not the caller's original drug_key.
        pubs = publications_from_rk.get((disease_idx,drug_idx), set())
        denormed_publications[(disease_idx,drug_key)] = pubs
        all_pubs.update(pubs)
    return denormed_publications, all_pubs

In [None]:
def getQueryNode(json_data):
    """Return ``(query_idx, disease)`` for the pinned node of a TRAPI query graph.

    The query-graph node keys are probed in a fixed order: ``'on'``, then
    ``'n0'`` (falling back to ``'n1'``), then ``'disease'``.

    Args:
        json_data: Response dictionary containing
            ``['message']['query_graph']['nodes']``.

    Returns:
        tuple: ``(query_idx, disease)``. ``query_idx`` is the first identifier
        found, ``"biolink:Gene"`` for a gene-category ``'on'`` node, or
        ``"N/A"`` when nothing is found. ``disease`` is a boolean flag
        (presumably whether the node is treated as a disease; confirm with
        callers).
    """
    query_node = json_data['message']['query_graph']['nodes']

    if 'on' in query_node:
        on_node = query_node['on']
        #Weird gene query
        if on_node.get('categories',[]) == ['biolink:Gene']:
            return ("biolink:Gene", False)
        if 'ids' in on_node:
            return (on_node['ids'][0], True)
        return ("N/A", False)

    if 'n0' in query_node:
        if 'ids' in query_node['n0']:
            return (query_node['n0']['ids'][0], True)
        #'n1' is not guaranteed to exist just because 'n0' does; a missing
        #node must fall through to "N/A" instead of raising KeyError.
        n1_node = query_node.get('n1', {})
        if 'id' in n1_node:
            return (n1_node['id'], True)
        #NOTE(review): the flag stays True here although no identifier was
        #found; kept as-is, confirm this is intended.
        return ("N/A", True)

    if 'disease' in query_node:
        return (query_node['disease']['ids'][0], True)

    #Could not find any node
    return ("N/A", False)
            

def drug_idx_generator(json_data):
    """Yield the drug identifier bound in each result of a TRAPI response.

    For every entry of ``json_data['message']['results']`` the node bindings
    are probed for the keys ``'sn'``, ``'n1'``, ``'drug'`` and ``'chemical'``
    (in that order) and the ``'id'`` of the first binding under the first key
    present is yielded. Identifiers are yielded as found; no normalization is
    done here.

    Args:
        json_data: Response dictionary with a ``'message'`` entry.

    Yields:
        The bound identifier, one per result.

    Raises:
        Exception: If a result has none of the recognized binding keys.
    """
    #Binding keys that may hold the drug node, in priority order.
    drug_binding_keys = ('sn', 'n1', 'drug', 'chemical')

    #'results' may be absent or explicitly None; both mean "no results".
    for result in json_data['message'].get('results') or []:
        node_bindings = result['node_bindings']
        for binding_key in drug_binding_keys:
            if binding_key in node_bindings:
                yield node_bindings[binding_key][0]['id']
                break
        else:
            print(node_bindings)
            raise Exception("Could not get Drug")


In [None]:
def queryROBOKOPForPMIDsDiseaseDrugList(disease_idx,drug_list):
    '''
    This function uses Py2Neo to query the ROBOKOPKG for each TextMiningKP edge for each pair
    in the disease and drug list we found in the graph.

    Args:
        disease_idx: Identifier matched against the disease node's ``id``.
        drug_list: List of identifiers matched against the chemical node's ``id``.

    Returns:
        defaultdict(set) mapping ``(disease_idx, chemical_id)`` to the set of
        publication identifiers found on the matching edges.
    '''
    publications_for_pair = defaultdict(set)
    #An empty IN-list cannot match anything, so skip the remote round trip.
    if not drug_list:
        return publications_for_pair

    graph = py2neo.Graph(host="robokopkg.renci.org")
    query_template = 'MATCH (d:`biolink:Disease`)-[r]-(c:`biolink:ChemicalEntity`) WHERE d.id=$disease_idx AND c.id in $drug_list AND r.`biolink:primary_knowledge_source`="infores:textminingkp" RETURN c.id, r.publications'
    query_res = graph.run(query_template,parameters={"disease_idx":disease_idx, "drug_list":drug_list})
    for message in query_res:
        c_idx = message['c.id']
        #Cypher returns null for a property the edge does not have; treat that
        #as "no publications" rather than crashing in set.update(None).
        pub_list = message['r.publications'] or []
        publications_for_pair[(disease_idx,c_idx)].update(pub_list)
    return publications_for_pair

In [None]:
def PMCIDsToYear(pmcids):
    """Look up the earliest publication year of PubMed Central articles.

    One efetch request (``db=pmc``) is sent for all identifiers at once.

    Args:
        pmcids: List of identifier strings, joined with commas into the
            request's ``id`` parameter.

    Returns:
        dict mapping ``"PMC" + <article-id text>`` to the smallest ``year``
        found under the article's ``pub-date`` elements. Articles without an
        id or without any year are left out.
    """
    article_to_year = {}
    #No identifiers: nothing to fetch, so do not spend a request on it.
    if not pmcids:
        return article_to_year

    #Pause before each request (the original slept 0.2 s twice here);
    #presumably this keeps us under the NCBI request-rate limit.
    time.sleep(0.4)
    pmc_api_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=" + ','.join(pmcids)
    r = requests.get(pmc_api_url)
    s = r.text.encode('utf-8')
    #Very short bodies are treated as "no usable payload" and skipped.
    if(len(s)>250):
        tree = etree.fromstring(s, parser=utf8_parser)
        for meta_ele in tree.xpath("//article-meta"):
            pmc_id_texts = meta_ele.xpath("./article-id[@pub-id-type='pmc']/text()")
            pub_years = [int(x) for x in meta_ele.xpath("./pub-date/year/text()")]
            #A record missing its id or all of its years cannot be dated;
            #skip it instead of failing the whole batch.
            if not pmc_id_texts or not pub_years:
                continue
            article_to_year["PMC" + pmc_id_texts[0]] = min(pub_years)
    return article_to_year

def PMIDsToYear(pmids):
    """Look up the earliest recorded year of PubMed articles.

    One efetch request (``db=pubmed``) is sent for all identifiers at once.

    Args:
        pmids: List of identifier strings, either bare numbers or
            ``"PMID:<number>"`` CURIEs.

    Returns:
        dict mapping ``"PMID:" + <number>`` to the smallest ``Year`` found in
        the article's ``PubmedData/History`` dates. Articles without an id or
        without any year are left out.
    """
    article_to_year = {}
    #No identifiers: nothing to fetch, so do not spend a request on it.
    if not pmids:
        return article_to_year

    #Pause before each request; presumably this keeps us under the NCBI
    #request-rate limit.
    time.sleep(0.2)
    #efetch takes plain numeric UIDs, so drop any "PMID:" CURIE prefix before
    #building the id list. The prefix is re-added on the returned keys below.
    uid_list = [pmid.split(":", 1)[-1] for pmid in pmids]
    pm_api_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=" + ','.join(uid_list)
    r = requests.get(pm_api_url)
    s = r.text.encode('utf-8')
    #Very short bodies are treated as "no usable payload" and skipped.
    if(len(s)>250):
        tree = etree.fromstring(s, parser=utf8_parser)
        for article_ele in tree.xpath("//PubmedArticle"):
            pmid_texts = article_ele.xpath("./MedlineCitation/PMID/text()")
            pub_years = [int(x) for x in article_ele.xpath("./PubmedData/History/PubMedPubDate/Year/text()")]
            #A record missing its id or all of its years cannot be dated;
            #skip it instead of failing the whole batch.
            if not pmid_texts or not pub_years:
                continue
            article_to_year["PMID:" + pmid_texts[0]] = min(pub_years)
    return article_to_year

In [None]:
def chunk_list(l, chunk_size=250):
    """Yield consecutive slices of ``l`` holding at most ``chunk_size`` items.

    Args:
        l: A sliceable sequence with a length (e.g. a list).
        chunk_size: Maximum number of items per slice. Defaults to 250, the
            batch size used for the publication-date lookups.

    Yields:
        Slices of ``l`` in order; the last one may be shorter.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer (raised when
            the generator is first advanced).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    for lower in range(0, len(l), chunk_size):
        #Slicing clamps at the end of the sequence, so no explicit min() is needed.
        yield l[lower:lower + chunk_size]

def getDatesFromPaperIdentifiers(paper_ids):
    """Build a paper-identifier -> year dictionary for a mixed set of papers.

    Identifiers containing "PMC" are sent to ``PMCIDsToYear``; all others are
    sent to ``PMIDsToYear``. Each group is processed in the batches produced
    by ``chunk_list``, PubMed first.
    """
    years_by_paper = {}
    #(selector, lookup) pairs, handled in this order.
    lookups = (
        (lambda paper_id: "PMC" not in paper_id, PMIDsToYear),
        (lambda paper_id: "PMC" in paper_id, PMCIDsToYear),
    )
    for belongs, fetch_years in lookups:
        selected = [paper_id for paper_id in paper_ids if belongs(paper_id)]
        #Break our queries into chunks so each request stays a manageable size.
        for batch in chunk_list(selected):
            years_by_paper.update(fetch_years(batch))
    return years_by_paper

In [None]:
def calculate_recency(known, response,verbose=False):
    """
    Calculate the recency of each chemical in the response.
    Look on the known edge, and find the earliest support for the edge using the TM PMID API
    see
     https://github.com/UCDenver-ccp/DocumentMetadataAPI/blob/main/README.md

    The general flow of this code is as follows.
    1) Parse the TRAPI and find the relevant disease curie and list of drug curies from the response.
    2) Normalize the list of drug curies.
    3) Go to the ROBOKOPKG, ask it specifically for the TextMiningKP edges between the Disease and Drug nodes.
    4) Go to Pubmed, get the year of publication for every paper we found in step 3.
    5) Break the results up into each drug-disease pair, and report them back.

    Steps 3 and 4 are complicated by two factors. One is that it is substantially easier to query ROBOKOPKG and
    the Pubmed API with bulk queries than individually. It would produce a better logical flow of the code to
    go through every individual disease-drug pair, send a query to ROBOKOP to get a list of publications, then send
    a query to Pubmed asking for the year each publication was published. Unfortunately, that takes prohibitively
    long to run. The second issue is normalization of drugs; I don't want to drop any identifier information, so part
    of the code is juggling the unnormalized and normalized drug identifiers.

    Args:
        known: Not used by this function; kept so the call signature stays the same.
        response: TRAPI response dictionary.
        verbose: When True, print progress and the per-drug details.

    Returns:
        dict mapping each (unnormalized) drug identifier to the earliest
        publication year found for its disease-drug pair, or None when the
        pair has no publication with a known year.
    """
    #Get the disease identifier from the TRAPI Query.
    disease_idx = getQueryNode(response)[0]
    if(verbose):print("We found this TRAPI had disease identifier:",disease_idx)
    #Get the list of all drug identifiers from the TRAPI query. This list
    # is not necessarily normalized to the specifications of node normalizer.
    unnormalized_drug_list = list(drug_idx_generator(response))
    if(verbose):print("We found this TRAPI had the following numbers of drug identifers:",len(unnormalized_drug_list))
    #Go to ROBOKOPKG, query each disease-drug pair; find those with an edge from TextMiningKP.
    # For those pairs with edges from TextMiningKP, return those publications as both a dictionary
    # and as a large set of all identifiers we came across (we need this set of all identifiers
    # to make querying Pubmed simpler in our next step).
    pubs_for_pairs, all_pubs = getPublistForPairs(disease_idx,unnormalized_drug_list)
    if(verbose):print("We found this TRAPI had the following numbers of publications from TextMiningKP:",len(all_pubs))

    # Generate a dictionary for the earliest year of publication for each paper
    # identifier we found in our ROBOKOP query.
    pub_to_date = getDatesFromPaperIdentifiers(all_pubs)
    if(verbose):print("We have finished querying Pubmed and PubmedCentral for publication dates.")
    recency = {}

    for drug_idx in unnormalized_drug_list:
        pub_list = pubs_for_pairs[(disease_idx,drug_idx)]
        #The date lookup does not necessarily return a year for every paper
        #it was asked about, so only consider the papers it could date.
        known_years = [pub_to_date[paper_idx] for paper_idx in pub_list if paper_idx in pub_to_date]
        earliest_year = min(known_years) if known_years else None
        if(verbose):
            print(disease_idx,drug_idx)
            print("List of pubs",pub_list)
            print(earliest_year)
            print("----------")
        recency[drug_idx] = earliest_year

    return recency


In [None]:
# Scratch cell: score one ARS query by its PK and print whatever run_score returns.
d = run_score("debec37a-a281-47a5-a3d6-dc31206c571f")
print(d)

In [None]:
# Scratch cell: re-run the recency scoring on results previously pickled to save.p.
# NOTE(security): pickle.load can execute arbitrary code from the file; only
# load a save.p that this project wrote itself.
with open("save.p", "rb") as saved_results:
    results = pickle.load(saved_results)
for ars, response in results.items():
    known, unknown = find_known_results(response)
#        nn_distance = calculate_nn_distance(known, unknown, response)
    recency = calculate_recency(known, response)

In [None]:
def getQueryNode(json_data):
    """Return ``(query_idx, disease)`` for the pinned node of a TRAPI query graph.

    The query-graph node keys are probed in a fixed order: ``'on'``, then
    ``'n0'`` (falling back to ``'n1'``), then ``'disease'``.

    Args:
        json_data: Response dictionary containing
            ``['message']['query_graph']['nodes']``.

    Returns:
        tuple: ``(query_idx, disease)``. ``query_idx`` is the first identifier
        found, ``"biolink:Gene"`` for a gene-category ``'on'`` node, or
        ``"N/A"`` when nothing is found. ``disease`` is a boolean flag
        (presumably whether the node is treated as a disease; confirm with
        callers).
    """
    query_node = json_data['message']['query_graph']['nodes']

    if 'on' in query_node:
        on_node = query_node['on']
        #Weird gene query
        if on_node.get('categories',[]) == ['biolink:Gene']:
            return ("biolink:Gene", False)
        if 'ids' in on_node:
            return (on_node['ids'][0], True)
        return ("N/A", False)

    if 'n0' in query_node:
        if 'ids' in query_node['n0']:
            return (query_node['n0']['ids'][0], True)
        #'n1' is not guaranteed to exist just because 'n0' does; a missing
        #node must fall through to "N/A" instead of raising KeyError.
        n1_node = query_node.get('n1', {})
        if 'id' in n1_node:
            return (n1_node['id'], True)
        #NOTE(review): the flag stays True here although no identifier was
        #found; kept as-is, confirm this is intended.
        return ("N/A", True)

    if 'disease' in query_node:
        return (query_node['disease']['ids'][0], True)

    #Could not find any node
    return ("N/A", False)
            

def drug_idx_generator(json_data):
    """Yield the drug identifier bound in each result of a TRAPI response.

    For every entry of ``json_data['message']['results']`` the node bindings
    are probed for the keys ``'sn'``, ``'n1'``, ``'drug'`` and ``'chemical'``
    (in that order) and the ``'id'`` of the first binding under the first key
    present is yielded. Identifiers are yielded as found; no normalization is
    done here.

    Args:
        json_data: Response dictionary with a ``'message'`` entry.

    Yields:
        The bound identifier, one per result.

    Raises:
        Exception: If a result has none of the recognized binding keys.
    """
    #Binding keys that may hold the drug node, in priority order.
    drug_binding_keys = ('sn', 'n1', 'drug', 'chemical')

    #'results' may be absent or explicitly None; both mean "no results".
    for result in json_data['message'].get('results') or []:
        node_bindings = result['node_bindings']
        for binding_key in drug_binding_keys:
            if binding_key in node_bindings:
                yield node_bindings[binding_key][0]['id']
                break
        else:
            print(node_bindings)
            raise Exception("Could not get Drug")


In [None]:
# Scratch cell: relies on a module-level `response` left behind by an earlier
# cell (the save.p loop assigns one).
getQueryNode(response)

In [None]:
# Scratch cell: inspect the top-level keys of the module-level `response`
# left behind by an earlier cell.
response.keys()

In [None]:
# Scratch cell: module-level ROBOKOP connection handle. The query helpers in
# this notebook create their own local `graph`, so they do not use this one.
graph = py2neo.Graph(host="robokopkg.renci.org")


In [None]:


#def normalDrugIdentifiersGenerator(drug_list):
    

def queryROBOKOPForPMIDsDiseaseDrug(disease_idx,drug_idx):
    """Query ROBOKOPKG for the TextMiningKP publications of one disease-drug pair.

    Args:
        disease_idx: Identifier matched against the disease node's ``id``.
        drug_idx: Identifier matched against the chemical node's ``id``.

    Returns:
        list: The ``publications`` values of every matching edge, concatenated
        in the order the rows come back (duplicates are kept).
    """
    graph = py2neo.Graph(host="robokopkg.renci.org")
    query_template = 'MATCH (d:`biolink:Disease`)-[r]-(c:`biolink:ChemicalEntity`) WHERE d.id=$disease_idx AND c.id=$drug_idx AND r.`biolink:primary_knowledge_source`="infores:textminingkp" RETURN r.publications'
    pubs_for_res=[]
    query_res = graph.run(query_template,parameters={"disease_idx":disease_idx, "drug_idx":drug_idx})
    for message in query_res:
        #Cypher returns null for a property the edge does not have; treat that
        #as "no publications" rather than crashing in list.extend(None).
        pubs_for_res.extend(message['r.publications'] or [])
    return pubs_for_res


    #    break
    
def getEarliestPublicationDateForPairs(disease_idx,drug_identifier_list,verbose=False):
    """Yield the earliest publication year for each disease-drug pair.

    Args:
        disease_idx: Disease identifier shared by all pairs.
        drug_identifier_list: Drug identifiers to pair with the disease.
        verbose: When True, print the details of every pair.

    Yields:
        tuple: ``(disease_idx, drug_idx, earliest_year)`` where
        ``earliest_year`` is None when the pair has no publication with a
        known year.
    """
    #Nothing to pair with: finish before making any remote lookups.
    if not drug_identifier_list:
        return

    pubs_for_pairs, all_pubs = getPublistForPairs(disease_idx,drug_identifier_list)
    #getDatesFromPaperIdentifiers is the date lookup defined in this notebook;
    #the previously referenced getDatesFromPMIDs does not exist here.
    pub_to_date = getDatesFromPaperIdentifiers(all_pubs)
    for drug_idx in drug_identifier_list:
        pub_list = pubs_for_pairs[(disease_idx,drug_idx)]
        #Only papers the date lookup could date take part in the minimum.
        known_years = [pub_to_date[paper_idx] for paper_idx in pub_list if paper_idx in pub_to_date]
        earliest_year = min(known_years) if known_years else None
#        date = getEarlyDate(pubs_for_res)
        if(verbose):
            print(disease_idx,drug_idx)
            print("List of pubs",pub_list)
            print(earliest_year)
            print("----------")
        yield (disease_idx,drug_idx,earliest_year)
        
        



#        date = getEarlyDate(pubs_for_res)
#        if(verbose):
#            print(disease_idx,drug_key)
#            print("List of pubs",pubs_for_res)
#            print(date)
#            print("----------")
#        yield (disease_idx,drug_key,date)



In [None]:
# Scratch cell: single-pair ROBOKOP lookup for one example disease/drug.
queryROBOKOPForPMIDsDiseaseDrug("MONDO:0005148","CHEBI:32677")

In [None]:
# Scratch cell: the same example pair through the bulk (drug list) lookup.
queryROBOKOPForPMIDsDiseaseDrugList("MONDO:0005148",["CHEBI:32677"])

In [None]:
# Scratch cell: run the scoring pipeline for another example PK.
run_score('c01a5839-975f-4352-8aa3-5490d93adfc6')

In [None]:
# NOTE(review): in the visible source `pubs_for_res` is only a local variable of
# queryROBOKOPForPMIDsDiseaseDrug, so this raises NameError unless an earlier
# session defined it at module level.
pubs_for_res

In [None]:
# NOTE(review): in the visible source `query_res` is only a local variable of the
# ROBOKOP query helpers, so this raises NameError unless an earlier session
# defined it at module level.
query_res['r.publications']

In [None]:
# Scratch cell: print the start offsets a step of 250 produces below 740
# (the same stepping chunk_list uses).
for i in range(0,740,250):
    print(i)

In [None]:





def chunk_list(l, chunk_size=250):
    """Yield consecutive slices of ``l`` holding at most ``chunk_size`` items.

    Args:
        l: A sliceable sequence with a length (e.g. a list).
        chunk_size: Maximum number of items per slice. Defaults to 250, the
            batch size used for the publication-date lookups.

    Yields:
        Slices of ``l`` in order; the last one may be shorter.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer (raised when
            the generator is first advanced).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    for lower in range(0, len(l), chunk_size):
        #Slicing clamps at the end of the sequence, so no explicit min() is needed.
        yield l[lower:lower + chunk_size]

def getDatesFromPaperIdentifiers(paper_ids):
    """Build a paper-identifier -> year dictionary for a mixed set of papers.

    Identifiers containing "PMC" are sent to ``PMCIDsToYear``; all others are
    sent to ``PMIDsToYear``. Each group is processed in the batches produced
    by ``chunk_list``, PubMed first.
    """
    years_by_paper = {}
    #(selector, lookup) pairs, handled in this order.
    lookups = (
        (lambda paper_id: "PMC" not in paper_id, PMIDsToYear),
        (lambda paper_id: "PMC" in paper_id, PMCIDsToYear),
    )
    for belongs, fetch_years in lookups:
        selected = [paper_id for paper_id in paper_ids if belongs(paper_id)]
        #Break our queries into chunks so each request stays a manageable size.
        for batch in chunk_list(selected):
            years_by_paper.update(fetch_years(batch))
    return years_by_paper
    
def getEarlyDate(pmids_all):
    """Return the smallest date (as an int) among the given papers, or None.

    None is returned when ``pmids_all`` is empty or when the date lookup comes
    back empty. Progress messages are printed along the way.

    NOTE(review): ``getDatesFromListPMIDs`` is not defined in the visible part
    of this notebook; confirm it exists before relying on this helper.
    """
    if not len(pmids_all):
        return None

    print("starting func")
    date_list = getDatesFromListPMIDs(pmids_all)
    print('v2',date_list)

    if not len(date_list):
        return None
    return min(map(int, date_list))

In [None]:
# Scratch cell: exercise the date lookup, then inspect a raw efetch response by hand.
pmids = ['PMC6182468', 'PMID:21079458', 'PMC5740191', 'PMC5740191']
# getDatesFromPaperIdentifiers is the lookup defined in this notebook; the
# previously called getDatesFromPMIDs does not exist here.
getDatesFromPaperIdentifiers(pmids)
# NOTE(review): this sends every identifier, including the 'PMID:' one, to db=pmc.
test_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=" + ','.join(pmids)
#print(test_url)
#There's some weird issue with querying pubmed too quickly, easier just to have it take a pause.
time.sleep(0.2)
r = requests.get(test_url)
s = r.text.encode('utf-8')
print('query2',len(s))
if(len(s)>250):
    tree = etree.fromstring(s, parser=utf8_parser)
    tree.xpath("//year/text()")

In [None]:
# Scratch cell: a bare list of PMC identifiers (evaluated for display only; nothing is assigned).
["PMC6182468","PMC5740191","PMC5740191"]


In [None]:
# Scratch cell: look up the years for a mixed batch of PMC and PMID identifiers.
pmids = ['PMC3321664', 'PMID:15596736', 'PMID:16533138', 'PMC3136450', 'PMID:15652415', 'PMC4164261', 'PMC3497294', 'PMC3498184', 'PMC3497350', 'PMC3521715']
#print(getDatesFromListPMIDs(pmids))
# getDatesFromPaperIdentifiers is the lookup defined in this notebook; the
# previously called getDatesFromPMIDs does not exist here.
d = getDatesFromPaperIdentifiers(pmids)
#print(getEarlyDate(pmids))
print(d)

In [None]:
# Scratch cell: list the PMIDs found in a previously parsed document.
# NOTE(review): at module level `tree` is only assigned by the earlier efetch
# scratch cell, and only when its response was longer than 250 bytes.
tree
for article_ele in tree.xpath("//PubmedArticle"):
    pm_idx = "PMID:" + article_ele.xpath("./MedlineCitation/PMID/text()")[0]
    print(pm_idx)

In [None]:
#!/usr/bin/env python

# <code>Python
# " " "
#    Two functions to check the ARS's URL (https://ars-prod.transltr.io/ars/api/messages/) and then to fetch the EPC attributes      
#    @author: Salman Zarrini
# " " "
# </code>

import pandas as pd
import json
import requests
import time

#-------------
def checking_extracting_attributes_1(attribute_type_id_list, pk):
    """
    Fetch one ARS message by its PK and collect the edge attributes whose
    ``attribute_type_id`` is in the requested list.

    Args:
        attribute_type_id_list (list): attribute_type_id values of interest.
        pk (string): PK of a query invoked by a user.

    Returns:
        tuple: ``(message, df)``. ``message`` is a string reporting the
        seconds spent fetching and scanning the message (measured once, from
        just before the request until the scan finishes). ``df`` is a pandas
        DataFrame with one row per matching attribute and the columns
        ``pks``, ``edges``, ``all_attrs`` (every attribute of the edge) and
        ``values`` (the matching attribute's value).

    Examples:
        Ex_1:
        attribute_type_id_list = ['biolink:publications', 'biolink:Publication', 'biolink:publication']
        pk = 'c01a5839-975f-4352-8aa3-5490d93adfc6'

        Ex_2:
        attribute_type_id_list = ['biolink:FDA_approval_status']
        pk = 'c01a5839-975f-4352-8aa3-5490d93adfc6'

    """
    start_time_check = time.time()

    url = "https://ars-prod.transltr.io/ars/api/messages/"+pk
    response = requests.get(url)

    data_retrieved = response.json()

    pks_list = []
    edges_list = []
    all_attributes_list = []
    value_list = []

    # Only messages that carry a knowledge graph have edges to scan.
    if ('message' in data_retrieved['fields']['data']) and ('knowledge_graph' in data_retrieved['fields']['data']['message']):
        edges = data_retrieved['fields']['data']['message']['knowledge_graph']['edges']
        # Going through each edge and inspecting its attributes.
        for edge, edge_body in edges.items():
            # An edge may omit 'attributes' or set it to null; both mean "none".
            pk_edge_attr = edge_body.get('attributes') or []
            for attribute in pk_edge_attr:
                # Keep the attribute when its type is one of the requested ones.
                if attribute['attribute_type_id'] in attribute_type_id_list:
                    pks_list.append(pk)
                    edges_list.append(edge)
                    all_attributes_list.append(pk_edge_attr)
                    value_list.append(attribute['value'])

    # Measure the elapsed time once. Adding "time since start" on every match
    # (as was done before) summed overlapping intervals and over-reported.
    total_checking_time = time.time() - start_time_check

    temp = {
        'pks': pks_list,
        'edges': edges_list,
        'all_attrs': all_attributes_list,
        'values': value_list
    }

    df = pd.DataFrame(temp)

    return f'Total time of checking attribute for each query: {total_checking_time}', df
#-------------


def checking_extracting_attributes_2(attribute_type_id_list):
    """
    Fetch the ARS message listing and collect, for every listed message, the
    edge attributes whose ``attribute_type_id`` is in the requested list.

    Args:
        attribute_type_id_list (list): attribute_type_id values of interest.

    Returns:
        tuple: ``(message, df)``. ``message`` is a string reporting the
        seconds spent fetching and scanning (measured once, from just before
        the request until the scan finishes). ``df`` is a pandas DataFrame
        with one row per matching attribute and the columns ``pks_``,
        ``edgs_``, ``all_attrs_`` (every attribute of the edge) and ``value_``
        (the matching attribute's value). When the request does not return
        status 200 a message is printed and ``df`` is empty.

    Examples:
        Ex_1:
        attribute_type_id_list = ['biolink:publications', 'biolink:Publication', 'biolink:publication']

        Ex_2:
        attribute_type_id_list = ['biolink:FDA_approval_status']

    """
    start_time_check = time.time()

    url = "https://ars-prod.transltr.io/ars/api/messages/"
    response = requests.get(url)

    pks_list = []
    edges_list = []
    all_attributes_list = []
    value_list = []

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        data_retrieved = response.json()

        for item in data_retrieved:
            item_data = item['fields']['data']
            # Making sure that the query's response has 'message' and then 'knowledge_graph' as keys, respectively
            if ('message' in item_data) and ('knowledge_graph' in item_data['message']):
                edges = item_data['message']['knowledge_graph']['edges']
                # Going through each edge and inspecting its attributes.
                for edge, edge_body in edges.items():
                    # An edge may omit 'attributes' or set it to null; both mean "none".
                    pk_edge_attr = edge_body.get('attributes') or []
                    for attribute in pk_edge_attr:
                        # Keep the attribute when its type is one of the requested ones.
                        if attribute['attribute_type_id'] in attribute_type_id_list:
                            pks_list.append(item['pk'])
                            edges_list.append(edge)
                            all_attributes_list.append(pk_edge_attr)
                            value_list.append(attribute['value'])
    else:
        print('Failed to retrieve data. Status code:', response.status_code)

    # Measure the elapsed time once. Adding "time since start" on every match
    # (as was done before) summed overlapping intervals and over-reported.
    total_checking_time = time.time() - start_time_check

    # Built on both paths so a failed request returns an empty frame instead
    # of hitting an unbound `df`.
    temp = {
        'pks_': pks_list,
        'edgs_': edges_list,
        'all_attrs_': all_attributes_list,
        'value_': value_list
    }
    df = pd.DataFrame(temp)

    return f'Total time of checking attribute for each query: {total_checking_time}', df

#-------------

#Some example PKs from the dump database, to be used with the checking_extracting_attributes_1(attribute_type_id_list, pk) function:

# Example PKs; the demo loop at the end of this script reads only the first
# entry (pk_dump_publication[:1]).
# NOTE(review): presumably these are PKs whose responses carry publication
# attributes - confirm.
pk_dump_publication = [
 'c01a5839-975f-4352-8aa3-5490d93adfc6',
 '7771c619-0211-4241-8284-2b1ecaad2585',
 '6cc89d3a-762f-41a6-9fd3-212e7dc71045',
 '70732f65-14bb-43be-84c3-499c13f04808',
 '1817194d-facc-414e-9e0d-df58c9d1fb88',
 '3f374b0b-9be3-4643-bc3d-94f3a704b5d2',
 '73bf8218-0364-48b9-af53-963779255778',
 '8a882a78-90e3-4451-bfbf-1903a4fc670d',
 '2791e7cf-a6eb-49bb-b82f-01102295a4b2',
 '9f0089ef-b45e-4440-ae52-31329d029d41',
 '9fef2e41-6dc5-453b-9a9f-e6e870e866a0',
 '08977acc-380a-477c-801a-4cfea879052b',
 'a8287166-e73a-47ae-a4fc-c32f903edcdb',
 '167a254a-8434-4dce-be31-6b401881ee78',
 'b7ce6ccf-246e-4378-9bbb-8d5701a95ff9',
 'f4afdf27-22fe-4cf9-9cd8-388cff57e933',
 '84522585-cf0b-4dbd-a752-93f2f8edf0a1',
 '902a3a45-5b35-4abb-a931-40bf77ce8ea4',
 'afe3458f-0b2f-4327-80fc-feddbf9373c4',
 '118e7dd5-ae29-4085-8a5b-1fa0e9b26f0e']


# Example PKs; not referenced anywhere else in the visible source.
# NOTE(review): presumably these are PKs whose responses carry an FDA approval
# attribute - confirm.
pk_dump_fda = [
 '6cc89d3a-762f-41a6-9fd3-212e7dc71045',
 '70732f65-14bb-43be-84c3-499c13f04808',
 '8a882a78-90e3-4451-bfbf-1903a4fc670d',
 'a8287166-e73a-47ae-a4fc-c32f903edcdb',
 'b7ce6ccf-246e-4378-9bbb-8d5701a95ff9',
 '118e7dd5-ae29-4085-8a5b-1fa0e9b26f0e',
 '50b80303-d8db-4195-9979-44340fee8ab8',
 'f3cb207c-3c5f-4a2a-80b3-ac60774500d0',
 'a4b08898-b517-4557-9cb4-121de17365e5',
 '94e876ef-836f-4334-8ca0-1f88b541e23e',
 'b2c53e95-6947-4365-8f77-9d344c8a6b97',
 'a8b55833-ce1a-4f12-9a7e-ff465b5df902',
 'a11a837d-0557-42db-9316-c9fc5d19b27e',
 '937b6eb2-0201-4bab-b2d3-dff7e7d1b59d',
 'f23fba18-503e-49d9-8ca9-03022d98be45',
 'd287a80d-8ef9-4270-932c-55d67c46ba70',
 '7cc631c4-b5c6-475e-9353-093b8e29fde7',
 '1c12b8a4-9894-4b3f-b547-4efe7f38a580',
 '803ec852-d7b8-4c4b-a7d5-7d7b01618a95',
 '1e5ca2cb-93af-4fe8-87d2-48069532b9b5']

# Example PKs; not referenced anywhere else in the visible source.
# NOTE(review): the name suggests PKs lacking publication and/or FDA
# attributes, yet several entries also appear in the two lists above - confirm
# what this list is meant to contain.
pk_dump_not_pub_fda = [
 'c01a5839-975f-4352-8aa3-5490d93adfc6',
 '70732f65-14bb-43be-84c3-499c13f04808',
 '2edbca56-3759-4bae-a0a1-c404ff15c189',
 'a347d888-bd3e-46d0-ab15-74241956fb08',
 '9f0089ef-b45e-4440-ae52-31329d029d41',
 'a8287166-e73a-47ae-a4fc-c32f903edcdb',
 '2119f139-38b6-444f-99fa-905650777fe8',
 '1e5371c6-4219-4b42-b441-cc5ac86b767e',
 'afe3458f-0b2f-4327-80fc-feddbf9373c4',
 'f3cb207c-3c5f-4a2a-80b3-ac60774500d0',
 'f0b876f7-d37f-4160-8c93-d841fc64b787',
 'a4b08898-b517-4557-9cb4-121de17365e5',
 'b2c53e95-6947-4365-8f77-9d344c8a6b97',
 '4a32b5d3-584b-44ad-9eb5-788c5b4b3f84',
 'c341af23-23ea-44db-9fcf-ff64a30c30a2',
 '0cfd2aa2-41a1-43bd-bb1a-ae4f267e1089',
 '576b1692-f2c6-4c0b-a82d-a0c668d6ec72',
 'a8b55833-ce1a-4f12-9a7e-ff465b5df902',
 'b34267a3-788a-49ef-8d20-8cfda13a97f5',
 'f23fba18-503e-49d9-8ca9-03022d98be45']

#----------------

# Example: extract the publication-type attributes listed in att_list for the
# first example PK (widen the slice to process more PKs) and combine the
# per-PK frames into one DataFrame.

att_list = ['biolink:publications', 'biolink:Publication', 'biolink:publication']

# Collect the per-PK frames and concatenate once at the end, rather than
# re-concatenating onto a growing (initially empty) frame on every iteration.
publication_frames = []
for pk in pk_dump_publication[:1]:
    tot_time, df = checking_extracting_attributes_1(att_list, pk)
    publication_frames.append(df)

if publication_frames:
    df_publication = pd.concat(publication_frames, ignore_index=True)
else:
    # No PK processed: keep the expected columns on an empty frame.
    df_publication = pd.DataFrame(columns= ['pks', 'edges', 'all_attrs', 'values'])


#print(df_publication)
