In [47]:
import os
import rdflib
import pandas as pd

# Loading RDF Datasets

In [48]:
def get_rdf_filenames(dirname: str) -> list:
    """Return the names of the ``.ttl`` files located directly in a directory.

    Only regular files are considered; subdirectories are neither searched
    nor returned, even when their own name ends in ``.ttl``.

    Args:
        dirname: Path of the directory to scan.

    Returns:
        The bare file names (without the directory part), sorted
        alphabetically so the result does not depend on the arbitrary order
        in which ``os.listdir`` reports entries.

    Raises:
        OSError: If ``dirname`` cannot be listed (for example, it does not
            exist or is not a directory).
    """
    rdf_filenames = []
    for filename in sorted(os.listdir(dirname)):
        # Require a real '.ttl' suffix: splitting on '.' and comparing the
        # last piece would also accept an extension-less file named 'ttl'.
        if not filename.endswith('.ttl'):
            continue
        if os.path.isfile(os.path.join(dirname, filename)):
            rdf_filenames.append(filename)
    return rdf_filenames

def load_rdf_files(dirnames: list):
    """Build a single RDF graph from the files found in several directories.

    For each directory a progress line is printed, then every file name that
    ``get_rdf_filenames`` returns for that directory is parsed into the same
    ``rdflib.Graph``.

    Args:
        dirnames: Directory paths to read RDF files from.

    Returns:
        The ``rdflib.Graph`` into which all files were parsed.
    """
    merged_graph = rdflib.Graph()

    for dirname in dirnames:
        print(f'Load RDF data from directory {dirname} into RDF graph.')

        # No explicit format is passed, so rdflib chooses the parser itself.
        for rdf_filename in get_rdf_filenames(dirname):
            merged_graph.parse(f'{dirname}/{rdf_filename}')

    return merged_graph

In [49]:
# Relative directory paths whose RDF files are loaded into the graph.
rdf_dirnames = ['HAMLET_DATA', 'MONARCH_PHENOPACKET_STORE_DATA']
# Parse the files from all listed directories into one graph; `g` is the graph
# that the SPARQL query cells run against.
g = load_rdf_files(rdf_dirnames)

Load RDF data from directory HAMLET_DATA into RDF graph.
Load RDF data from directory MONARCH_PHENOPACKET_STORE_DATA into RDF graph.


# SPARQL Queries

In [50]:
# Number of the scenario; it selects the SPARQL/scenario_<nr>/ directory below.
SCENARIO_NR = 4

# Read the query text from the scenario's question1_a.rq file. The encoding is
# passed explicitly because SPARQL query files are UTF-8, whereas open() would
# otherwise fall back to a platform-dependent default encoding.
with open(f'SPARQL/scenario_{SCENARIO_NR}/question1_a.rq', 'r', encoding='utf-8') as file:
    query = file.read()

In [51]:
# Run the SPARQL query text in `query` against the graph `g`.
results = g.query(query)

In [52]:
# Turn every result row into a record keyed by column label. Values are taken
# by position, so this relies on the order of the variables the query selects.
diagnosis_list = [
    {
        'gene symbol': result_row[0],
        'diagnosis ID': result_row[1],
        'disease ID': result_row[2],
        'disease label': result_row[3],
    }
    for result_row in results
]

In [59]:
# One row per record in diagnosis_list; the dict keys become the column names.
diagnosis_df = pd.DataFrame(diagnosis_list)
# Bare last expression so the notebook renders the frame as the cell output.
diagnosis_df

Unnamed: 0,gene symbol,diagnosis ID,disease ID,disease label
0,NRAS,https://example.org/diagnosis_phenopacketee17c...,OMIM:613224,Noonan syndrome 6
1,NRAS,https://example.org/diagnosis_phenopacketba7f5...,OMIM:613224,Noonan syndrome 6
2,NRAS,https://example.org/diagnosis_phenopacket3b2b2...,OMIM:613224,Noonan syndrome 6
3,NRAS,https://example.org/diagnosis_phenopacketb7df3...,OMIM:613224,Noonan syndrome 6
4,NRAS,https://example.org/diagnosis_phenopacketbf2d0...,OMIM:613224,Noonan syndrome 6
5,NRAS,https://example.org/diagnosis_phenopacket14db1...,OMIM:613224,Noonan syndrome 6
6,NRAS,https://example.org/diagnosis_phenopacket1caa9...,OMIM:613224,Noonan syndrome 6
7,NRAS,https://example.org/diagnosis_phenopacketfa9d7...,OMIM:613224,Noonan syndrome 6
8,NRAS,https://example.org/diagnosis_phenopacket0520c...,OMIM:613224,Noonan syndrome 6
9,NRAS,https://example.org/diagnosis_phenopacket0aaf0...,OMIM:613224,Noonan syndrome 6


In [60]:
# NOTE(review): this import sits in a late cell rather than with the imports
# in the notebook's first cell — confirm that is intentional (e.g. a cell that
# is hidden in the rendered book).
from myst_nb import glue

# Store the DataFrame under the key 'scenario4_diagnosis' with MyST-NB's glue;
# presumably it is pasted by that key elsewhere in the documentation — verify.
glue('scenario4_diagnosis', diagnosis_df)

Unnamed: 0,gene symbol,diagnosis ID,disease ID,disease label
0,NRAS,https://example.org/diagnosis_phenopacketee17c...,OMIM:613224,Noonan syndrome 6
1,NRAS,https://example.org/diagnosis_phenopacketba7f5...,OMIM:613224,Noonan syndrome 6
2,NRAS,https://example.org/diagnosis_phenopacket3b2b2...,OMIM:613224,Noonan syndrome 6
3,NRAS,https://example.org/diagnosis_phenopacketb7df3...,OMIM:613224,Noonan syndrome 6
4,NRAS,https://example.org/diagnosis_phenopacketbf2d0...,OMIM:613224,Noonan syndrome 6
5,NRAS,https://example.org/diagnosis_phenopacket14db1...,OMIM:613224,Noonan syndrome 6
6,NRAS,https://example.org/diagnosis_phenopacket1caa9...,OMIM:613224,Noonan syndrome 6
7,NRAS,https://example.org/diagnosis_phenopacketfa9d7...,OMIM:613224,Noonan syndrome 6
8,NRAS,https://example.org/diagnosis_phenopacket0520c...,OMIM:613224,Noonan syndrome 6
9,NRAS,https://example.org/diagnosis_phenopacket0aaf0...,OMIM:613224,Noonan syndrome 6
