In [12]:
import os
import rdflib
import oxrdflib
import pandas as pd

from pygments import highlight
from pygments.lexers import SparqlLexer
from pygments.formatters import HtmlFormatter
from IPython.display import HTML

from myst_nb import glue

# Loading RDF Datasets

To simulate querying over different data source services, the RDF datasets are stored in a single conjunctive graph in which each dataset (the HAMLET RDF data and the PHENOPACKET-STORE data) is held in its own graph, identified by its own keyword.

In [3]:
def get_rdf_filenames(dirname: str):
    """
    Get the names of the RDF files in Turtle format located directly in a directory.

    Parameters
    ----------
    dirname : str
        Path of the directory to scan. Sub-directories are not searched.

    Returns
    -------
    list of str
        Bare filenames (without the directory part) of the regular files whose
        extension is exactly ``.ttl``, in alphabetical order. Empty when the
        directory holds no such file.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when the directory cannot be listed.
    """
    # os.listdir() yields entries in arbitrary order; sorting keeps the load order
    # (and therefore the printed log and parse sequence) reproducible across runs.
    return sorted(
        filename
        for filename in os.listdir(dirname)
        if os.path.isfile(os.path.join(dirname, filename))
        # splitext() only reports a real extension, so a file that is merely
        # *named* "ttl" (no dot) is not mistaken for a Turtle file.
        and os.path.splitext(filename)[1] == '.ttl'
    )

def load_rdf_files(dirnames: list):
    """
    Load the RDF files of several directories into one conjunctive graph.

    Every directory is first parsed into its own graph. The triples of each of
    those graphs are then added to a single conjunctive graph under a
    per-directory keyword (``http://example.org/<dirname>``), to simulate
    separate service data sources.

    Parameters
    ----------
    dirnames : list
        Directory paths; the Turtle files reported by ``get_rdf_filenames``
        for each directory are parsed.

    Returns
    -------
    rdflib.ConjunctiveGraph
        Oxigraph-backed conjunctive graph holding all triples, each tagged
        with the keyword of the directory it was loaded from.
    """
    service_graphs = []

    for dirname in dirnames:
        print(f'Load RDF data from directory {dirname} into RDF graph.')
        graph = rdflib.Graph(store=oxrdflib.OxigraphStore())

        for rdf_filename in get_rdf_filenames(dirname):
            # Build the path the same way get_rdf_filenames() does.
            graph.parse(os.path.join(dirname, rdf_filename))

        service_graphs.append({'service': rdflib.URIRef(f'http://example.org/{dirname}'), 'graph': graph})

    combined_g = rdflib.ConjunctiveGraph(store=oxrdflib.OxigraphStore())
    for service_graph in service_graphs:
        # Bind the keyword to a local name first: reusing the f-string's own
        # quote character inside the replacement field is a SyntaxError on
        # Python versions before 3.12.
        service = service_graph['service']
        print(f'Add graph of service {service}')
        combined_g.addN((s, p, o, service) for s, p, o in service_graph['graph'])

    return combined_g

In [4]:
# Directories holding the Turtle files of the two datasets; each directory
# becomes a separate graph (keyword http://example.org/<dirname>) in the
# conjunctive graph `g` returned by load_rdf_files.
rdf_dirnames = ['HAMLET_DATA', 'MONARCH_PHENOPACKET_STORE_DATA']
g = load_rdf_files(rdf_dirnames)

Load RDF data from directory HAMLET_DATA into RDF graph.
Load RDF data from directory MONARCH_PHENOPACKET_STORE_DATA into RDF graph.
Add graph of service http://example.org/HAMLET_DATA
Add graph of service http://example.org/MONARCH_PHENOPACKET_STORE_DATA


# SPARQL Queries

The query loaded below addresses each individual graph in the conjunctive graph explicitly by its keyword, using the SPARQL `GRAPH` keyword to simulate SPARQL's `SERVICE` functionality.

In [5]:
def get_prettyprint_sparql_query(query: str):
    """
    Get the given string representing a SPARQL query in a readable manner.

    Parameters
    ----------
    query : str
        Text of the SPARQL query.

    Returns
    -------
    IPython.display.HTML
        Syntax-highlighted HTML rendering of the query, formatted with the
        Pygments 'colorful' style.
    """
    # noclasses=True makes Pygments write the colours as inline styles. Without
    # it the markup only carries CSS class names, and since no stylesheet is
    # emitted here the requested 'colorful' style would not be applied.
    formatter = HtmlFormatter(style='colorful', noclasses=True)
    return HTML(highlight(query, SparqlLexer(), formatter))

In [6]:
# Scenario number; selects the SPARQL/scenario_<nr>/ directory the queries are read from.
SCENARIO_NR = 4

## Question 1

**Have patients been observed with variants in the same gene and the same diagnosis?**

First, all mutated genes are retrieved from the single AML cell line phenopacket in the HAMLET dataset. These genes are then used to find the phenopackets in the PHENOPACKET-STORE data that have mutations in at least one of the same genes.

In [7]:
# Read the question1_a SPARQL query of the selected scenario.
# The encoding is given explicitly so the query text is decoded identically on
# every platform instead of depending on the locale's default encoding.
# NOTE(review): assumes the .rq files are stored as UTF-8 — confirm.
with open(f'SPARQL/scenario_{SCENARIO_NR}/question1_a.rq', 'r', encoding='utf-8') as file:
    query = file.read()

# Last expression of the cell: shows the highlighted query as the cell output.
get_prettyprint_sparql_query(query)

In [8]:
# Run the loaded SPARQL query against the combined conjunctive graph.
results = g.query(query)

In [9]:
# Convert every result row into a record with named fields. Values are taken
# by position, so the order must match the variable order of the query's
# SELECT clause (presumably gene symbol, diagnosis, disease ID, disease label
# — verify against question1_a.rq).
diagnosis_list = [
    {
        'gene symbol': row[0],
        'diagnosis ID': row[1],
        'disease ID': row[2],
        'disease label': row[3],
    }
    for row in results
]

In [14]:
# Tabulate the collected records and register the table with myst_nb's glue
# under the key 'scenario4_diagnosis'.
diagnosis_df = pd.DataFrame(diagnosis_list)
glue('scenario4_diagnosis', diagnosis_df)

Unnamed: 0,gene symbol,diagnosis ID,disease ID,disease label
0,NRAS,https://example.org/diagnosis_phenopacketbf2d0...,OMIM:613224,Noonan syndrome 6
1,NRAS,https://example.org/diagnosis_phenopacketb7df3...,OMIM:613224,Noonan syndrome 6
2,NRAS,https://example.org/diagnosis_phenopacket0aaf0...,OMIM:613224,Noonan syndrome 6
3,NRAS,https://example.org/diagnosis_phenopacket3b2b2...,OMIM:613224,Noonan syndrome 6
4,NRAS,https://example.org/diagnosis_phenopacket1caa9...,OMIM:613224,Noonan syndrome 6
5,NRAS,https://example.org/diagnosis_phenopacket0520c...,OMIM:613224,Noonan syndrome 6
6,NRAS,https://example.org/diagnosis_phenopacketf0c96...,OMIM:613224,Noonan syndrome 6
7,NRAS,https://example.org/diagnosis_phenopacketee17c...,OMIM:613224,Noonan syndrome 6
8,NRAS,https://example.org/diagnosis_phenopacketa127c...,OMIM:613224,Noonan syndrome 6
9,NRAS,https://example.org/diagnosis_phenopacket14db1...,OMIM:613224,Noonan syndrome 6
