# Notebook Aim
Investigate RDF data stored in Turtle (`.ttl`) files and parse its data nodes and interactions with `rdflib`.

In [1]:
# https://github.com/RDFLib/rdflib
#!pip install rdflib
from rdflib import Graph

## Ontology File
The RDF data dump comes with an ontology file that can be used to find out which RDF types to expect in the `.ttl` files.

In [9]:
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
import networkx as nx
import matplotlib.pyplot as plt

# Sample Turtle document from the W3C test suite.
# Nothing in the cells shown here reads this variable.
url = "https://www.w3.org/TeamSubmission/turtle/tests/test-30.ttl"

# Load the ontology that ships with the RDF dump into its own graph.
# `result` keeps whatever parse() hands back (presumably the same graph
# object as `onto_graph` -- confirm against the rdflib docs).
onto_graph = Graph()
result = onto_graph.parse(source="./wpOntology.ttl", format="turtle")

# Optional, currently disabled: render the ontology through networkx.
# G = rdflib_to_networkx_multidigraph(result)
# pos = nx.spring_layout(G, scale=2)
# plt.figure(figsize=(50, 50))
# edge_labels = nx.get_edge_attributes(G, 'r')
# nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
# nx.draw(G, with_labels=True)
# plt.savefig("ontology_graph.png")  # save to disk when not running interactively

## Test WP File - WP111

In [3]:
# Read the WP111 pathway file into a fresh graph. No format is passed, so
# rdflib chooses the parser on its own.
g = Graph()
wp111g = g.parse(source="./WP111.ttl")

In [4]:
len(wp111g)  # Number of triples (subject/predicate/object statements) in the graph

2353

In [14]:
# Print the label of one GeneProduct as a quick check that the graph loaded
# and that type/label lookups work.
#
# Every prefix the query uses is declared in the query itself, so it does not
# depend on prefix bindings picked up from the parsed file.
# NOTE(review): the wp: IRI must match the `@prefix wp:` line in WP111.ttl.
q = """
    PREFIX dc: <http://purl.org/dc/elements/1.1/>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX wp: <http://vocabularies.wikipathways.org/wp#>

    SELECT ?label
    WHERE {
        ?p rdf:type wp:GeneProduct .

        ?p rdfs:label ?label .
    }
"""
for r in wp111g.query(q):
    print(r["label"])
    # Only the first result is needed for this check.
    break

NDUFS3


### Parse Nodes

In [108]:
# Build `nodes`: one record per data node, keyed by the node's IRI, holding
# its specific type, label, identifier source and identifier.
#
# In SPARQL, ";" ends a triple pattern and starts another one with the SAME
# subject (a predicate-object list), so every line below describes ?ttl_id.
#
# All prefixes are declared in the query so it does not depend on prefix
# bindings picked up from the parsed file.
# NOTE(review): the wp:, dc: and dcterms: IRIs must match the `@prefix`
# lines in WP111.ttl.
q = """
    PREFIX dc: <http://purl.org/dc/elements/1.1/>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX wp: <http://vocabularies.wikipathways.org/wp#>

    SELECT ?ttl_id ?type ?label ?id_source ?identifier
    WHERE {
        ?ttl_id rdf:type wp:DataNode ;
            rdfs:label ?label ;
            dc:source ?id_source ;
            dcterms:identifier ?identifier ;
            rdf:type ?type .
    }
"""

nodes = dict()
for match in wp111g.query(q):
    result_dict = {key: str(val) for key, val in match.asdict().items()}

    # Keep only the part of the type IRI after "#". rsplit(...)[-1] falls back
    # to the whole IRI when there is no "#", instead of raising IndexError.
    result_dict["type"] = result_dict["type"].rsplit("#", 1)[-1]

    # Every matched node is a DataNode by construction, so that row carries
    # no extra information; only the more specific types are kept.
    if result_dict["type"] == "DataNode":
        continue

    # NOTE: if a node yields several rows (e.g. more than one specific type),
    # later rows overwrite earlier ones here.
    ttl_id = result_dict.pop("ttl_id")
    nodes[ttl_id] = result_dict

### Parse Interactions Using Node Data

In [111]:
# Exploratory pass over the interaction edges. For now this only prints the
# participants of each matching row; `interactions` is created but not filled.
# In the pattern below, ";" continues with the same subject (?ttl_id).
int_q = """
    SELECT ?ttl_id ?type ?source ?target ?identifier ?participants
    WHERE {
        ?ttl_id rdf:type wp:Interaction ;
            wp:participants ?participants ;
            wp:source ?source ;
            wp:target ?target ;
            rdf:type ?type .
    }
"""
# NOTE(review): ?identifier is selected above, but no triple pattern in the
# WHERE clause binds it.

interactions = {}
for row in wp111g.query(int_q):
    print(row["participants"])

    # Author's note: Binding always comes with ComplexBinding, so Binding can
    # be skipped.
    #
    # TODO: turn each row into a record as is done for the nodes:
    #   1. convert the row to a dict of plain strings,
    #   2. reduce "type" to the part of the IRI after "#",
    #   3. skip rows whose type only repeats the base type,
    #   4. pop "ttl_id" and store the record under it in `interactions`.

http://rdf.wikipathways.org/Pathway/WP111_r117097/Complex/fdc14
http://rdf.wikipathways.org/Pathway/WP111_r117097/Complex/fdc14
http://rdf.wikipathways.org/Pathway/WP111_r117097/Complex/fdc14
http://rdf.wikipathways.org/Pathway/WP111_r117097/WP/Interaction/fee15
http://rdf.wikipathways.org/Pathway/WP111_r117097/WP/Interaction/fee15
http://rdf.wikipathways.org/Pathway/WP111_r117097/WP/Interaction/fee15
https://identifiers.org/chebi/CHEBI:15378
https://identifiers.org/chebi/CHEBI:15378
http://rdf.wikipathways.org/Pathway/WP111_r117097/Complex/ad4b9
http://rdf.wikipathways.org/Pathway/WP111_r117097/Complex/ad4b9
http://rdf.wikipathways.org/Pathway/WP111_r117097/Complex/ad4b9
http://rdf.wikipathways.org/Pathway/WP111_r117097/WP/Interaction/c243a
http://rdf.wikipathways.org/Pathway/WP111_r117097/WP/Interaction/c243a
http://rdf.wikipathways.org/Pathway/WP111_r117097/WP/Interaction/c243a
https://identifiers.org/hmdb/HMDB0002012
https://identifiers.org/hmdb/HMDB0002012
https://identifiers.org/