# Queries on the knowledge graph

In [8]:
import sys
#!{sys.executable} -m pip install pandas oxrdflib Pygments

import pandas as pd
from IPython.display import display, HTML
from pygments import highlight
from pygments.lexers import SparqlLexer
from pygments.formatters import HtmlFormatter
from rdflib import Graph


def run_query(graph, query_path):
    try:
        with open(query_path, 'r') as file:
            query = file.read()
    except Exception as _e:
        print(f"No file for {query_path}")
        return
    results = graph.query(query)
    # Display the SPARQL query
    formatted_query = highlight(query, SparqlLexer(),
                                HtmlFormatter(style='solarized-dark', full=True, nobackground=True))
    display(HTML(formatted_query))
    # Convert results to a Pandas DataFrame
    res_list = []
    for row in results:
        res_list.append([str(item) for item in row])
    df = pd.DataFrame(res_list, columns=[str(var) for var in results.vars]) if len(res_list) > 0 else pd.DataFrame()
    # Display the DataFrame as a table in Jupyter Notebook
    #display(HTML(df.to_html()))
    return df


In [9]:
g = Graph(store='Oxigraph')
g.parse('data/finance-kg.ttl', format='turtle')

<Graph identifier=Ne601a05d38c4469497c4a5b09eaf6ebc (<class 'rdflib.graph.Graph'>)>

In [10]:
# count amount of triples
print(f'amount of triples: {len(g)}')

amount of triples: 531


In [11]:
import rdflib
import networkx as nx
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph

mdg = rdflib_to_networkx_multidigraph(g)
dg = rdflib_to_networkx_digraph(g)
dg.edges(data=True)

OutEdgeDataView([(rdflib.term.URIRef('https://finance.yahoo.com/sectors/energy/oil-gas-equipment-services'), rdflib.term.URIRef('https://w3id.org/finance/Industry'), {'triples': [(rdflib.term.URIRef('https://finance.yahoo.com/sectors/energy/oil-gas-equipment-services'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('https://w3id.org/finance/Industry'))], 'weight': 1}), (rdflib.term.URIRef('https://finance.yahoo.com/sectors/energy/oil-gas-equipment-services'), rdflib.term.Literal('Oil & Gas Equipment & Services', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), {'triples': [(rdflib.term.URIRef('https://finance.yahoo.com/sectors/energy/oil-gas-equipment-services'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), rdflib.term.Literal('Oil & Gas Equipment & Services', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')))], 'weight': 1}), (rdflib.term.URIRef('https://finance.yahoo.com/sec

In [12]:
g

<Graph identifier=Ne601a05d38c4469497c4a5b09eaf6ebc (<class 'rdflib.graph.Graph'>)>

In [13]:
import sys
#!{sys.executable} -m pip install pandas oxrdflib Pygments

In [15]:
run_query(g, 'Queries/test.rq')

Unnamed: 0,sector,sectorName
0,https://finance.yahoo.com/sectors/energy/oil-g...,Oil & Gas Equipment & Services
1,https://finance.yahoo.com/sectors/healthcare/h...,Healthcare Plans
2,https://finance.yahoo.com/sectors/consumer-cyc...,Auto Manufacturers
3,https://finance.yahoo.com/sectors/industrials/...,Trucking
4,https://finance.yahoo.com/sectors/consumer-cyc...,Apparel Manufacturing
...,...,...
140,https://finance.yahoo.com/sectors/consumer-def...,Household & Personal Products
141,https://finance.yahoo.com/sectors/healthcare/m...,Medical Devices
142,https://finance.yahoo.com/sectors/energy/oil-g...,Oil & Gas Midstream
143,https://finance.yahoo.com/sectors/consumer-cyc...,Resorts & Casinos
