# RDF und SPARQL Sandkasten

In [None]:
# nötige Module importieren
%pip install -q ipywidgets==8.1.1 ipycytoscape networkx rdflib
import rdflib
import ipycytoscape as cy
import networkx as nx
import pandas as pd

## Funktionsdefinitionen

In [None]:
# Funktion für die Konvertierung von RDF zu NetworkX
def rdflib_to_networkx(rdf_graph):
    """Convert an rdflib graph into a directed NetworkX graph for plotting.

    Every triple contributes an edge from its subject node to its object
    node; the predicate is stored in the edge attribute ``label``. Node names
    and labels are plain strings: URIs are abbreviated to prefixed names where
    possible, blank nodes are rendered as ``_:bnode_<id>`` and every other
    term (e.g. a literal) as ``str(term)``.

    When several triples share the same subject and object, their predicates
    are merged into one comma-separated label (sorted, duplicates removed),
    because a ``DiGraph`` can hold only one edge per node pair.

    Args:
        rdf_graph: The ``rdflib.Graph`` whose triples are converted.

    Returns:
        A ``networkx.DiGraph`` with one edge per distinct (subject, object)
        pair.
    """

    def term_to_text(term):
        # Render a single RDF term as the string used for node ids and labels.
        if isinstance(term, rdflib.URIRef):
            try:
                return rdf_graph.qname(term)
            except Exception:
                # qname() cannot abbreviate every URI (for instance one that
                # ends in "/" or "#"); show the full URI instead of aborting
                # the whole conversion. The exception type differs between
                # rdflib versions, hence the broad catch.
                return str(term)
        if isinstance(term, rdflib.BNode):
            return f"_:bnode_{term}"  # Convert blank node to a unique string
        return str(term)  # Literals and anything else: string representation

    # Collect the predicates per (subject, object) pair first so that parallel
    # triples are merged instead of overwriting each other's label.
    labels_by_edge = {}
    for subj, pred, obj in rdf_graph:
        edge = (str(term_to_text(subj)), str(term_to_text(obj)))
        labels_by_edge.setdefault(edge, []).append(str(term_to_text(pred)))

    G = nx.DiGraph()  # Use Graph() if you want an undirected graph
    for (source, target), labels in labels_by_edge.items():
        # Sorting makes the merged label independent of triple iteration order.
        G.add_edge(source, target, label=", ".join(sorted(set(labels))))
    return G

In [None]:
# Funktionsdefinition, um einen TTL String zu parsen und als Graph zu plotten
def parse_and_plot(ttl_string):
    """Parse a Turtle string and return a widget that draws it as a graph.

    Args:
        ttl_string: RDF data serialized as Turtle.

    Returns:
        An ``ipycytoscape.CytoscapeWidget`` showing the triples as a directed
        graph: green nodes labelled with their id, orange edges labelled with
        the predicate.
    """
    g = rdflib.Graph()
    # Name the format explicitly rather than relying on rdflib's default,
    # which is not the same in every rdflib version.
    g.parse(data=ttl_string, format="turtle")
    nx_graph = rdflib_to_networkx(g)
    plot = cy.CytoscapeWidget()
    plot.graph.add_graph_from_networkx(nx_graph, directed=True)

    style = [
        {
            # Nodes: green body, white text with a green outline.
            'selector': 'node',
            'style': {
                'font-family': 'helvetica',
                'font-size': '12px',
                'color': 'white',
                'text-outline-width': 2,
                'text-outline-color': 'green',
                'background-color': 'green',
                'content': 'data(id)',
                'text-valign': 'center',
            }
        },
        {
            # Edges: orange line and arrowhead, labelled with the predicate.
            'selector': 'edge.directed',
            'style': {
                'font-family': 'helvetica',
                'font-size': '12px',
                'label': 'data(label)',
                'color': 'white',
                'text-outline-width': 2,
                'text-outline-color': 'orange',
                # Edges are colored through 'line-color' and
                # 'target-arrow-color'; 'background-color' only applies to
                # nodes and left the edges in the default color.
                'line-color': 'orange',
                'target-arrow-color': 'orange',
                'curve-style': 'bezier',
                'target-arrow-shape': 'triangle',
            }
        }
    ]

    plot.set_style(style)
    return plot

In [None]:
# Funktionsdefinition, um eine SPARQL-Query gegen einen Turtle-String auszuführen
def query(ttl_string, query_string):
    """Run a SPARQL query against Turtle data and return the result as a table.

    Args:
        ttl_string: RDF data serialized as Turtle.
        query_string: The SPARQL query to evaluate against that data.

    Returns:
        A ``pandas.DataFrame`` with one row per result. For results that
        expose variables (SELECT queries) there is one column per variable,
        labelled with the variable name as a plain string.
    """
    g = rdflib.Graph()
    # Name the format explicitly rather than relying on rdflib's default,
    # which is not the same in every rdflib version.
    g.parse(data=ttl_string, format="turtle")

    qres = g.query(query_string)

    # Label columns with plain strings so they can be selected as df["s"];
    # rdflib Variable objects do not compare equal to ordinary strings.
    # Results without variables keep pandas' default column labels.
    columns = None if qres.vars is None else [str(var) for var in qres.vars]
    return pd.DataFrame(qres, columns=columns)

## Spezifikationen

Diese technischen Dokumente sind sehr lesenswert (und für Spezifikationen auch sehr leserlich geschrieben):

- [RDF Primer](https://www.w3.org/TR/rdf11-primer/)
- [Turtle](https://www.w3.org/TR/turtle/)
- [SPARQL](https://www.w3.org/TR/sparql11-query/)

## TTL mit allen Features

In [None]:
# Sample Turtle document for the sandbox. It exercises prefix declarations,
# predicate lists (";"), object lists (","), language-tagged literals,
# typed literals (xsd:date), a bare integer literal and blank-node property
# lists ("[ ... ]"). The same string is reused by the SPARQL cell further down.
# NOTE(review): schema.org defines endDate but, as far as I know, no stopDate —
# confirm whether schema:stopDate below is intended.
ttl_string = """

@prefix : <https://ld.di.digisus-lab.ch/>.
@prefix schema: <http://schema.org/>.
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.

:KarinKeller-Sutter schema:hasOccupation :Bundesrat;
    :partei :FDP;
    :anzahlGeschwister 3.
    
:FDP rdfs:label "FDP.Die Liberalen"@de, "PLR.Les Libéraux-Radicaux"@fr.

:ViolaAmherd schema:hasOccupation :Bundesrat;
    :partei :DieMitte;
    :vollerName "Viola Patricia Amherd".

:NadineMasshardt schema:hasOccupation :Nationalrat;
    :partei :SP;
    :geborenIn :AffolternAmAlbis;
    schema:birthDate "1984-10-04"^^xsd:date.

:MayaGraf schema:hasOccupation [
        :role :Staenderat;
        schema:startDate "2019-12-04"^^xsd:date];
    
    schema:hasOccupation [
        :role :Nationalrat;
        schema:startDate "2001-06-05"^^xsd:date;
        schema:stopDate "2019-12-03"^^xsd:date];
    
    :partei :Gruene;
    schema:parent :FritzGraf.

:FritzGraf schema:hasOccupation :BaselbieterLandrat;
    :partei :SVP;
    schema:children :MayaGraf.

"""

# Build the graph widget; the bare name on the last line makes the notebook
# render it as the cell output.
plot = parse_and_plot(ttl_string)
plot

## SPARQL Query

In [None]:
# SPARQL query to experiment with. As written, the pattern "?s ?p ?o" matches
# every triple, so the result lists the whole graph; the declared prefixes
# mirror those of the Turtle document.
query_string = """

PREFIX : <https://ld.di.digisus-lab.ch/>
PREFIX schema: <http://schema.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>


SELECT ?s ?p ?o WHERE {

    ?s ?p ?o.

}

"""

# Evaluate the query against the Turtle string defined in the cell above and
# show the resulting table.
df = query(ttl_string, query_string)
display(df)