In [1]:
!pip install rdflib pandas lxml owlrl pyshacl



You should consider upgrading via the 'D:\Users\AWESTHOF\OneDrive - Capgemini\Academy\Linked Data - Ontologies using OWL and SHACL\assets\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [2]:
# Transforming csv

import pandas as pd
from rdflib import Graph, Literal, RDF, URIRef, Namespace, FOAF

# Path of the CSV file to transform
csv_file_path = './assets/data.csv'

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(csv_file_path)

# Create an RDF graph
g = Graph()

# Namespaces used for the subjects and predicates
EX = Namespace("http://example.org/")
SDO = Namespace("http://schema.org/")

# Bind the namespaces so the Turtle output uses the ex: / sdo: prefixes
g.bind("ex", EX)
g.bind("sdo", SDO)

# Optional CSV columns that are emitted as plain literals, with their predicate
optional_literals = (
    ('DateOfBirth', SDO.birthDate),
    ('ComesFrom', EX.home),
)

# Turn every sufficiently complete row into a set of triples
for _, row in df.iterrows():

    # Rows missing any essential field (id, first name, last name) are skipped
    if pd.isna(row['id']) or pd.isna(row['FirstName']) or pd.isna(row['LastName']):
        continue

    person_id = row['id']
    # A numeric id column that contains blanks is read as float64, which would
    # yield URIs such as ".../1.0"; turn whole-number floats back into ints.
    if isinstance(person_id, float) and person_id.is_integer():
        person_id = int(person_id)

    subject = URIRef(f"{EX}{person_id}")

    # The essential fields are known to be present here, so the type and name
    # triples are always emitted (an id of 0 is a valid id, not a missing one).
    g.add((subject, RDF.type, EX.Person))
    g.add((subject, FOAF.name, Literal(f"{row['FirstName']} {row['LastName']}")))

    # Optional literal-valued fields: emit only when the cell is filled
    for column, predicate in optional_literals:
        value = row[column]
        if pd.notna(value):
            g.add((subject, predicate, Literal(value)))

    # The instrument is modelled as a resource in the ex: namespace, not a literal
    instrument = row['Instrument']
    if pd.notna(instrument):
        g.add((subject, EX.playsInstrument, URIRef(f"{EX}{instrument}")))

# Serialize the graph to a Turtle file
output_file = './assets/transformed_csv.ttl'
g.serialize(destination=output_file, format='turtle')

print(f"Graph written to {output_file}")



Graph written to ./assets/transformed_csv.ttl


In [3]:
# Transforming json

import json
from rdflib import Graph, Literal, RDF, URIRef, Namespace, FOAF

# Path of the JSON file to transform
file_path = './assets/data.json'

# Open the file and load its content. The encoding is given explicitly because
# JSON is UTF-8 encoded, while open() would otherwise fall back to the
# platform default (e.g. cp1252 on Windows).
with open(file_path, 'r', encoding='utf-8') as f:
    try:
        json_data = json.load(f)  # json.load reads directly from the file object
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        # Re-raise: continuing would fail later on an undefined json_data, or
        # silently reuse a stale json_data left over from a previous run.
        raise

# Create an RDF graph
g = Graph()

# Namespaces used for the subjects and predicates
EX = Namespace("http://example.org/")
SDO = Namespace("http://schema.org/")

# Bind the namespaces so the Turtle output uses the ex: / sdo: prefixes
g.bind("ex", EX)
g.bind("sdo", SDO)

# JSON keys that are emitted as plain literals, with their predicate
literal_fields = (
    ('fullName', FOAF.name),
    ('home', EX.home),
    ('aka', FOAF.nick),
)

# Iterate over each object in the JSON array
for person in json_data:

    # The subject URI is derived from the record's id; a record without an id
    # cannot be given a subject and is skipped.
    person_id = person.get('id')
    if person_id is None:
        continue
    subject = URIRef(f"{EX}{person_id}")

    g.add((subject, RDF.type, EX.Person))

    # Optional literal-valued fields: emit only when present and not null
    for key, predicate in literal_fields:
        value = person.get(key)
        if value is not None:
            g.add((subject, predicate, Literal(value)))

    # The instrument is modelled as a resource in the ex: namespace, not a literal
    instrument = person.get('playsInstrument')
    if instrument is not None:
        g.add((subject, EX.playsInstrument, URIRef(f"{EX}{instrument}")))

# Serialize the graph to a Turtle file
output_file = './assets/transformed_json.ttl'
g.serialize(destination=output_file, format='turtle')

print(f"Graph written to {output_file}")

Graph written to ./assets/transformed_json.ttl


In [4]:
# Transforming xml

from lxml import etree
# FOAF is imported here as well, so the cell does not depend on an earlier
# cell having been run first.
from rdflib import Graph, Literal, RDF, URIRef, Namespace, FOAF

# Load XML from a file
tree = etree.parse('./assets/data.xml')
root = tree.getroot()

# Create an RDF graph
g = Graph()

# Namespaces used for the subjects and predicates
EX = Namespace("http://example.org/")
SDO = Namespace("http://schema.org/")

# Bind the namespaces so the Turtle output uses the ex: / sdo: prefixes
g.bind("ex", EX)
g.bind("sdo", SDO)

# Optional child elements that are emitted as plain literals, with their predicate
optional_literals = (
    ('Alias', FOAF.nick),
    ('BornIn', EX.home),
    ('DOB', SDO.birthDate),
    ('Address', SDO.address),
    ('Phone', SDO.telephone),
)

# Iterate over each <Person> child of the root element
for person in root.findall('Person'):
    # findtext returns None when the child element is absent; absent and
    # empty elements are both falsy and therefore treated as "not filled".
    person_id = person.findtext('id')
    full_name = person.findtext('FullName')

    # Persons missing an essential field (id, full name) are skipped
    if not (person_id and full_name):
        continue

    subject = URIRef(f"http://example.org/{person_id}")
    g.add((subject, RDF.type, EX.Person))
    g.add((subject, FOAF.name, Literal(full_name)))

    # Optional fields: emit only when the element exists and has text
    for tag, predicate in optional_literals:
        value = person.findtext(tag)
        if value:
            g.add((subject, predicate, Literal(value)))

# Serialize the graph to a Turtle file
output_file = './assets/transformed_xml.ttl'
g.serialize(destination=output_file, format='turtle')

print(f"Graph written to {output_file}")

Graph written to ./assets/transformed_xml.ttl


In [5]:
# Loading RDF data into a Graph

import rdflib

# Graph that accumulates the triples of all transformed sources
combined_graph = rdflib.Graph()

# Turtle files to merge
turtle_files = [
    "./assets/transformed_csv.ttl",
    "./assets/transformed_json.ttl",
    "./assets/transformed_xml.ttl"
]

# Parse each Turtle file into its own temporary graph, then copy that
# graph's triples into the combined graph.
for ttl_path in turtle_files:
    source_graph = rdflib.Graph()
    source_graph.parse(ttl_path, format="turtle")
    combined_graph += source_graph


In [6]:
# Querying the Graph with SPARQL

from IPython.display import display, HTML
import pandas as pd

# SPARQL query: every ex:Person that has a name and a home, plus the
# nickname, instrument and birth date where they are available.
query = """
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX ex: <http://example.org/>
PREFIX sdo: <http://schema.org/>

SELECT ?person ?name ?nickname ?birthDate ?home ?instrument
WHERE {
    ?person a ex:Person  ;
        foaf:name ?name ;
        ex:home ?home .
    OPTIONAL { ?person foaf:nick ?nickname }
    OPTIONAL { ?person ex:playsInstrument ?instrument }
    OPTIONAL { ?person sdo:birthDate ?birthDate }
}
"""

# Run the query against the combined graph
results = combined_graph.query(query)

# One dict per result row, mapping each variable label to the string form
# of the value found in that row
data = [
    {str(label): str(row[label]) for label in row.labels}
    for row in results
]

# Tabulate the rows with pandas
df = pd.DataFrame(data)

# Render the table as HTML in the notebook output
html_table = df.to_html()
display(HTML(html_table))

Unnamed: 0,person,name,nickname,birthDate,home,instrument
0,http://example.org/2,Leia Organa,,8053-11-09,Alderaan,
1,http://example.org/3,Han Solo,,8047-08-27,Corellia,http://example.org/Guitar
2,http://example.org/4,C 3PO,,8062-02-01,Tattooine,http://example.org/Saxophone
3,http://example.org/1,Luke Skywalker,,8053-11-09,Tattooine,
4,http://example.org/678,Tyrion Lannister,,,Casterly Rock,http://example.org/Piano
5,http://example.org/456,Jon Snow,,,The Wall,http://example.org/Guitar
6,http://example.org/567,Daenerys Targaryen,,,Dragonstone,
7,http://example.org/23456,Steve Rogers,Captain America,1918-07-04,"Brooklyn, NYC",
8,http://example.org/12345,Tony Stark,Iron Man,1970-05-29,"Manhattan, NYC",
9,http://example.org/34567,Natasha Romanoff,Black Widow,1984-12-03,Stalingrad,
