In [2]:
from itertools import islice

import pandas as pd
from rdflib import Graph

# Start from an empty RDF graph, then populate it from the Turtle
# serialization of the global knowledge graph.
g = Graph()
g.parse(source="data/ttl/global_kg.ttl", format="turtle")

# len() of the graph is reported as its triple count.
print(f"Triples: {len(g)}")

Triples: 7682


In [4]:
# Preview five triples from the graph. islice pulls only the first five items
# from the graph's iterator instead of copying every triple into a list just
# to slice off the head; the triples shown are the same ones.
for s, p, o in islice(g, 5):
    print(f"{s} -- {p} --> {o}")

http://example.org/client_2/patient/8 -- http://example.org/medical#hasOutcome --> http://example.org/outcome/1.0
http://example.org/client_1/patient/111 -- http://example.org/medical#DiabetesPedigreeFunction --> 0.431
http://example.org/client_1/patient/139 -- http://example.org/medical#Pregnancies --> 2.0
http://example.org/client_2/patient/292 -- http://example.org/medical#hasOutcome --> http://example.org/outcome/1.0
http://example.org/client_1/patient/337 -- http://example.org/medical#Insulin --> 231.0


In [5]:
# Extract everything known about each patient as (patient, feature, value) rows,
# ready to be loaded into a DataFrame.
#
# The rdf: prefix is declared explicitly so the query is valid standalone
# SPARQL and does not rely on the engine pre-binding it (the FILTER below
# uses rdf:type).
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX med: <http://example.org/medical#>

SELECT ?patient ?feature ?value
WHERE {
  ?patient a med:Patient .
  ?patient ?feature ?value .
  FILTER (?feature != rdf:type)
}
"""

In [6]:
# Run the SPARQL query against the graph.
results = g.query(query)

# Each result row carries the SELECT variables in order (?patient, ?feature,
# ?value). Everything is stored as a string; the medical namespace is removed
# from the feature IRI so only the short feature name remains.
data = [
    (str(patient), str(feature).replace("http://example.org/medical#", ""), str(value))
    for patient, feature, value in results
]

# Long-format table: one row per (patient, feature) observation.
df = pd.DataFrame(data, columns=["Patient", "Feature", "Value"])
df.head()

Unnamed: 0,Patient,Feature,Value
0,http://example.org/client_1/patient/0,Age,50.0
1,http://example.org/client_1/patient/0,BMI,33.6
2,http://example.org/client_1/patient/0,BloodPressure,72.0
3,http://example.org/client_1/patient/0,DiabetesPedigreeFunction,0.627
4,http://example.org/client_1/patient/0,Glucose,148.0


In [7]:
# Distribution of outcomes: count how often each hasOutcome value occurs.
df.loc[df["Feature"] == "hasOutcome", "Value"].value_counts()

Value
http://example.org/outcome/0.0    500
http://example.org/outcome/1.0    268
Name: count, dtype: int64

In [8]:
# Pivot the long table into a wide one: one row per patient, one column per
# feature. reset_index() turns the Patient index back into a regular column.
df_wide = (
    df
    .pivot(index="Patient", columns="Feature", values="Value")
    .reset_index()
)
df_wide.head()


Feature,Patient,Age,BMI,BloodPressure,DiabetesPedigreeFunction,Glucose,Insulin,Pregnancies,SkinThickness,hasOutcome
0,http://example.org/client_1/patient/0,50.0,33.6,72.0,0.627,148.0,0.0,6.0,35.0,http://example.org/outcome/1.0
1,http://example.org/client_1/patient/1,33.0,43.1,40.0,2.288,137.0,168.0,0.0,35.0,http://example.org/outcome/1.0
2,http://example.org/client_1/patient/10,31.0,45.8,84.0,0.551,118.0,230.0,0.0,47.0,http://example.org/outcome/1.0
3,http://example.org/client_1/patient/100,42.0,24.4,55.0,0.136,85.0,0.0,8.0,20.0,http://example.org/outcome/0.0
4,http://example.org/client_1/patient/101,21.0,24.3,58.0,0.187,105.0,0.0,1.0,0.0,http://example.org/outcome/0.0


In [9]:
# Export the wide table for further analysis; index=False keeps the
# positional row index out of the CSV.
df_wide.to_csv(path_or_buf="global_kg_as_table.csv", index=False)
