In [1]:
# validate_shacl_and_summary.py
from pathlib import Path
from rdflib import Graph, Namespace
from pyshacl import validate
import pandas as pd

# chemins
BASE = Path.home()/ "DTE" / "jne_project"
DATA_TTL   = BASE / "graph" / "edtb_graph.ttl"         # ton graphe
SHAPES_TTL = BASE / "graph" / "shapes.ttl"             # le fichier ci-dessus
OUT_CSV    = BASE / "graph" / "shacl_summary.csv"
OUT_TEXT   = BASE / "graph" / "shacl_results.txt"

# exécution
data_g   = Graph().parse(DATA_TTL, format="turtle")
shapes_g = Graph().parse(SHAPES_TTL, format="turtle")

conforms, results_graph, results_text = validate(
    data_graph=data_g,
    shacl_graph=shapes_g,
    inference='rdfs',
    abort_on_error=False,
    meta_shacl=False,
    advanced=True,
)

# sauvegarde du rapport texte
OUT_TEXT.write_text(results_text, encoding="utf-8")

# synthèse par forme (sourceShape)
SH = Namespace("http://www.w3.org/ns/shacl#")
rows = []
for r in results_graph.subjects(predicate=SH.resultSeverity):
    src = results_graph.value(subject=r, predicate=SH.sourceShape)
    msg = results_graph.value(subject=r, predicate=SH.resultMessage)
    foc = results_graph.value(subject=r, predicate=SH.focusNode)
    rows.append({
        "sourceShape": str(src).split("#")[-1] if src else "",
        "focusNode":   str(foc).split("#")[-1] if foc else "",
        "message":     str(msg) if msg else "",
    })

df = pd.DataFrame(rows)
if df.empty:
    # conforme: construire un tableau vide avec 0 violation par forme définie
    shapes = [str(s).split("#")[-1] for s in shapes_g.subjects(RDF.type, SH.NodeShape)]
    summary = pd.DataFrame({"Shape": shapes, "Violations": 0})
else:
    summary = df.groupby("sourceShape", dropna=False).size().reset_index(name="Violations")
    summary.rename(columns={"sourceShape":"Shape"}, inplace=True)

summary.sort_values("Violations", ascending=False, inplace=True)
summary.to_csv(OUT_CSV, index=False)

print("Conformité globale:", conforms)
print("Résumé violations par forme:\n", summary)
print("Écrit:", OUT_CSV, "et", OUT_TEXT)


Usage of abort_on_error is deprecated. Use abort_on_first instead.


Conformité globale: False
Résumé violations par forme:
                                   Shape  Violations
0  n8f349bc08fd54642a3c4e4547637d038b11           1
1  n8f349bc08fd54642a3c4e4547637d038b13           1
2  n8f349bc08fd54642a3c4e4547637d038b15           1
3   n8f349bc08fd54642a3c4e4547637d038b2           1
4  n8f349bc08fd54642a3c4e4547637d038b21           1
5  n8f349bc08fd54642a3c4e4547637d038b23           1
6  n8f349bc08fd54642a3c4e4547637d038b27           1
7   n8f349bc08fd54642a3c4e4547637d038b3           1
8   n8f349bc08fd54642a3c4e4547637d038b5           1
9   n8f349bc08fd54642a3c4e4547637d038b7           1
Écrit: /home/amina/DTE/jne_project/graph/shacl_summary.csv et /home/amina/DTE/jne_project/graph/shacl_results.txt


In [3]:
import pandas as pd
from rdflib import Graph
from pyshacl import validate

# Charger graphe RDF
graph_file = "/home/amina/DTE/jne_project/graph/edtb_graph.ttl"
g = Graph().parse(graph_file, format="turtle")

# Charger les shapes SHACL
shapes_file = "/home/amina/DTE/jne_project/graph/shapes.ttl"
shapes_g = Graph().parse(shapes_file, format="turtle")

# Validation SHACL
conforms, results_graph, results_text = validate(
    g, shacl_graph=shapes_g,
    ont_graph=None, inference='rdfs',
    abort_on_first=False, meta_shacl=False, debug=False
)

# Extraire résultats
violations = []
for s, p, o in results_graph.triples((None, None, None)):
    if str(p).endswith("resultMessage"):
        shape = results_graph.value(subject=s, predicate=results_graph.namespace_manager.qname("sh:sourceShape"))
        focus = results_graph.value(subject=s, predicate=results_graph.namespace_manager.qname("sh:focusNode"))
        violations.append({
            "Shape": str(shape),
            "Violation": str(o),
            "Exemple": str(focus)
        })

# Construire tableau résumé
df = pd.DataFrame(violations)
summary = df.groupby("Shape").agg({
    "Violation": "count",
    "Exemple": lambda x: list(x)[:2]  # max 2 exemples
}).reset_index().rename(columns={"Violation": "Nombre de violations"})

# Sauvegarde
summary.to_csv("/home/amina/DTE/jne_project/graph/shacl_summary_detailed.csv", index=False)
print(summary)


  Shape  Nombre de violations       Exemple
0  None                    10  [None, None]


In [4]:
from rdflib import Graph
g = Graph()
g.parse("/home/amina/DTE/jne_project/graph/edtb_graph.ttl", format="turtle")
print("Triples total :", len(g))


Triples total : 175715


In [9]:
from rdflib import Graph, Namespace
g = Graph()
g.parse("/home/amina/DTE/jne_project/graph/edtb_graph.ttl", format="turtle")
BRICK = Namespace("https://brickschema.org/schema/Brick#")
EX    = Namespace("http://example.org/training#")

def count_ns(ns):
    return sum(1 for s,p,o in g if str(s).startswith(ns) or str(p).startswith(ns) or str(o).startswith(ns))

print("Triples Brick :", count_ns(str(BRICK)))
print("Triples EX    :", count_ns(str(EX)))
print("Triples total :", len(g))

files = {
    "BIM only": "bim_only.ttl",
    "BIM + Weather": "bim_weather.ttl",
    "BIM + Weather + IoT": "bim_weather_iot.ttl",
    "BIM + Weather + IoT + BMS": "edtb_graph.ttl",
}
for label, path in files.items():
    g = Graph().parse(path, format="turtle")
    print(f"{label:28s} {len(g):d}")



Triples Brick : 58
Triples EX    : 175715
Triples total : 175715


FileNotFoundError: [Errno 2] No such file or directory: '/home/amina/bim_only.ttl'

In [10]:
from rdflib import Graph, Namespace
BASE = Path.home()/"DTE"/"jne_project"/"graph"/"edtb_graph.ttl"
g = Graph().parse(BASE.as_posix(), format="turtle")
print("Triples total :", len(g))

BRICK = Namespace("https://brickschema.org/schema/Brick#")
EX    = Namespace("http://example.org/training#")
def count_ns(ns):
    return sum(1 for s,p,o in g if str(s).startswith(ns) or str(p).startswith(ns) or str(o).startswith(ns))
print("≈Triples Brick :", count_ns(str(BRICK)))
print("≈Triples EX    :", count_ns(str(EX)))


Triples total : 175715
≈Triples Brick : 58
≈Triples EX    : 175715


In [12]:
from rdflib import Graph, Namespace
g = Graph().parse("/home/amina/DTE/jne_project/graph/edtb_graph.ttl", format="turtle")

print("Triples total:", len(g))
print("Sujets uniques:", len(set(s for s,_,_ in g)))
print("Prédicats uniques:", len(set(p for _,p,_ in g)))
print("Objets uniques:", len(set(o for _,_,o in g)))

BRICK = Namespace("https://brickschema.org/schema/Brick#")
SOS  = Namespace("http://www.w3.org/ns/sosa/")

def count_type(t):
    return sum(1 for _ in g.triples((None, g.namespace_manager.qname("rdf:type"), t)))  # si échec, SPARQL ci-dessous

# plus robuste avec SPARQL:
def ctype(turi):
    return list(g.query(f"SELECT (COUNT(*) AS ?n) WHERE {{ ?s a <{turi}> }}"))[0][0]

print("Rooms:", ctype(BRICK.Room))
print("HVAC systems:", ctype(BRICK.HVAC_System))
print("Lighting systems:", ctype(BRICK.Lighting_System))
print("Weather stations:", ctype(BRICK.Weather_Station))

print("Temp sensors:", ctype(BRICK.Temperature_Sensor))
print("Humidity sensors:", ctype(BRICK.Humidity_Sensor))
print("CO2 sensors:", ctype(BRICK.CO2_Sensor))
print("Power sensors:", ctype(BRICK.Power_Sensor))
print("Occupancy sensors:", ctype(BRICK.Occupancy_Sensor))

print("Observations SOSA:", list(g.query(
    "SELECT (COUNT(*) AS ?n) WHERE { ?o a <http://www.w3.org/ns/sosa/Observation> }"
))[0][0])


Triples total: 175715
Sujets uniques: 34568
Prédicats uniques: 16
Objets uniques: 16674
Rooms: 1
HVAC systems: 1
Lighting systems: 1
Weather stations: 1
Temp sensors: 1
Humidity sensors: 1
CO2 sensors: 1
Power sensors: 1
Occupancy sensors: 2
Observations SOSA: 34548


In [14]:
from rdflib import Graph, URIRef
from pathlib import Path

TTL = Path.home()/"DTE"/"jne_project"/"graph"/"edtb_graph.ttl"
g = Graph().parse(TTL.as_posix(), format="turtle")

BRICK = "https://brickschema.org/schema/Brick#"
SOSA  = "http://www.w3.org/ns/sosa/"

def q(qs):  # helper
    return list(g.query(qs))

def triples_for_subjects(S):
    S = set(S)
    return sum(1 for s,p,o in g if s in S)

# --- groupes de capteurs ---
room_sensors = {s for (s,) in q(f"""
  SELECT ?s WHERE {{
    ?s a <{BRICK}Sensor> ;
       <{BRICK}isLocatedIn> ?r .
    ?r a <{BRICK}Room> .
  }}
""")}

weather_sensors = {s for (s,) in q(f"""
  SELECT ?s WHERE {{
    ?s a <{BRICK}Sensor> ;
       <{BRICK}isLocatedIn> ?st .
    ?st a <{BRICK}Weather_Station> .
  }}
""")}

bms_sensors = {s for (s,) in q(f"""
  SELECT ?s WHERE {{
    ?sys <{BRICK}hasPoint> ?s ;
         a <{BRICK}HVAC_System> .
  }}
""")} | {s for (s,) in q(f"""
  SELECT ?s WHERE {{
    ?sys <{BRICK}hasPoint> ?s ;
         a <{BRICK}Lighting_System> .
  }}
""")}

# observations liées à un ensemble de capteurs
def obs_of(sensors):
    sensors = list(sensors)
    if not sensors:
        return set()
    values = " ".join(f"<{str(x)}>" for x in sensors)
    res = q(f"""
      SELECT ?o WHERE {{
        VALUES ?s {{ {values} }}
        ?o a <{SOSA}Observation> ;
           <{SOSA}madeBySensor> ?s .
      }}
    """)
    return {r[0] for r in res}

def summarize_group(name, sensors):
    obs = obs_of(sensors)
    t_obs = triples_for_subjects(obs)
    print(f"{name:12s} | sensors: {len(sensors):4d} | observations: {len(obs):6d} | obs_triples≈ {t_obs:7d}")

print("\n=== Par source (approx.) ===")
summarize_group("Weather",   weather_sensors)
summarize_group("IoT (rooms)", room_sensors)
summarize_group("BMS",       bms_sensors)



=== Par source (approx.) ===
Weather      | sensors:    0 | observations:      0 | obs_triples≈       0
IoT (rooms)  | sensors:    0 | observations:      0 | obs_triples≈       0
BMS          | sensors:    7 | observations:  20160 | obs_triples≈  100800


In [17]:
# build_graph_stepwise.py
from pathlib import Path
import argparse
import pandas as pd
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, RDFS, XSD

# ---------- Chemins ----------
HOME = Path.home()
BASE = HOME / "DTE" / "jne_project"
GRAPH_DIR = BASE / "graph"
RAW_DIR   = BASE / "raw"

BIM_TTL = GRAPH_DIR / "Training_room_en.ttl"  # BIM de base (déjà fourni)

OUT_BIM_ONLY         = GRAPH_DIR / "bim_only.ttl"
OUT_BIM_WEATHER      = GRAPH_DIR / "bim_weather.ttl"
OUT_BIM_WEATHER_IOT  = GRAPH_DIR / "bim_weather_iot.ttl"
OUT_FINAL            = GRAPH_DIR / "edtb_graph.ttl"

# ---------- Vocabulaires ----------
BRICK = Namespace("https://brickschema.org/schema/Brick#")
SOSA  = Namespace("http://www.w3.org/ns/sosa/")
QUDT  = Namespace("http://qudt.org/schema/qudt/")
UNIT  = Namespace("http://qudt.org/vocab/unit/")
EX    = Namespace("http://example.org/training#")

# ---------- Utilitaires ----------
def bind_prefixes(g: Graph) -> Graph:
    g.bind("brick", BRICK); g.bind("sosa", SOSA)
    g.bind("qudt", QUDT);   g.bind("unit", UNIT)
    g.bind("ex", EX);       g.bind("rdfs", RDFS)
    return g

def read_one_csv(path_or_dir: Path) -> pd.DataFrame:
    """Accepte un fichier ou un dossier (avec 1 CSV)."""
    p = path_or_dir
    if p.is_dir():
        csvs = sorted(p.glob("*.csv"))
        if len(csvs) != 1:
            raise FileNotFoundError(f"Attendu 1 CSV dans {p}, trouvé {len(csvs)}.")
        p = csvs[0]
    if not p.exists():
        raise FileNotFoundError(p)
    return pd.read_csv(p)

def getcol(df: pd.DataFrame, alts: list[str]) -> str:
    """Retourne le nom de colonne existant parmi des alternatives."""
    for c in alts:
        if c in df.columns: return c
    raise KeyError(f"Colonne manquante, alternatives: {alts}")

def add_obs(g: Graph, sensor: URIRef, when, value,
            observed_property_iri: URIRef, unit_iri: URIRef):
    """Ajoute une observation SOSA simple (valeur + time + unité)."""
    ts = pd.to_datetime(when, utc=True)
    oid = URIRef(str(sensor) + f"/obs/{ts.value}")
    g.add((oid, RDF.type, SOSA.Observation))
    g.add((oid, SOSA.madeBySensor, sensor))
    g.add((oid, SOSA.observedProperty, observed_property_iri))
    g.add((oid, SOSA.hasSimpleResult, Literal(value)))
    g.add((oid, QUDT.unit, unit_iri))
    g.add((oid, SOSA.resultTime, Literal(ts.to_pydatetime(), datatype=XSD.dateTime)))

# ---------- Étapes ----------
def load_bim() -> Graph:
    g = Graph().parse(BIM_TTL.as_posix(), format="turtle")
    return bind_prefixes(g)

def add_weather(g: Graph, weather_csv: Path, sample_every: int | None = None):
    df = read_one_csv(weather_csv).copy()
    df[getcol(df, ["ts","time","timestamp"])] = pd.to_datetime(
        df[getcol(df, ["ts","time","timestamp"])], utc=True)
    df = df.rename(columns={
        getcol(df, ["ts","time","timestamp"]): "ts",
        getcol(df, ["weather_temp_c","T_ext","temp_c"]): "T",
        getcol(df, ["weather_rh_pct","RH","rh_pct"]): "RH",
        getcol(df, ["weather_wind_ms","wind","wind_ms"]): "W",
        getcol(df, ["weather_ghi_wm2","GHI","ghi_wm2"]): "GHI",
    })
    st = EX.WeatherStation1
    g.add((st, RDF.type, BRICK.Weather_Station))
    sT = EX.WS_Temp; g.add((sT, RDF.type, BRICK.Temperature_Sensor)); g.add((sT, BRICK.isLocatedIn, st))
    sH = EX.WS_RH;   g.add((sH, RDF.type, BRICK.Humidity_Sensor));    g.add((sH, BRICK.isLocatedIn, st))
    sW = EX.WS_Wind; g.add((sW, RDF.type, BRICK.Wind_Speed_Sensor));  g.add((sW, BRICK.isLocatedIn, st))
    sG = EX.WS_GHI;  g.add((sG, RDF.type, BRICK.Solar_Radiation_Sensor)); g.add((sG, BRICK.isLocatedIn, st))

    step = sample_every or 1
    for _, r in df.iloc[::step].iterrows():
        ts = r["ts"]
        add_obs(g, sT, ts, r["T"],  BRICK.Air_Temperature, UNIT["DegreeC"])
        add_obs(g, sH, ts, r["RH"], BRICK.Relative_Humidity, UNIT["Percent"])
        add_obs(g, sW, ts, r["W"],  BRICK.Wind_Speed, UNIT["MeterPerSecond"])
        add_obs(g, sG, ts, r["GHI"],BRICK.Solar_Irradiance, UNIT["W-PER-M2"])

def add_iot(g: Graph, sensors_csv: Path, sample_every: int | None = None):
    df = read_one_csv(sensors_csv).copy()
    df[getcol(df, ["ts","time","timestamp"])] = pd.to_datetime(
        df[getcol(df, ["ts","time","timestamp"])], utc=True)
    df = df.rename(columns={
        getcol(df, ["ts","time","timestamp"]): "ts",
        getcol(df, ["temp_int_c","T_int","room_temp_c"]): "T_int",
        getcol(df, ["rh_int_pct","RH_int","room_rh_pct"]): "RH_int",
        getcol(df, ["co2_ppm","CO2","co2"]): "CO2",
        getcol(df, ["pir_bin","PIR","occupancy"]): "PIR",
        getcol(df, ["power_total_kw","P_total","power_kw"]): "P_kw",
    })

    room = EX.Room_101
    sTi = EX.R101_Temp;  g.add((sTi, RDF.type, BRICK.Temperature_Sensor)); g.add((sTi, BRICK.isLocatedIn, room))
    sRHi= EX.R101_RH;    g.add((sRHi, RDF.type, BRICK.Humidity_Sensor));   g.add((sRHi, BRICK.isLocatedIn, room))
    sCO2= EX.R101_CO2;   g.add((sCO2, RDF.type, BRICK.CO2_Sensor));        g.add((sCO2, BRICK.isLocatedIn, room))
    sPIR= EX.R101_PIR;   g.add((sPIR, RDF.type, BRICK.Occupancy_Sensor));  g.add((sPIR, BRICK.isLocatedIn, room))
    sPow= EX.R101_Power; g.add((sPow, RDF.type, BRICK.Power_Sensor));      g.add((sPow, BRICK.isLocatedIn, room))

    step = sample_every or 1
    for _, r in df.iloc[::step].iterrows():
        ts = r["ts"]
        add_obs(g, sTi, ts, r["T_int"],  BRICK.Air_Temperature, UNIT["DegreeC"])
        add_obs(g, sRHi, ts, r["RH_int"],BRICK.Relative_Humidity, UNIT["Percent"])
        add_obs(g, sCO2, ts, r["CO2"],   BRICK.CO2, UNIT["PPM"])
        add_obs(g, sPIR, ts, int(r["PIR"]), BRICK.Occupancy, UNIT["UNITLESS"])
        add_obs(g, sPow, ts, r["P_kw"],  BRICK.Electric_Power, UNIT["KiloW"])

def add_bms(g: Graph, bms_csv: Path, sample_every: int | None = None):
    df = read_one_csv(bms_csv).copy()
    df[getcol(df, ["ts","time","timestamp"])] = pd.to_datetime(
        df[getcol(df, ["ts","time","timestamp"])], utc=True)
    df = df.rename(columns={
        getcol(df, ["ts","time","timestamp"]): "ts",
        getcol(df, ["T_set","setpoint_c","set_temp_c"]): "T_set",
        getcol(df, ["P_hvac","hvac_kw"]): "P_hvac",
        getcol(df, ["P_lighting","lighting_kw"]): "P_light",
    })

    room = EX.Room_101
    hvac = EX.HVAC1;     g.add((hvac, RDF.type, BRICK.HVAC_System));     g.add((hvac, BRICK.feeds, room))
    light= EX.Lighting1; g.add((light, RDF.type, BRICK.Lighting_System)); g.add((light, BRICK.feeds, room))

    pH = EX.HVAC_Power;  g.add((pH, RDF.type, BRICK.Power_Sensor)); g.add((hvac,  BRICK.hasPoint, pH)); g.add((pH, BRICK.isLocatedIn, room))
    pL = EX.Light_Power; g.add((pL, RDF.type, BRICK.Power_Sensor)); g.add((light, BRICK.hasPoint, pL)); g.add((pL, BRICK.isLocatedIn, room))
    pSet = EX.HVAC_Tset; g.add((pSet, RDF.type, BRICK.Temperature_Sensor)); g.add((hvac, BRICK.hasPoint, pSet)); g.add((pSet, BRICK.isLocatedIn, room))

    step = sample_every or 1
    for _, r in df.iloc[::step].iterrows():
        ts = r["ts"]
        if "P_hvac" in r:  add_obs(g, pH,   ts, r["P_hvac"], BRICK.Electric_Power, UNIT["KiloW"])
        if "P_light" in r: add_obs(g, pL,   ts, r["P_light"], BRICK.Electric_Power, UNIT["KiloW"])
        if "T_set"  in r:  add_obs(g, pSet, ts, r["T_set"],  BRICK.Air_Temperature, UNIT["DegreeC"])

# ---------- Main ----------
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--month", default="2025-03", help="YYYY-MM sous raw/")
    ap.add_argument("--sample_every", type=int, default=None, help="1 = tout, 4 = 1/4 des lignes")
    args, _ = ap.parse_known_args()   # OK pour Jupyter

    WX  = RAW_DIR / "weather" / args.month / "weather.csv"
    IOT = RAW_DIR / "sensors" / args.month / "zone_101_sensors.csv"
    BMS = RAW_DIR / "bms"     / args.month / "bms.csv"
    GRAPH_DIR.mkdir(parents=True, exist_ok=True)

    # 1) BIM ONLY
    g = load_bim()
    g.serialize(OUT_BIM_ONLY.as_posix(), format="turtle")
    print("OK ->", OUT_BIM_ONLY, "| triples:", len(g))

    # 2) + WEATHER
    add_weather(g, WX, sample_every=args.sample_every)
    g.serialize(OUT_BIM_WEATHER.as_posix(), format="turtle")
    print("OK ->", OUT_BIM_WEATHER, "| triples:", len(g))

    # 3) + IoT
    add_iot(g, IOT, sample_every=args.sample_every)
    g.serialize(OUT_BIM_WEATHER_IOT.as_posix(), format="turtle")
    print("OK ->", OUT_BIM_WEATHER_IOT, "| triples:", len(g))

    # 4) + BMS (final)
    add_bms(g, BMS, sample_every=args.sample_every)
    g.serialize(OUT_FINAL.as_posix(), format="turtle")
    print("OK ->", OUT_FINAL, "| triples:", len(g))

if __name__ == "__main__":
    main()


OK -> /home/amina/DTE/jne_project/graph/bim_only.ttl | triples: 95
OK -> /home/amina/DTE/jne_project/graph/bim_weather.ttl | triples: 69151
OK -> /home/amina/DTE/jne_project/graph/bim_weather_iot.ttl | triples: 155561
OK -> /home/amina/DTE/jne_project/graph/edtb_graph.ttl | triples: 207410
