# Agrégation RoomData

In [3]:
import os
import json
import numpy as np
import pandas as pd
import joblib

# --- Room described by this notebook ---------------------------------------
ROOM_ID = "C013"
ROOM_NAME = "Salle C013"

# --- Locations (relative to the notebook's working directory) --------------
INPUT = "../datas/processed/capteur_C013_features.csv"  # engineered features
MODEL_DIR = "../models"  # trained model artifacts are read from here
OUT_DIR = "../reports"  # JSON exports are written here

# Create the export directory up front so later writes cannot fail on it.
os.makedirs(OUT_DIR, exist_ok=True)

# The feature table is wide; show up to 200 columns in previews.
pd.set_option("display.max_columns", 200)


In [4]:
# Read the feature table, order it chronologically, then (re)derive the
# calendar helpers straight from the parsed timestamp. Columns that already
# exist in the CSV ("heure", "minute") are overwritten in place; "date" and
# "heure_str" are appended.
df = (
    pd.read_csv(INPUT, parse_dates=["timestamp"])
    .sort_values("timestamp")
    .reset_index(drop=True)
    .assign(
        date=lambda frame: frame["timestamp"].dt.date,
        heure_str=lambda frame: frame["timestamp"].dt.strftime("%H:%M"),
        heure=lambda frame: frame["timestamp"].dt.hour,
        minute=lambda frame: frame["timestamp"].dt.minute,
    )
)

df.head(3)


Unnamed: 0,timestamp,nom_salle,capacite_salle,temperature,temperature_ext,humidite,pression,nb_eleves_presents,jour,heure,minute,jour_semaine,est_weekend,salle_ouverte,heure_sin,heure_cos,ratio_occupation,ecart_temp_int_ext,ouvert_x_occupation,delta_temperature,delta_humidite,delta_pression,delta_temperature_ext,delta_occupation,temperature_lag1,temperature_lag2,temperature_ext_lag1,temperature_ext_lag2,humidite_lag1,humidite_lag2,pression_lag1,pression_lag2,nb_eleves_presents_lag1,nb_eleves_presents_lag2,temperature_moy_gliss_2,temperature_moy_gliss_6,temperature_ext_moy_gliss_2,temperature_ext_moy_gliss_6,humidite_moy_gliss_2,humidite_moy_gliss_6,nb_eleves_presents_moy_gliss_2,nb_eleves_presents_moy_gliss_6,date,heure_str
0,2025-08-20 22:00:00,C013,30,32.0,34.7,35.5,1014.8,0,2025-08-20,22,0,2,0,0,-0.5,0.866025,0.0,-2.7,0,-1.0,0.1,0.1,-1.7,0.0,32.0,32.0,34.7,34.7,35.5,35.5,1014.8,1014.8,0.0,0.0,32.0,32.0,34.7,34.7,35.5,35.5,0.0,0.0,2025-08-20,22:00
1,2025-08-20 22:30:00,C013,30,31.0,33.0,35.6,1014.9,0,2025-08-20,22,30,2,0,0,-0.382683,0.92388,0.0,-2.0,0,-1.0,0.1,0.1,-1.7,0.0,32.0,32.0,34.7,34.7,35.5,35.5,1014.8,1014.8,0.0,0.0,31.5,31.5,33.85,33.85,35.55,35.55,0.0,0.0,2025-08-20,22:30
2,2025-08-20 23:00:00,C013,30,30.7,33.0,40.3,1014.7,0,2025-08-20,23,0,2,0,0,-0.258819,0.965926,0.0,-2.3,0,-0.3,4.7,-0.2,0.0,0.0,31.0,32.0,33.0,34.7,35.6,35.5,1014.9,1014.8,0.0,0.0,30.85,31.233333,33.0,33.566667,37.95,37.133333,0.0,0.0,2025-08-20,23:00


In [5]:
# The feature list is assembled group by group. The resulting order is the
# column order of X_all, so it is kept exactly as before
# (presumably the order the models were trained with - TODO confirm).

# Raw signals; every one of them also has lag-1 and lag-2 columns.
_signals = [
    "temperature",
    "temperature_ext",
    "humidite",
    "pression",
    "nb_eleves_presents",
]

# Opening state and calendar encodings.
_context = [
    "salle_ouverte",
    "heure_sin",
    "heure_cos",
    "jour_semaine",
    "est_weekend",
]

# <signal>_lag1, <signal>_lag2 for each raw signal.
_lagged = [f"{signal}_lag{step}" for signal in _signals for step in (1, 2)]

# <signal>_moy_gliss_2 / _6 for every raw signal except "pression",
# which has no rolling-mean feature in the list.
_rolling = [
    f"{signal}_moy_gliss_{window}"
    for signal in _signals
    if signal != "pression"
    for window in (2, 6)
]

# Hand-crafted interaction features.
_derived = [
    "ratio_occupation",
    "ecart_temp_int_ext",
    "ouvert_x_occupation",
]

# Keep only the features that are actually present in the loaded table.
_available = set(df.columns)
feature_cols = [
    name
    for name in _signals + _context + _lagged + _rolling + _derived
    if name in _available
]
X_all = df.loc[:, feature_cols].copy()
X_all.shape


(336, 31)

In [6]:
def _load_levels_model(target):
    """Load the ``rf_<target>_levels_tuned.joblib`` artifact from MODEL_DIR."""
    # NOTE(review): joblib artifacts are pickle-based; only load files that
    # come from a trusted source.
    return joblib.load(os.path.join(MODEL_DIR, f"rf_{target}_levels_tuned.joblib"))


temp_levels = _load_levels_model("temperature")
hum_levels = _load_levels_model("humidite")
pres_levels = _load_levels_model("pression")


In [7]:
# Horizons expressed in rows. The preview rows above are 30 minutes apart,
# so 2 rows presumably correspond to +1h and 4 rows to +2h.
H1, H2 = 2, 4

# One prediction matrix per target: column 0 is stored as the "+1h" forecast
# and column 1 as the "+2h" forecast.
y_temp_pred = temp_levels.predict(X_all)
y_hum_pred = hum_levels.predict(X_all)
y_pres_pred = pres_levels.predict(X_all)

for target, horizons in (
    ("temperature", y_temp_pred),
    ("humidite", y_hum_pred),
    ("pression", y_pres_pred),
):
    plus_1h = f"{target}_pred_+1h_from_t"
    df[plus_1h] = horizons[:, 0]
    df[f"{target}_pred_+2h_from_t"] = horizons[:, 1]
    # Shift the forecast issued at row t down by H1 rows, i.e. onto the row
    # it is a forecast for.
    df[f"{target}_pred"] = df[plus_1h].shift(H1)

# Rows without an aligned forecast (at least the first H1 rows) are dropped.
pred_cols = ["temperature_pred", "humidite_pred", "pression_pred"]
df_pred = df.dropna(subset=pred_cols).copy()
df_pred.head(3)


Unnamed: 0,timestamp,nom_salle,capacite_salle,temperature,temperature_ext,humidite,pression,nb_eleves_presents,jour,heure,minute,jour_semaine,est_weekend,salle_ouverte,heure_sin,heure_cos,ratio_occupation,ecart_temp_int_ext,ouvert_x_occupation,delta_temperature,delta_humidite,delta_pression,delta_temperature_ext,delta_occupation,temperature_lag1,temperature_lag2,temperature_ext_lag1,temperature_ext_lag2,humidite_lag1,humidite_lag2,pression_lag1,pression_lag2,nb_eleves_presents_lag1,nb_eleves_presents_lag2,temperature_moy_gliss_2,temperature_moy_gliss_6,temperature_ext_moy_gliss_2,temperature_ext_moy_gliss_6,humidite_moy_gliss_2,humidite_moy_gliss_6,nb_eleves_presents_moy_gliss_2,nb_eleves_presents_moy_gliss_6,date,heure_str,temperature_pred_+1h_from_t,temperature_pred_+2h_from_t,temperature_pred,humidite_pred_+1h_from_t,humidite_pred_+2h_from_t,humidite_pred,pression_pred_+1h_from_t,pression_pred_+2h_from_t,pression_pred
2,2025-08-20 23:00:00,C013,30,30.7,33.0,40.3,1014.7,0,2025-08-20,23,0,2,0,0,-0.258819,0.965926,0.0,-2.3,0,-0.3,4.7,-0.2,0.0,0.0,31.0,32.0,33.0,34.7,35.6,35.5,1014.9,1014.8,0.0,0.0,30.85,31.233333,33.0,33.566667,37.95,37.133333,0.0,0.0,2025-08-20,23:00,29.49875,27.91375,30.2375,39.74225,37.34125,39.5565,1014.31,1013.974,1014.26775
3,2025-08-20 23:30:00,C013,30,29.9,31.7,36.6,1014.5,0,2025-08-20,23,30,2,0,0,-0.130526,0.991445,0.0,-1.8,0,-0.8,-3.7,-0.2,-1.3,0.0,30.7,31.0,33.0,33.0,40.3,35.6,1014.7,1014.9,0.0,0.0,30.3,30.9,32.35,33.1,38.45,37.0,0.0,0.0,2025-08-20,23:30,28.651,27.27,29.692,40.5705,36.73825,37.7555,1014.176,1013.835,1014.382
4,2025-08-21 00:00:00,C013,30,29.9,31.0,40.6,1014.5,0,2025-08-21,0,0,3,0,0,0.0,1.0,0.0,-1.1,0,0.0,4.0,0.0,-0.7,0.0,29.9,30.7,31.7,33.0,36.6,40.3,1014.5,1014.7,0.0,0.0,29.9,30.7,31.35,32.68,38.6,37.72,0.0,0.0,2025-08-21,00:00,28.19875,26.1025,29.49875,36.97125,38.55775,39.74225,1013.994,1013.89175,1014.31


In [9]:
# Hourly slots exported to RoomData: "09:00", "10:00", ..., "17:00".
HEURES_CIBLE = [f"{hour:02d}:00" for hour in range(9, 18)]

# Keep only rows that fall exactly on one of the target hours.
on_the_hour = df_pred["minute"].eq(0)
in_target_hours = df_pred["heure_str"].isin(HEURES_CIBLE)
mask = on_the_hour & in_target_hours

grid_cols = [
    "timestamp",
    "date",
    "heure_str",
    "temperature_pred",
    "humidite_pred",
    "pression_pred",
]
df_grid = df_pred.loc[mask, grid_cols].copy()

df_grid.shape


(63, 6)

In [10]:
# Number the calendar days chronologically: earliest date -> "day1", etc.
unique_dates = sorted(df_grid["date"].unique())
day_map = {
    calendar_day: f"day{rank}"
    for rank, calendar_day in enumerate(unique_dates, start=1)
}
df_grid["day_key"] = df_grid["date"].map(day_map)

# Exported values are rounded to one decimal place.
pred_cols = ["temperature_pred", "humidite_pred", "pression_pred"]
df_grid[pred_cols] = df_grid[pred_cols].round(1)

df_grid.head(10)


Unnamed: 0,timestamp,date,heure_str,temperature_pred,humidite_pred,pression_pred,day_key
22,2025-08-21 09:00:00,2025-08-21,09:00,25.5,42.4,1012.3,day1
24,2025-08-21 10:00:00,2025-08-21,10:00,25.4,41.5,1012.1,day1
26,2025-08-21 11:00:00,2025-08-21,11:00,26.9,40.4,1012.1,day1
28,2025-08-21 12:00:00,2025-08-21,12:00,28.3,42.1,1012.0,day1
30,2025-08-21 13:00:00,2025-08-21,13:00,29.4,40.5,1012.3,day1
32,2025-08-21 14:00:00,2025-08-21,14:00,29.9,42.1,1012.4,day1
34,2025-08-21 15:00:00,2025-08-21,15:00,30.5,41.0,1012.0,day1
36,2025-08-21 16:00:00,2025-08-21,16:00,29.8,37.8,1012.2,day1
38,2025-08-21 17:00:00,2025-08-21,17:00,30.7,36.5,1011.8,day1
70,2025-08-22 09:00:00,2025-08-22,09:00,21.1,51.1,1010.6,day2


In [11]:
# Build the nested RoomData structure:
#   room -> days -> dayN -> {temperature, humidity, airPressure} -> "HH:MM" -> value
roomdata = {"room": {"name": ROOM_NAME, "id": ROOM_ID, "days": {}}}

# sort=False keeps the groups in order of first appearance. df_grid is in
# timestamp order, so days are emitted chronologically (day1, day2, ...,
# day10). The default sort=True orders the string keys lexicographically and
# would place "day10" before "day2" once there are ten or more days.
for day_key, g in df_grid.groupby("day_key", sort=False):
    temp_hours = {}
    hum_hours = {}
    pres_hours = {}

    # Iterate over the same hour grid that was used to build df_grid, so the
    # two can never drift apart.
    for h in HEURES_CIBLE:
        row = g[g["heure_str"] == h]
        if row.empty:
            # No prediction for this slot on this day: leave the hour out.
            continue
        # If several rows share the slot, the first one is used.
        temp_hours[h] = float(row["temperature_pred"].iloc[0])
        hum_hours[h] = float(row["humidite_pred"].iloc[0])
        pres_hours[h] = float(row["pression_pred"].iloc[0])

    roomdata["room"]["days"][day_key] = {
        "temperature": temp_hours,
        "humidity": hum_hours,
        "airPressure": pres_hours,
    }

# Quick sanity check of the generated structure.
print(list(roomdata["room"]["days"].keys())[:3])
print(
    json.dumps(
        {k: list(v.keys()) for k, v in roomdata["room"]["days"].items()}, indent=2
    )
)


['day1', 'day2', 'day3']
{
  "day1": [
    "temperature",
    "humidity",
    "airPressure"
  ],
  "day2": [
    "temperature",
    "humidity",
    "airPressure"
  ],
  "day3": [
    "temperature",
    "humidity",
    "airPressure"
  ],
  "day4": [
    "temperature",
    "humidity",
    "airPressure"
  ],
  "day5": [
    "temperature",
    "humidity",
    "airPressure"
  ],
  "day6": [
    "temperature",
    "humidity",
    "airPressure"
  ],
  "day7": [
    "temperature",
    "humidity",
    "airPressure"
  ]
}


In [12]:
# Serialise first, then write the finished text to disk in one go.
out_path = os.path.join(OUT_DIR, "roomdata.json")
payload = json.dumps(roomdata, ensure_ascii=False, indent=2)

with open(out_path, "w", encoding="utf-8") as f:
    f.write(payload)

print("RoomData exporté :", out_path)


RoomData exporté : ../reports/roomdata.json


In [19]:
# Input produced by the aggregation step, and the enriched output file.
IN_PATH = "../reports/roomdata.json"
OUT_PATH = "../reports/roomdata_with_comfort.json"

# Inclusive (low, high) bounds per measurement. A value inside the
# "excellent" band is rated excellent; otherwise a value inside the "ok"
# band is rated ok; anything else is out of range.
RANGES_OK = dict(
    temp=(20.0, 26.0),
    hum=(40.0, 60.0),
    pres=(1005.0, 1025.0),
)
RANGES_EXCELLENT = dict(
    temp=(21.0, 24.0),
    hum=(45.0, 55.0),
    pres=(1010.0, 1020.0),
)


def _flag(val, ok_rng, exc_rng, label_ok, label_low, label_high):
    if exc_rng[0] <= val <= exc_rng[1]:
        return "excellent", f"{label_ok} excellent"
    if ok_rng[0] <= val <= ok_rng[1]:
        if val < exc_rng[0]:
            side = f"légèrement bas (≥{ok_rng[0]})"
        elif val > exc_rng[1]:
            side = f"légèrement haut (≤{ok_rng[1]})"
        else:
            side = "ok"
        return "ok", f"{label_ok} ({side})"
    if val < ok_rng[0]:
        return "out", f"{label_low} trop bas(<{ok_rng[0]})"
    else:
        return "out", f"{label_high} top haut (>{ok_rng[1]})"


def evaluer_confort_3niveaux(temp, hum, pres):
    """Rate the comfort of one time slot from its three measurements.

    Each measurement is rated with ``_flag`` against ``RANGES_OK`` and
    ``RANGES_EXCELLENT``.

    Args:
        temp: Temperature value.
        hum: Humidity value.
        pres: Air-pressure value.

    Returns:
        A dict with:
        - ``"status"``: ``"excellent"`` when all three ratings are excellent,
          ``"mediocre"`` when at least one is out of range, else ``"ok"``.
        - ``"score"``: 1 point per excellent rating, 0.5 per ok rating.
        - ``"details"``: the three messages, in the order temperature,
          humidity, pressure.
    """
    # (value, range key, ok-prefix, low message, high message)
    checks = (
        (temp, "temp", "température :", "température trop basse", "température trop haute"),
        (hum, "hum", "humidité:", "air trop sec", "air trop humide"),
        (pres, "pres", "pression:", "pression basse", "pression élevée"),
    )

    flags = []
    details = []
    for value, key, lbl_ok, lbl_low, lbl_high in checks:
        rating, message = _flag(
            value, RANGES_OK[key], RANGES_EXCELLENT[key], lbl_ok, lbl_low, lbl_high
        )
        flags.append(rating)
        details.append(message)

    # A single out-of-range measurement downgrades the whole slot.
    if "out" in flags:
        status = "mediocre"
    elif all(rating == "excellent" for rating in flags):
        status = "excellent"
    else:
        status = "ok"

    points = {"excellent": 1, "ok": 0.5}
    score = sum(points.get(rating, 0) for rating in flags)

    return {"status": status, "score": score, "details": details}

# Fail early, with an actionable message, when the aggregation output is missing.
if not os.path.exists(IN_PATH):
    raise FileNotFoundError(
        f"Introuvable : {IN_PATH}. Lancez l'agregation avant."
    )

with open(IN_PATH, "r", encoding="utf-8") as f:
    roomdata = json.load(f)

days = roomdata.get("room", {}).get("days", {})
if not days:
    raise ValueError("roomdata.json ne contient pas 'room.days'.")

for day_key, day in days.items():
    temperature = day.get("temperature", {})
    humidity = day.get("humidity", {})
    pressure = day.get("airPressure", {})

    # Only rate the hours for which all three measurements are present.
    heures = sorted(temperature.keys() & humidity.keys() & pressure.keys())
    comfort = {
        slot: evaluer_confort_3niveaux(
            float(temperature[slot]), float(humidity[slot]), float(pressure[slot])
        )
        for slot in heures
    }
    day["comfort"] = comfort

    # Tally the hourly statuses of the day.
    counts = {"excellent": 0, "ok": 0, "mediocre": 0}
    for res in comfort.values():
        counts[res["status"]] += 1
    total = sum(counts.values())

    # Day-level verdict:
    # - no rated hour at all, or at least max(1, total // 3) mediocre hours
    #   -> "mediocre"
    # - otherwise at least 60% excellent hours -> "excellent"
    # - otherwise -> "ok"
    if total == 0 or counts["mediocre"] >= max(1, total // 3):
        day_status = "mediocre"
    elif counts["excellent"] >= (total * 0.6):
        day_status = "excellent"
    else:
        day_status = "ok"

    day["comfortSummary"] = {
        "dayStatus": day_status,
        "hours": counts,
        "totalHours": total,
    }

with open(OUT_PATH, "w", encoding="utf-8") as f:
    json.dump(roomdata, f, ensure_ascii=False, indent=2)

print("niveaux de confort ajoutés :", OUT_PATH)


niveaux de confort ajoutés : ../reports/roomdata_with_comfort.json
