# Reporting

In [2]:
import base64
import json
import os
from datetime import datetime
from html import escape

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from sklearn.model_selection import TimeSeriesSplit

# --- Locations, all relative to the notebook's working directory ---
REPORT_DIR = "../reports"
FIG_DIR = "../reports/figures"
MODEL_DIR = "../models"
DATA_PATH = "../datas/processed/capteur_C013_features.csv"

# Show up to 200 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 200)

# Only the report directory is created here; the other paths are not.
os.makedirs(REPORT_DIR, exist_ok=True)


In [3]:
# Load the feature table and put it in chronological order; the positional
# time-series splits used further down rely on row order.
df = pd.read_csv(DATA_PATH, parse_dates=["timestamp"])
df = df.sort_values("timestamp").reset_index(drop=True)

# Candidate model inputs, in the order they are handed to the models.
feature_cols = [
    "temperature",
    "temperature_ext",
    "humidite",
    "pression",
    "nb_eleves_presents",
    "salle_ouverte",
    "heure_sin",
    "heure_cos",
    "jour_semaine",
    "est_weekend",
    "temperature_lag1",
    "temperature_lag2",
    "temperature_ext_lag1",
    "temperature_ext_lag2",
    "humidite_lag1",
    "humidite_lag2",
    "pression_lag1",
    "pression_lag2",
    "nb_eleves_presents_lag1",
    "nb_eleves_presents_lag2",
    "temperature_moy_gliss_2",
    "temperature_moy_gliss_6",
    "temperature_ext_moy_gliss_2",
    "temperature_ext_moy_gliss_6",
    "humidite_moy_gliss_2",
    "humidite_moy_gliss_6",
    "nb_eleves_presents_moy_gliss_2",
    "nb_eleves_presents_moy_gliss_6",
    "ratio_occupation",
    "ecart_temp_int_ext",
    "ouvert_x_occupation",
]

# Features absent from the CSV are still dropped (best effort), but no longer
# silently: a shorter feature list would otherwise only show up later as a
# feature-count error inside model.predict.
# NOTE(review): presumably the persisted models were fitted on the full list —
# confirm against the training notebook.
missing_features = [c for c in feature_cols if c not in df.columns]
if missing_features:
    print("Attention : features absentes du CSV (ignorées) :", missing_features)
feature_cols = [c for c in feature_cols if c in df.columns]


In [4]:
# Shared 5-split TimeSeriesSplit; eval_model_levels reads it as a module-level
# global to obtain the test indices of each fold.
tscv = TimeSeriesSplit(n_splits=5)


def build_targets(df, var, horizons=(2, 4), features=None):
    """Build the supervised frame for forecasting ``var`` at future row offsets.

    For every horizon ``h`` a target column ``f"{var}_t+{h}"`` is added holding
    the value of ``var`` found ``h`` rows later. Rows for which any target is
    missing (at least the trailing ``max(horizons)`` rows) are dropped and the
    index is reset. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; must contain ``var`` and every feature column.
    var : str
        Name of the column to forecast.
    horizons : sequence of int, optional
        Row offsets to predict. Defaults to ``(2, 4)``, which yields the
        historical ``<var>_t+2`` / ``<var>_t+4`` columns.
    features : sequence of str, optional
        Input columns for ``X``. Defaults to the module-level ``feature_cols``.

    Returns
    -------
    (out, X, Y) : tuple of pandas.DataFrame
        ``out`` is the full frame with targets, ``X`` a copy of the feature
        columns and ``Y`` a copy of the target columns, all row-aligned.
    """
    input_cols = feature_cols if features is None else list(features)
    target_cols = [f"{var}_t+{h}" for h in horizons]

    out = df.copy()
    for h, target in zip(horizons, target_cols):
        # A negative shift pulls the future value onto the current row.
        out[target] = out[var].shift(-h)

    out = out.dropna(subset=target_cols).reset_index(drop=True)
    X = out[input_cols].copy()
    Y = out[target_cols].copy()
    return out, X, Y


def eval_model_levels(model_path, X, Y, var_label, decimals=1, cv=None):
    """Score a persisted multi-output model on every test fold of a splitter.

    The model is loaded once with ``joblib.load`` and is NOT refitted: the
    training indices of each fold are ignored and the model predicts on the
    test rows as-is.
    NOTE(review): if the persisted model was trained on rows that overlap these
    test folds, the reported errors are optimistic — confirm against the
    training notebook.

    Parameters
    ----------
    model_path : str
        Path of the joblib file. joblib files are pickles: only load files
        from a trusted source.
    X, Y : pandas.DataFrame
        Row-aligned features and targets; one metric row is produced per
        column of ``Y`` and per fold.
    var_label : str
        Label stored in the ``variable`` column of the result.
    decimals : int or None, optional
        Number of decimals MAE/RMSE are rounded to (default 1, the historical
        behaviour). Pass ``None`` to keep full precision, e.g. before averaging
        across folds.
    cv : splitter, optional
        Object exposing ``split(X)``. Defaults to the module-level ``tscv``.

    Returns
    -------
    (scores, last_fold)
        ``scores`` is a DataFrame with columns variable, cible, fold, MAE,
        RMSE. ``last_fold`` is ``(test_indices, Y_test, Y_pred)`` for the final
        fold, or ``None`` if the splitter yielded nothing.
    """
    model = joblib.load(model_path)
    splitter = tscv if cv is None else cv

    def _fmt(value):
        # Rounding is optional so callers can aggregate unrounded metrics.
        return value if decimals is None else round(value, decimals)

    scores = []
    last_fold = None
    for fold, (_, te) in enumerate(splitter.split(X), 1):
        Xte, Yte = X.iloc[te], Y.iloc[te]
        Ypred = np.asarray(model.predict(Xte))
        if Ypred.ndim == 1:
            # Single-target estimators return a flat vector; make it a column.
            Ypred = Ypred.reshape(-1, 1)
        for i, col in enumerate(Y.columns):
            mae = mean_absolute_error(Yte.iloc[:, i], Ypred[:, i])
            rmse = root_mean_squared_error(Yte.iloc[:, i], Ypred[:, i])
            scores.append(
                {
                    "variable": var_label,
                    "cible": col,
                    "fold": fold,
                    "MAE": _fmt(mae),
                    "RMSE": _fmt(rmse),
                }
            )
        last_fold = (te, Yte, Ypred)
    return pd.DataFrame(scores), last_fold


def img_to_data_uri(path):
    """Return the image file at ``path`` as a base64 ``data:`` URI.

    Parameters
    ----------
    path : str
        Location of the image file.

    Returns
    -------
    str or None
        ``"data:<mime>;base64,<payload>"``, or ``None`` when ``path`` is not an
        existing regular file (missing file or a directory).

    The MIME type is chosen from the lower-cased file extension. Unknown
    extensions fall back to ``image/jpeg``, as before.
    """
    if not os.path.isfile(path):
        return None

    # Explicit table: deterministic, independent of the host's MIME database.
    mime_by_ext = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".svg": "image/svg+xml",
        ".webp": "image/webp",
    }
    ext = os.path.splitext(path)[1].lower()
    mime = mime_by_ext.get(ext, "image/jpeg")

    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("ascii")
    return f"data:{mime};base64,{b64}"


In [5]:
# Persisted model files, one per predicted variable.
M_TEMP = os.path.join(MODEL_DIR, "rf_temperature_levels_tuned.joblib")
M_HUM = os.path.join(MODEL_DIR, "rf_humidite_levels_tuned.joblib")
M_PRES = os.path.join(MODEL_DIR, "rf_pression_levels_tuned.joblib")

# Temperature: supervised frame, then per-fold scores.
dfT, XT, YT = build_targets(df, "temperature")
scores_T, last_T = eval_model_levels(M_TEMP, XT, YT, "Température (levels)")

# Humidity.
dfH, XH, YH = build_targets(df, "humidite")
scores_H, last_H = eval_model_levels(M_HUM, XH, YH, "Humidité (levels)")

# Pressure.
dfP, XP, YP = build_targets(df, "pression")
scores_P, last_P = eval_model_levels(M_PRES, XP, YP, "Pression (levels)")

# One long table of per-fold metrics with a fresh 0..n-1 index.
df_scores = pd.concat([scores_T, scores_H, scores_P], ignore_index=True)
df_scores.head()


Unnamed: 0,variable,cible,fold,MAE,RMSE
0,Température (levels),temperature_t+2,1,0.2,0.3
1,Température (levels),temperature_t+4,1,0.2,0.3
2,Température (levels),temperature_t+2,2,0.2,0.2
3,Température (levels),temperature_t+4,2,0.2,0.2
4,Température (levels),temperature_t+2,3,0.2,0.2


In [6]:
# Caption -> PNG path for each (variable, horizon) pair, in display order:
# both horizons of a variable before moving on to the next variable.
figs = {
    f"{label} +{horizon}": os.path.join(FIG_DIR, f"{stem}_obs_vs_pred_t+{horizon}.png")
    for label, stem in (
        ("Température", "temperature"),
        ("Humidité", "humidite"),
        ("Pression", "pression"),
    )
    for horizon in ("1h", "2h")
}

# Caption -> data URI; the value is None when the PNG is missing on disk.
fig_uris = {caption: img_to_data_uri(png_path) for caption, png_path in figs.items()}


In [7]:
# Optional excerpt of the RoomData export: the room identity plus its earliest
# day only. roomdata_sample stays None when the file is absent or has no day.
roomdata_path = os.path.join(REPORT_DIR, "roomdata.json")
roomdata_sample = None
if os.path.exists(roomdata_path):
    with open(roomdata_path, "r", encoding="utf-8") as f:
        rd = json.load(f)

    # Read every field defensively: a partial export (no name/id, or a null
    # "room"/"days") yields null fields in the excerpt instead of a KeyError.
    room = rd.get("room") or {}
    days = room.get("days") or {}
    if days:
        first_day_key = min(days)  # smallest key, same as sorted(days)[0]
        roomdata_sample = {
            "room": {
                "name": room.get("name"),
                "id": room.get("id"),
                "days": {first_day_key: days[first_day_key]},
            }
        }


In [9]:
def scores_table_html(df):
    """Render the mean MAE and RMSE per (variable, cible) pair as an HTML table.

    ``df`` must provide the columns ``variable``, ``cible``, ``MAE`` and
    ``RMSE``; rows sharing the same (variable, cible) are averaged. The result
    is sorted by those two keys and returned as an HTML ``<table>`` string
    without the index column, floats formatted with one decimal.
    """
    keys = ["variable", "cible"]
    grouped = df.groupby(keys)
    per_target = grouped.agg(
        MAE=("MAE", "mean"),
        RMSE=("RMSE", "mean"),
    ).reset_index()
    ordered = per_target.sort_values(keys)
    one_decimal = "{:.1f}".format
    return ordered.to_html(index=False, float_format=one_decimal)


# Timestamp shown in the report header (datetime.now(): naive local time),
# formatted to the minute.
generated_at = datetime.now().strftime("%Y-%m-%d %H:%M")

# Static head of the report: inline CSS, summary card, scores table, and the
# opening of the figure grid (left open here; later statements append the image
# cards and close it). This is an f-string, so literal CSS braces are doubled
# and only generated_at and scores_table_html(df_scores) are interpolated.
html = f"""
<!doctype html>
<html lang="fr">
<head>
<meta charset="utf-8"/>
<title>Rapport Modélisation IoT – Salle C013</title>
<style>
  body {{ font-family: -apple-system, Segoe UI, Roboto, Arial, sans-serif; margin: 24px; color: #222; }}
  h1 {{ margin-bottom: 0; }}
  h2 {{ margin-top: 28px; }}
  .card {{ border: 1px solid #eee; border-radius: 12px; padding: 16px; margin: 16px 0; box-shadow: 0 1px 4px rgba(0,0,0,0.04); }}
  table {{ border-collapse: collapse; width: 100%; }}
  th, td {{ border: 1px solid #eee; padding: 8px 10px; text-align: left; }}
  th {{ background: #fafafa; }}
  .grid {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(320px, 1fr)); gap: 16px; }}
  .imgcard img {{ width: 100%; height: auto; border-radius: 8px; border: 1px solid #eee; }}
  code, pre {{ background: #f6f8fa; padding: 2px 6px; border-radius: 6px; }}
</style>
</head>
<body>
  <h1>Rapport de modélisation – Salle C013</h1>
  <p>Généré le <b>{generated_at}</b></p>

  <div class="card">
    <h2>Résumé</h2>
    <ul>
      <li>Variables prédites : <b>Température</b>, <b>Humidité</b>, <b>Pression</b> aux horizons <b>+1h</b> et <b>+2h</b>.</li>
      <li>Modèles utilisés : <b>Random Forest</b> (multi-sorties, tunés) sur features du Notebook 02.</li>
      <li>Validation : <b>TimeSeriesSplit(n=5)</b>. Métriques : MAE, RMSE (arrondies à 0.1).</li>
    </ul>
  </div>

  <div class="card">
    <h2>Scores</h2>
    {scores_table_html(df_scores)}
  </div>

  <div class="card">
    <h2>Observé vs Prédit</h2>
    <div class="grid">
"""

# One image card per figure whose data URI is available; entries whose URI is
# falsy (None when the file was not found) are skipped.
for title, uri in fig_uris.items():
    if not uri:
        continue
    html += f"""
      <div class="imgcard">
        <h3>{title}</h3>
        <img src="{uri}" alt="{title}"/>
      </div>
"""

# Close the figure grid and the card that contains it.
html += """
    </div>
  </div>
"""

# Optional card showing the RoomData excerpt as pretty-printed JSON.
if roomdata_sample is not None:
    room_snippet = json.dumps(roomdata_sample, ensure_ascii=False, indent=2)
    # The JSON text can contain &, < or > (e.g. in a room name); escape it so
    # it shows verbatim inside <pre> instead of being parsed as markup.
    # Quotes are safe in element content, so they stay readable.
    room_snippet_html = escape(room_snippet, quote=False)
    html += f"""
  <div class="card">
    <h2>Format d'export (RoomData) — Extrait</h2>
    <p>Fichier complet : <code>reports/roomdata.json</code></p>
    <pre>{room_snippet_html}</pre>
  </div>
"""

# Final "Notes" card, then the closing </body> and </html> tags. Plain string
# (not an f-string): nothing is interpolated here.
html += """
  <div class="card">
    <h2>Notes</h2>
    <ul>
      <li>Les scores sont recalculés ici avec le même TimeSeriesSplit pour assurer la cohérence.</li>
      <li>Les valeurs affichées sont arrondies pour la lisibilité ; les fichiers sources conservent la précision.</li>
    </ul>
  </div>
</body>
</html>
"""

# Write the assembled document into the report directory as UTF-8,
# overwriting any previous report, then echo its path.
out_html = os.path.join(REPORT_DIR, "modeling_report.html")
with open(out_html, mode="w", encoding="utf-8") as report_file:
    report_file.write(html)

print("Rapport HTML généré :", out_html)


Rapport HTML généré : ../reports/modeling_report.html
