In [44]:
# -*- coding: utf-8 -*-
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import re
import numpy as np
import pandas as pd
import json

# ============================================================
# 0) CONFIG — CORRIGÉE (1 jour = 2h)
# ============================================================
L1_PATH = "./data/2025-08/L1.20250818-DataMathsElysa.xlsx"
L3_PATH = "./data/2025-08/L3.features_by_student.byPretest.csv"

OUT_DIR = "./out/202602/out_pomdp"
os.makedirs(OUT_DIR, exist_ok=True)

def out_path(fname: str) -> str:
    return os.path.join(OUT_DIR, f"L0-{fname}")

# --- CORRECTION  ---
# La séquence journalière est composée de 4 activités.
# Cette séquence complète dure 2h.
HOURS_PER_DAY = 2.0
ACTIVITIES_PER_DAY = 4

# (Pour info seulement, si besoin un jour)
HOURS_PER_ACTIVITY = HOURS_PER_DAY / ACTIVITIES_PER_DAY  # 0.5h = 30 min

print(f"--- CONFIG SECTION 1 ---")
print(f"HOURS_PER_DAY : {HOURS_PER_DAY} h (Durée totale de la séquence de 4 activités)")
print(f"ACTIVITIES_PER_DAY : {ACTIVITIES_PER_DAY}")

# ============================================================
# 1) LECTURE FICHIERS
# ============================================================
assert os.path.exists(L1_PATH), f"Introuvable: {L1_PATH}"
assert os.path.exists(L3_PATH), f"Introuvable: {L3_PATH}"

xls = pd.ExcelFile(L1_PATH)
print("L1 — Onglets détectés:", xls.sheet_names)

# Lire les deux onglets (on n'impose pas le nom exact, on mappe par heuristique)
df_sheets = {name: pd.read_excel(L1_PATH, sheet_name=name) for name in xls.sheet_names}

# Heuristique: onglet "Résultatsélèves" = contient Pretest/Final + IDélèves
# onglet "Activités" = contient Semaine/Jour + colonnes activités 0..4
def _score_results_sheet(d: pd.DataFrame) -> int:
    cols = {c.strip(): c for c in d.columns}
    score = 0
    for k in ["IDélèves","Pretest","Final","Age","Genre","Zone"]:
        if k in cols: score += 2
    if "Nombre d'heures de remédiation fait au total" in cols: score += 2
    return score

def _score_activities_sheet(d: pd.DataFrame) -> int:
    cols = [str(c) for c in d.columns]
    score = 0
    if any(c.lower().strip() == "semaine" for c in cols): score += 2
    if any(c.lower().strip() == "jour" for c in cols): score += 2
    # beaucoup de colonnes après les 2 premières
    if len(cols) > 20: score += 2
    return score

sheet_scores = []
for name, d in df_sheets.items():
    sheet_scores.append((name, _score_results_sheet(d), _score_activities_sheet(d)))

# Choix
results_sheet = sorted(sheet_scores, key=lambda t: t[1], reverse=True)[0][0]
activities_sheet = sorted(sheet_scores, key=lambda t: t[2], reverse=True)[0][0]

df_results = df_sheets[results_sheet].copy()
df_acts = df_sheets[activities_sheet].copy()
df_l3 = pd.read_csv(L3_PATH)

print("\n L1 (résultats) choisi:", results_sheet, "| shape:", df_results.shape)
print(" L1 (activités) choisi:", activities_sheet, "| shape:", df_acts.shape)
print(" L3 chargé:", "| shape:", df_l3.shape)

# ============================================================
# 2) NORMALISATION DES COLONNES (L1 Résultats)
# ============================================================
def _norm_colname(s: str) -> str:
    s = str(s).strip()
    s = s.replace("\n", " ").replace("\r", " ")
    s = re.sub(r"\s+", " ", s)
    return s

df_results.columns = [_norm_colname(c) for c in df_results.columns]
df_acts.columns = [_norm_colname(c) for c in df_acts.columns]
df_l3.columns = [_norm_colname(c) for c in df_l3.columns]

# Renommage standard
rename_results = {
    "IDélèves": "StudentID",
    "Nombre d'heures de remédiation fait au total": "HoursTotal_L1",
    "Pretest": "Pretest",
    "Final": "Final",
    "Age": "Age",
    "Classe": "Classe",
    "Genre": "Genre",
    "Zone": "Zone",
}
for k, v in rename_results.items():
    if k in df_results.columns:
        df_results.rename(columns={k: v}, inplace=True)

# Coercitions types
if "StudentID" in df_results.columns:
    df_results["StudentID"] = df_results["StudentID"].astype(str).str.strip()

for c in ["Age", "Pretest", "Final", "HoursTotal_L1"]:
    if c in df_results.columns:
        df_results[c] = pd.to_numeric(df_results[c], errors="coerce")

# bornage niveaux 1..5
def cap_level(x):
    try:
        v = int(x)
    except Exception:
        return np.nan
    return min(5, max(1, v))

if "Pretest" in df_results.columns:
    df_results["Pretest_i"] = df_results["Pretest"].apply(cap_level)
if "Final" in df_results.columns:
    df_results["Final_i"] = df_results["Final"].apply(cap_level)

# ============================================================
# 3) NORMALISATION (L1 Activités)
# ============================================================
# Attendu: colonnes "Semaine", "Jour", puis activités groupées par préfixes (Deb, Unchiffe, Deuxchiffres, TroisA, TroisB)
# Standardiser "Semaine" et "Jour"
col_week = None
col_day = None
for c in df_acts.columns:
    cl = c.lower().strip()
    if cl == "semaine": col_week = c
    if cl == "jour": col_day = c

if col_week is None or col_day is None:
    raise ValueError("Onglet Activités: colonnes 'Semaine' et/ou 'Jour' introuvables après normalisation.")

df_acts.rename(columns={col_week: "Week", col_day: "Day"}, inplace=True)
df_acts["Week"] = pd.to_numeric(df_acts["Week"], errors="coerce")
df_acts["Day"] = pd.to_numeric(df_acts["Day"], errors="coerce")

# Colonnes d'activités = tout sauf Week/Day
activity_cols = [c for c in df_acts.columns if c not in ["Week","Day"]]

# Forcer numériques (0..4)
for c in activity_cols:
    df_acts[c] = pd.to_numeric(df_acts[c], errors="coerce")

# Détecter les préfixes bloc (avant le premier espace)
def block_prefix(col: str) -> str:
    s = str(col).strip()
    # ex: "Deb Lecture de la Table d’addition"
    return s.split(" ", 1)[0] if " " in s else s

df_blocks = pd.Series([block_prefix(c) for c in activity_cols], name="Block")
block_counts = df_blocks.value_counts().to_dict()

print("\nBlocs détectés (préfixes) dans L1/Activités:")
for k, v in sorted(block_counts.items(), key=lambda kv: (-kv[1], kv[0])):
    print(f" - {k}: {v} colonnes")

# Marquer bloc pour chaque colonne activité
col_to_block = {c: block_prefix(c) for c in activity_cols}

# ============================================================
# 4) CONTROLES  (L1 Activités)
# ============================================================
# 4.1 Vérifier que chaque jour contient exactement 4 activités exécutées (valeurs 1..4)
def count_executed_per_day(row: pd.Series) -> int:
    vals = row[activity_cols].values
    return int(np.sum(np.isin(vals, [1,2,3,4])))

exec_counts = df_acts.apply(count_executed_per_day, axis=1)
print("\nContrôle: nb d'activités exécutées par jour (attendu = 4):")
print(exec_counts.describe())

bad_days = df_acts.loc[exec_counts != 4, ["Week","Day"]].copy()
bad_days["ExecutedCount"] = exec_counts[exec_counts != 4].values
print(f"Jours anormaux (ExecutedCount != 4): {len(bad_days)}")
if len(bad_days) > 0:
    print(bad_days.head(20).to_string(index=False))

# 4.2 Vérifier présence des positions 1..4 dans chaque ligne (si 4 activités, on veut idéalement 1,2,3,4 chacune une fois)
def missing_positions(row: pd.Series):
    vals = row[activity_cols].values
    pos = set([int(v) for v in vals if v in [1,2,3,4]])
    missing = [p for p in [1,2,3,4] if p not in pos]
    return missing

miss_pos = df_acts.apply(missing_positions, axis=1)
n_miss = miss_pos.apply(len)
print("\nContrôle: positions manquantes dans la journée (attendu = 0):")
print(n_miss.value_counts().sort_index().to_string())

# ============================================================
# 5) CONTROLES (L1 Résultats) + (L3)
# ============================================================
print("\nL1 Résultats — aperçu colonnes:", df_results.columns.tolist())

# Checks simples niveaux
if "Pretest_i" in df_results.columns:
    print("\nDistribution Pretest_i (L1):")
    print(df_results["Pretest_i"].value_counts(dropna=False).sort_index().to_string())

if "Final_i" in df_results.columns:
    print("\nDistribution Final_i (L1):")
    print(df_results["Final_i"].value_counts(dropna=False).sort_index().to_string())

if "HoursTotal_L1" in df_results.columns:
    print("\nHoursTotal_L1 — stats:")
    print(df_results["HoursTotal_L1"].describe().to_string())

# L3 checks
print("\nL3 — colonnes importantes ")
for c in ["HoursTotal","Pretest","Final","Delta","Mastery_ge4","Age","Genre","Zone","LevelTag"]:
    print(f" - {c}: {'OK' if c in df_l3.columns else 'ABSENT'}")

if "HoursTotal" in df_l3.columns:
    df_l3["HoursTotal"] = pd.to_numeric(df_l3["HoursTotal"], errors="coerce")
    print("\nHoursTotal (L3) — stats:")
    print(df_l3["HoursTotal"].describe().to_string())

# ============================================================
# 6) (OPTIONNEL) COHERENCE L1 vs L3 : heures totales
# ============================================================
has_id_l3 = any(c.lower() in ["studentid","idélèves","ideleves","id_eleve","id"] for c in df_l3.columns)

if has_id_l3:
    # repérer la colonne id la plus plausible
    cand = None
    for c in df_l3.columns:
        if c.lower() in ["studentid","idélèves","ideleves","id_eleve","id"]:
            cand = c
            break
    if cand and "StudentID" in df_results.columns:
        df_l3["_StudentID"] = df_l3[cand].astype(str).str.strip()
        df_merge = df_results.merge(df_l3, left_on="StudentID", right_on="_StudentID", how="inner", suffixes=("_L1","_L3"))
        print("\nMerge L1-L3 via StudentID:", df_merge.shape)

        if "HoursTotal_L1" in df_merge.columns and "HoursTotal" in df_merge.columns:
            diff = (df_merge["HoursTotal"] - df_merge["HoursTotal_L1"])
            print("\nCohérence HoursTotal: (L3 - L1) stats")
            print(diff.describe().to_string())
else:
    print("\nInfo: L3 ne contient pas d'ID explicite (StudentID). On ne merge pas L1 et L3 à cette étape.")

# ============================================================
# 7) SAUVEGARDES “CLEAN”
# ============================================================
# Sauver L1 clean
df_results.to_csv(out_path("L1_results_clean.csv"), index=False, encoding="utf-8-sig")
df_acts.to_csv(out_path("L1_activities_clean.csv"), index=False, encoding="utf-8-sig")

# Sauver un petit dictionnaire colonne->bloc pour réutilisation
df_colblock = pd.DataFrame({
    "ActivityColumn": activity_cols,
    "Block": [col_to_block[c] for c in activity_cols]
})
df_colblock.to_csv(out_path("L1_activity_column_blocks.csv"), index=False, encoding="utf-8-sig")

# Sauver une synthèse simple avec les NOUVELLES CONSTANTES
summary = {
    "L1_path": L1_PATH,
    "L3_path": L3_PATH,
    "L1_results_sheet": results_sheet,
    "L1_activities_sheet": activities_sheet,
    "L1_results_shape": df_results.shape,
    "L1_activities_shape": df_acts.shape,
    "L3_shape": df_l3.shape,
    "HOURS_PER_DAY": HOURS_PER_DAY,          # 2.0
    "ACTIVITIES_PER_DAY": ACTIVITIES_PER_DAY, # 4
    "HOURS_PER_ACTIVITY": HOURS_PER_ACTIVITY, # 0.5
    "blocks_detected": block_counts,
    "n_bad_days_executedcount_ne_4": int((exec_counts != 4).sum()),
    "n_days_missing_positions": int((n_miss > 0).sum()),
    "outputs": {
        "L1_results_clean": out_path("L1_results_clean.csv"),
        "L1_activities_clean": out_path("L1_activities_clean.csv"),
        "L1_activity_column_blocks": out_path("L1_activity_column_blocks.csv"),
    }
}
with open(out_path("SECTION1_summary.json"), "w", encoding="utf-8") as f:
    json.dump(summary, f, ensure_ascii=False, indent=2)

print("\n SECTION 1 terminée.")
print("Fichiers produits:")
print(" -", out_path("L1_results_clean.csv"))
print(" -", out_path("L1_activities_clean.csv"))
print(" -", out_path("L1_activity_column_blocks.csv"))
print(" -", out_path("SECTION1_summary.json")) 

--- CONFIG SECTION 1 ---
HOURS_PER_DAY : 2.0 h (Durée totale de la séquence de 4 activités)
ACTIVITIES_PER_DAY : 4
L1 — Onglets détectés: ['Résultatsélèves', 'Activités']

 L1 (résultats) choisi: Résultatsélèves | shape: (813, 8)
 L1 (activités) choisi: Activités | shape: (10, 54)
 L3 chargé: | shape: (813, 265)

Blocs détectés (préfixes) dans L1/Activités:
 - Deuxchiffres: 13 colonnes
 - TroisA: 11 colonnes
 - Deb: 10 colonnes
 - Unchiffe: 10 colonnes
 - TroisB: 8 colonnes

Contrôle: nb d'activités exécutées par jour (attendu = 4):
count    10.000000
mean     20.200000
std       0.632456
min      20.000000
25%      20.000000
50%      20.000000
75%      20.000000
max      22.000000
dtype: float64
Jours anormaux (ExecutedCount != 4): 10
 Week  Day  ExecutedCount
    1    1             20
    1    2             20
    1    3             20
    1    4             20
    1    5             20
    2    6             20
    2    7             20
    2    8             22
    2    9          

In [45]:
# -*- coding: utf-8 -*-
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import re
import json
import numpy as np
import pandas as pd

OUT_DIR = "./out/202602/out_pomdp"
os.makedirs(OUT_DIR, exist_ok=True)

def out_path(fname: str) -> str:
    return os.path.join(OUT_DIR, f"L0-{fname}")

L1_ACTS_CLEAN = out_path("L1_activities_clean.csv")
L1_COLBLOCK   = out_path("L1_activity_column_blocks.csv")

assert os.path.exists(L1_ACTS_CLEAN), "Exécute d’abord la SECTION 1 (L1_activities_clean.csv introuvable)."
assert os.path.exists(L1_COLBLOCK),   "Exécute d’abord la SECTION 1 (L1_activity_column_blocks.csv introuvable)."

df_acts = pd.read_csv(L1_ACTS_CLEAN)
df_colblock = pd.read_csv(L1_COLBLOCK)

# Attendu: colonnes Week/Day
assert "Week" in df_acts.columns and "Day" in df_acts.columns, "Week/Day introuvables dans L1_activities_clean.csv"

# ============================================================
# 1) IDENTIFIER BLOCS + ACTIVITÉS
# ============================================================
activity_cols = df_colblock["ActivityColumn"].astype(str).tolist()
col_to_block = dict(zip(df_colblock["ActivityColumn"].astype(str), df_colblock["Block"].astype(str)))

blocks = sorted(df_colblock["Block"].unique().tolist())
print("Blocs détectés:", blocks)

# Activités par bloc
block_to_cols = {b: [c for c in activity_cols if col_to_block.get(c) == b] for b in blocks}

# Fonction: extraire “nom d’activité” (sans préfixe bloc)
def activity_name(col: str) -> str:
    s = str(col).strip()
    # retire le préfixe "Bloc "
    # ex: "Deb Lecture du Tableau de nombres" -> "Lecture du Tableau de nombres"
    parts = s.split(" ", 1)
    return parts[1].strip() if len(parts) > 1 else s

block_to_activities = {b: [activity_name(c) for c in block_to_cols[b]] for b in blocks}

# ============================================================
# 2) RECONSTRUIRE SEQUENCE (pos1..pos4) POUR UN JOUR ET UN BLOC
# ============================================================
def extract_day_sequence(row: pd.Series, cols_for_block: list):
    """
    Retourne:
      - seq = {pos1:act, pos2:act, pos3:act, pos4:act}
      - quality flags (missing positions, duplicate positions, etc.)
    """
    # valeurs dans les colonnes du bloc
    vals = row[cols_for_block].values
    # map position -> indices d’activités (peut être 0, 1 ou plusieurs si data anormale)
    pos_to_idx = {p: np.where(vals == p)[0].tolist() for p in [1,2,3,4]}

    seq = {}
    issues = []
    for p in [1,2,3,4]:
        idxs = pos_to_idx[p]
        if len(idxs) == 0:
            seq[f"pos{p}"] = None
            issues.append(f"missing_pos{p}")
        elif len(idxs) > 1:
            # anomalie: plusieurs activités marquées même position
            seq[f"pos{p}"] = activity_name(cols_for_block[idxs[0]])
            issues.append(f"multi_pos{p}")
        else:
            seq[f"pos{p}"] = activity_name(cols_for_block[idxs[0]])

    # contrôle: nb activités exécutées (= count of vals in {1,2,3,4})
    executed = int(np.sum(np.isin(vals, [1,2,3,4])))
    if executed != 4:
        issues.append(f"executed_count={executed}")

    return seq, issues

# ============================================================
# 3) CONSTRUIRE TABLES LONGUES (par jour, par bloc)
# ============================================================
rows_long = []
rows_list = []
rows_vector = []

for _, r in df_acts.iterrows():
    wk = int(r["Week"]) if pd.notna(r["Week"]) else None
    dy = int(r["Day"]) if pd.notna(r["Day"]) else None

    for b in blocks:
        cols_b = block_to_cols[b]
        if not cols_b:
            continue

        seq, issues = extract_day_sequence(r, cols_b)

        # représentation liste ordonnée [pos1,pos2,pos3,pos4]
        seq_list = [seq["pos1"], seq["pos2"], seq["pos3"], seq["pos4"]]

        # représentation “vecteur 0..4” sur toutes les activités du bloc
        # (même logique que ton format Day i: [0,4,0,2,...])
        # chaque entrée = 0 si non exécutée, sinon position 1..4
        v = []
        for c in cols_b:
            x = r[c]
            if pd.isna(x):
                v.append(0)
            else:
                xi = int(x)
                v.append(xi if xi in [1,2,3,4] else 0)

        rows_long.append({
            "Week": wk, "Day": dy, "Block": b,
            "pos1": seq["pos1"], "pos2": seq["pos2"], "pos3": seq["pos3"], "pos4": seq["pos4"],
            "Issues": "|".join(issues) if issues else ""
        })

        rows_list.append({
            "Week": wk, "Day": dy, "Block": b,
            "Sequence": json.dumps(seq_list, ensure_ascii=False),
            "Issues": "|".join(issues) if issues else ""
        })

        # vector row (wide)
        row_vec = {"Week": wk, "Day": dy, "Block": b, "Issues": "|".join(issues) if issues else ""}
        # noms de colonnes lisibles
        for i, c in enumerate(cols_b, start=1):
            row_vec[f"act_{i}:{activity_name(c)}"] = v[i-1]
        rows_vector.append(row_vec)

df_long = pd.DataFrame(rows_long)
df_list = pd.DataFrame(rows_list)
df_vec  = pd.DataFrame(rows_vector)

# ============================================================
# 4) STATS QUALITE PAR BLOC
# ============================================================
def issues_stats(df_long_block: pd.DataFrame):
    issues = df_long_block["Issues"].fillna("").astype(str)
    total = len(df_long_block)
    n_ok = int((issues == "").sum())
    n_bad = total - n_ok

    # compter chaque type d’issue
    counter = {}
    for s in issues:
        if not s:
            continue
        parts = s.split("|")
        for p in parts:
            counter[p] = counter.get(p, 0) + 1

    return {"total_days": total, "ok": n_ok, "bad": n_bad, "issue_counts": counter}

block_quality = {b: issues_stats(df_long[df_long["Block"] == b]) for b in blocks}

print("\nQualité par bloc (résumé):")
for b in blocks:
    q = block_quality[b]
    print(f" - {b}: total={q['total_days']} ok={q['ok']} bad={q['bad']}")

# ============================================================
# 5) SAUVEGARDES
# ============================================================
df_long.to_csv(out_path("SECTION2_L1_sequences_by_block_long.csv"), index=False, encoding="utf-8-sig")
df_list.to_csv(out_path("SECTION2_L1_sequences_by_block_list.csv"), index=False, encoding="utf-8-sig")
df_vec.to_csv(out_path("SECTION2_L1_sequences_by_block_vector.csv"), index=False, encoding="utf-8-sig")

with open(out_path("SECTION2_block_to_activities.json"), "w", encoding="utf-8") as f:
    json.dump(block_to_activities, f, ensure_ascii=False, indent=2)

with open(out_path("SECTION2_block_quality.json"), "w", encoding="utf-8") as f:
    json.dump(block_quality, f, ensure_ascii=False, indent=2)

print("\n SECTION 2 terminée.")
print("Fichiers produits:")
print(" -", out_path("SECTION2_L1_sequences_by_block_long.csv"))
print(" -", out_path("SECTION2_L1_sequences_by_block_list.csv"))
print(" -", out_path("SECTION2_L1_sequences_by_block_vector.csv"))
print(" -", out_path("SECTION2_block_to_activities.json"))
print(" -", out_path("SECTION2_block_quality.json"))

# ============================================================
# 6) (OPTION) APERCU  : afficher 3 jours pour chaque bloc
# ============================================================
print("\nAperçu  (3 premiers jours par bloc) :")
for b in blocks:
    sub = df_long[df_long["Block"] == b].head(3)
    if len(sub) == 0:
        continue
    print(f"\n--- Bloc {b} ---")
    for _, rr in sub.iterrows():
        print(f"Week {rr['Week']} Day {rr['Day']}: pos1={rr['pos1']} | pos2={rr['pos2']} | pos3={rr['pos3']} | pos4={rr['pos4']}"
              + (f"  [Issues: {rr['Issues']}]" if rr['Issues'] else ""))


Blocs détectés: ['Deb', 'Deuxchiffres', 'TroisA', 'TroisB', 'Unchiffe']

Qualité par bloc (résumé):
 - Deb: total=10 ok=10 bad=0
 - Deuxchiffres: total=10 ok=9 bad=1
 - TroisA: total=10 ok=9 bad=1
 - TroisB: total=10 ok=10 bad=0
 - Unchiffe: total=10 ok=10 bad=0

 SECTION 2 terminée.
Fichiers produits:
 - ./out/202602/out_pomdp\L0-SECTION2_L1_sequences_by_block_long.csv
 - ./out/202602/out_pomdp\L0-SECTION2_L1_sequences_by_block_list.csv
 - ./out/202602/out_pomdp\L0-SECTION2_L1_sequences_by_block_vector.csv
 - ./out/202602/out_pomdp\L0-SECTION2_block_to_activities.json
 - ./out/202602/out_pomdp\L0-SECTION2_block_quality.json

Aperçu  (3 premiers jours par bloc) :

--- Bloc Deb ---
Week 1 Day 1: pos1=Lecture du Tableau de nombres | pos2=Activité avec bâtonnets et paquet | pos3=Gymn aux nombres | pos4=Exercices d’opérations
Week 1 Day 2: pos1=Lecture du Tableau de nombres | pos2=Lecture de la Table d’addition | pos3=Opération de base avec problèmes d'addition | pos4=Exercices d’opération

In [46]:
# -*- coding: utf-8 -*-
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import re
import json
import numpy as np
import pandas as pd
from difflib import SequenceMatcher

OUT_DIR = "./out/202602/out_pomdp"
os.makedirs(OUT_DIR, exist_ok=True)

def out_path(fname: str) -> str:
    return os.path.join(OUT_DIR, f"L0-{fname}")

# Outputs SECTION2
SEQ_LONG = out_path("SECTION2_L1_sequences_by_block_long.csv")
COLBLOCK = out_path("L1_activity_column_blocks.csv")          # from SECTION1
BLOCK_ACTS_JSON = out_path("SECTION2_block_to_activities.json")  # from SECTION2

# L3
L3_PATH = "./data/2025-08/L3.features_by_student.byPretest.csv"  # adapte si besoin
assert os.path.exists(L3_PATH), f"Introuvable L3 : {L3_PATH}"

assert os.path.exists(SEQ_LONG), "Exécute SECTION 2 d’abord (SECTION2_L1_sequences_by_block_long.csv introuvable)."
assert os.path.exists(COLBLOCK), "Exécute SECTION 1 d’abord (L1_activity_column_blocks.csv introuvable)."
assert os.path.exists(BLOCK_ACTS_JSON), "Exécute SECTION 2 d’abord (SECTION2_block_to_activities.json introuvable)."

df_seq_long = pd.read_csv(SEQ_LONG)
df_colblock = pd.read_csv(COLBLOCK)
df_l3 = pd.read_csv(L3_PATH)

# ============================================================
# 1) HELPERS
# ============================================================
def norm_text(s: str) -> str:
    s = str(s).strip().lower()
    s = re.sub(r"[’'`]", " ", s)
    s = re.sub(r"[^a-z0-9]+", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

def sim(a: str, b: str) -> float:
    return SequenceMatcher(None, norm_text(a), norm_text(b)).ratio()

def cap_level(x):
    try:
        v = int(x)
    except Exception:
        return np.nan
    return min(5, max(1, v))

# Assure Pretest_i dans L3
if "Pretest_i" not in df_l3.columns:
    if "Pretest" in df_l3.columns:
        df_l3["Pretest_i"] = df_l3["Pretest"].apply(cap_level)
    else:
        raise ValueError("L3 doit contenir Pretest (ou Pretest_i).")

# ============================================================
# 2) (A) Déduire LevelTag -> Level via distribution Pretest
# ============================================================
leveltag_to_level = {}
leveltag_diag_rows = []

if "LevelTag" in df_l3.columns:
    for tag, g in df_l3.dropna(subset=["LevelTag","Pretest_i"]).groupby("LevelTag"):
        # niveau dominant (mode) dans ce tag
        counts = g["Pretest_i"].astype(int).value_counts().sort_index()
        if len(counts) == 0:
            continue
        dominant_level = int(counts.idxmax())
        share = float(counts.max() / counts.sum())
        leveltag_to_level[str(tag)] = dominant_level
        leveltag_diag_rows.append({
            "LevelTag": str(tag),
            "DominantLevel": dominant_level,
            "DominantShare": round(share, 4),
            "Counts": json.dumps({int(k): int(v) for k,v in counts.to_dict().items()})
        })

df_leveltag_diag = pd.DataFrame(leveltag_diag_rows).sort_values(
    ["DominantLevel","DominantShare"], ascending=[True, False]
)

df_leveltag_diag.to_csv(out_path("SECTION3_leveltag_to_level.csv"), index=False, encoding="utf-8-sig")

print("\n[Diag] LevelTag -> Level (à partir de L3 / Pretest_i):")
if len(df_leveltag_diag) == 0:
    print(" - Aucun LevelTag exploitable trouvé dans L3 (colonne LevelTag absente ou vide).")
else:
    print(df_leveltag_diag.head(30).to_string(index=False))

# ============================================================
# 3) (B) Mapper Block (L1) -> Level via matching texte Block <-> LevelTag
# ============================================================
blocks = sorted(df_colblock["Block"].astype(str).unique().tolist())

# si pas de LevelTag dans L3, fallback: mapping ordinal par fréquence d’apparition
# (moins fiable, mais au moins automatique)
has_leveltag = ("LevelTag" in df_l3.columns) and (len(df_leveltag_diag) > 0)

mapping_rows = []
block_to_level = {}

# Option: override manuel si tu veux forcer certains cas après inspection
MANUAL_OVERRIDE_BLOCK_TO_LEVEL = {
    # Exemple (à compléter si besoin):
    # "TroisA": 4,
    # "TroisB": 5,
}

if has_leveltag:
    tags = list(leveltag_to_level.keys())

    for b in blocks:
        if b in MANUAL_OVERRIDE_BLOCK_TO_LEVEL:
            lvl = int(MANUAL_OVERRIDE_BLOCK_TO_LEVEL[b])
            block_to_level[b] = lvl
            mapping_rows.append({
                "Block": b,
                "MatchedLevelTag": "__MANUAL_OVERRIDE__",
                "Level": lvl,
                "Similarity": 1.0,
                "Method": "manual_override"
            })
            continue

        # meilleur match par similarité texte
        scored = [(t, sim(b, t)) for t in tags]
        scored.sort(key=lambda x: x[1], reverse=True)

        best_tag, best_score = scored[0]
        lvl = int(leveltag_to_level[best_tag])

        # garde aussi les 3 meilleurs pour audit
        top3 = scored[:3]
        mapping_rows.append({
            "Block": b,
            "MatchedLevelTag": best_tag,
            "Level": lvl,
            "Similarity": round(float(best_score), 4),
            "Top3": json.dumps([(t, round(float(sc),4)) for t,sc in top3], ensure_ascii=False),
            "Method": "text_similarity_block_vs_leveltag"
        })
        block_to_level[b] = lvl

else:
    # Fallback: ordonner les blocs par “activité dans le temps”:
    # hypothèse: les blocs “débutants” apparaissent plus tôt (Week/Day petits)
    # => on calcule pour chaque block la médiane du (Week,Day) quand il est réellement utilisé (>=1)
    # puis on mappe les 5 premiers rangs -> niveaux 1..5
    print("\n[WARN] Fallback sans LevelTag: mapping ordinal basé sur la temporalité (moins fiable).")

    # construit un score “timing” par block
    timing = []
    # utilisation d’un jour si au moins une des pos1..pos4 est non nulle
    used = df_seq_long.copy()
    used["is_used"] = used[["pos1","pos2","pos3","pos4"]].notna().any(axis=1).astype(int)

    for b in blocks:
        gb = used[(used["Block"] == b) & (used["is_used"] == 1)]
        if len(gb) == 0:
            # si jamais bloc jamais utilisé (rare), on met un grand score
            med = 10**9
        else:
            # score = median(Week*100 + Day)
            med = float(np.median(gb["Week"].fillna(0).values * 100 + gb["Day"].fillna(0).values))
        timing.append((b, med))

    timing.sort(key=lambda x: x[1])
    # si >5 blocs, on “compresse” sur 1..5 par quantiles
    meds = np.array([t[1] for t in timing], dtype=float)
    if len(meds) == 0:
        raise ValueError("Impossible de construire un mapping (aucun bloc détecté).")

    # quantiles
    q = np.quantile(meds, [0.2, 0.4, 0.6, 0.8])
    def quantile_to_level(m):
        if m <= q[0]: return 1
        if m <= q[1]: return 2
        if m <= q[2]: return 3
        if m <= q[3]: return 4
        return 5

    for b, med in timing:
        if b in MANUAL_OVERRIDE_BLOCK_TO_LEVEL:
            lvl = int(MANUAL_OVERRIDE_BLOCK_TO_LEVEL[b])
            method = "manual_override"
        else:
            lvl = quantile_to_level(med)
            method = "timing_quantile"
        block_to_level[b] = lvl
        mapping_rows.append({
            "Block": b,
            "MatchedLevelTag": "",
            "Level": lvl,
            "Similarity": "",
            "Top3": "",
            "Method": method,
            "MedianWeekDayScore": med
        })

df_map = pd.DataFrame(mapping_rows)

df_map.to_csv(out_path("SECTION3_block_to_level.csv"), index=False, encoding="utf-8-sig")
with open(out_path("SECTION3_block_to_level.json"), "w", encoding="utf-8") as f:
    json.dump(block_to_level, f, ensure_ascii=False, indent=2)

print("\n Mapping Block -> Level produit :")
print(df_map[["Block","Level","MatchedLevelTag","Similarity","Method"]].to_string(index=False))

# ============================================================
# 4) (C) Vérif  : cohérence TroisA/TroisB (si présents)
# ============================================================
for key in ["TroisA", "TroisB", "troisA", "troisB"]:
    # on normalise par exact match “Block”
    if key in block_to_level:
        print(f"\n[Check] {key} -> Level {block_to_level[key]} (selon mapping)")
        break

print("\nFichiers produits:")
print(" -", out_path("SECTION3_block_to_level.csv"))
print(" -", out_path("SECTION3_block_to_level.json"))
print(" -", out_path("SECTION3_leveltag_to_level.csv"))



[Diag] LevelTag -> Level (à partir de L3 / Pretest_i):
LevelTag  DominantLevel  DominantShare     Counts
      L1              1            1.0   {"1": 4}
      L2              2            1.0  {"2": 66}
      L3              3            1.0 {"3": 166}
      L4              4            1.0 {"4": 230}
      L5              5            1.0 {"5": 347}

 Mapping Block -> Level produit :
       Block  Level MatchedLevelTag  Similarity                            Method
         Deb      1              L1         0.0 text_similarity_block_vs_leveltag
Deuxchiffres      1              L1         0.0 text_similarity_block_vs_leveltag
      TroisA      1              L1         0.0 text_similarity_block_vs_leveltag
      TroisB      1              L1         0.0 text_similarity_block_vs_leveltag
    Unchiffe      1              L1         0.0 text_similarity_block_vs_leveltag

[Check] TroisA -> Level 1 (selon mapping)

Fichiers produits:
 - ./out/202602/out_pomdp\L0-SECTION3_block_to_level.c

In [47]:
# -*- coding: utf-8 -*-

import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning) # Ignore uniquement les warnings de fragmentation

import os
import json
import numpy as np
import pandas as pd

from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
from sklearn.model_selection import StratifiedKFold

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# optional: xgboost
xgb_ok = True
try:
    import xgboost as xgb
except Exception:
    xgb_ok = False

# persist model
try:
    import joblib
    joblib_ok = True
except Exception:
    joblib_ok = False

OUT_DIR = "./out/202602/out_pomdp"
os.makedirs(OUT_DIR, exist_ok=True)

def out_path(fname: str) -> str:
    return os.path.join(OUT_DIR, f"L0-{fname}")

L3_PATH = "./data/2025-08/L3.features_by_student.byPretest.csv"  # adapte si besoin
assert os.path.exists(L3_PATH), f"Introuvable : {L3_PATH}"

RANDOM_STATE = 42
EPS = 1e-12

# ============================================================
#  — TEMPS (CORRIGÉ)
# ============================================================
# Séquence journalière = 4 activités = 2h TOTAL
HOURS_PER_DAY = 2.0

STANDARD_DAYS_A = 10.0
STANDARD_HOURS_A = STANDARD_DAYS_A * HOURS_PER_DAY  # 20h (et non 80h)

# seuil pour décider A vs B
# -> proche de 10 jours, on tolère un peu d’écart (ex : 9 jours et + => A)
THRESHOLD_DAYS_FOR_A = 9.0  # => 18h

A, B = "A", "B"

# ============================================================
# 1) LOAD L3 + FEATURES
# ============================================================
df = pd.read_csv(L3_PATH)

def cap_level(x):
    try:
        v = int(x)
    except Exception:
        return np.nan
    return min(5, max(1, v))

if "Pretest_i" not in df.columns:
    if "Pretest" in df.columns:
        df["Pretest_i"] = df["Pretest"].apply(cap_level)
    else:
        raise ValueError("L3 doit contenir Pretest ou Pretest_i")

if "HoursTotal" not in df.columns:
    raise ValueError("L3 doit contenir la colonne HoursTotal (tu as demandé de l'utiliser).")

# ============================================================
# 2) LABEL ACTION OBSERVÉE via HoursTotal -> DaysTotal
# ============================================================
# Correction: on divise par 2.0, pas par 8.0
df["DaysTotal"] = df["HoursTotal"].astype(float) / float(HOURS_PER_DAY)

df["Action_obs"] = np.where(df["DaysTotal"] >= THRESHOLD_DAYS_FOR_A, A, B)

print("\n==================== SECTION 4 — Action label (A/B) via HoursTotal (CORRIGÉ) ====================")
print(f"HOURS_PER_DAY = {HOURS_PER_DAY:.1f}h (séquence journalière), STANDARD_HOURS_A≈{STANDARD_HOURS_A:.1f}h (10 jours)")
print(f"Règle de labellisation observée: Action=A si DaysTotal >= {THRESHOLD_DAYS_FOR_A:.1f} jours (soit >= {THRESHOLD_DAYS_FOR_A*HOURS_PER_DAY:.1f}h), sinon B")

preview_cols = [c for c in ["HoursTotal", "DaysTotal", "Action_obs"] if c in df.columns]
print("\nAperçu HoursTotal/DaysTotal/Action_obs (head 15):")
print(df[preview_cols].head(15).to_string(index=False))

print("\nRépartition Action_obs:")
print(df["Action_obs"].value_counts(dropna=False).to_string())

df.to_csv(out_path("SECTION4_L3_with_action_labels.csv"), index=False, encoding="utf-8-sig")

# ============================================================
# 3) TEST HYPOTHÈSE (Elysa) sur OBSERVÉ: niveaux 1-2 => A, niveaux 3-4-5 => B
# ============================================================
def summarize_action_by_level(df0: pd.DataFrame, label_col="Action_obs"):
    rows = []
    dfx = df0.dropna(subset=["Pretest_i"])
    for lvl, g in dfx.groupby("Pretest_i"):
        lvl = int(lvl)
        n = len(g)
        pA = float((g[label_col] == A).mean()) if n > 0 else np.nan
        pB = float((g[label_col] == B).mean()) if n > 0 else np.nan
        rows.append({"Level": lvl, "N": n, "P(A)": round(pA, 4), "P(B)": round(pB, 4)})
    return pd.DataFrame(rows).sort_values("Level")

df_hyp_obs = summarize_action_by_level(df, label_col="Action_obs")
print("\n[Hypothèse Elysa] Répartition observée (Action_obs) par niveau Pretest:")
print(df_hyp_obs.to_string(index=False))
df_hyp_obs.to_csv(out_path("SECTION4_hypothesis_obs_by_level.csv"), index=False, encoding="utf-8-sig")

# ============================================================
# 4) APPRENDRE π_ML(a|s,features)  (classification binaire A vs B)
# ============================================================
demographic_cols = [c for c in ["Age", "Genre", "Zone", "LevelTag"] if c in df.columns]
state_col = "Pretest_i"
freq_cols = [c for c in df.columns if c.startswith("freq_pos") or c.startswith("freq_all")]

feature_cols = [state_col] + demographic_cols + freq_cols
missing = [c for c in feature_cols if c not in df.columns]
if missing:
    raise ValueError(f"Colonnes manquantes dans L3 pour apprendre π_ML : {missing}")

X = df[feature_cols].copy()
y = (df["Action_obs"] == A).astype(int)  # 1 = A, 0 = B

# Cas dégénéré: une seule classe dans tout le dataset
unique_y = np.unique(y.dropna())
if len(unique_y) < 2:
    const = int(unique_y[0]) if len(unique_y) == 1 else 0
    print("\n==================== SECTION 4 — Policy ML ====================")
    print("Impossible d'entraîner π_ML : y ne contient qu'une seule classe (A uniquement ou B uniquement).")
    print(f"On sort une politique constante: P(A)= {1.0 if const==1 else 0.0}")

    df["piML_P(A)"] = 1.0 if const == 1 else 0.0
    df["piML_P_A"] = df["piML_P(A)"]  # alias pratique
    df["piML_Action_hat"] = A if const == 1 else B

    pred_cols = ["Pretest_i", "HoursTotal", "DaysTotal", "Action_obs", "piML_P(A)", "piML_Action_hat"]
    for c in ["Age", "Genre", "Zone", "LevelTag"]:
        if c in df.columns:
            pred_cols.insert(1, c)

    df[pred_cols].to_csv(out_path("SECTION4_policyML_predictions.csv"), index=False, encoding="utf-8-sig")
    df_hyp_ml = summarize_action_by_level(df, label_col="piML_Action_hat")
    df_hyp_ml.to_csv(out_path("SECTION4_hypothesis_piML_by_level.csv"), index=False, encoding="utf-8-sig")

    # export meta minimal
    with open(out_path("SECTION4_best_policyML_model_meta.json"), "w", encoding="utf-8") as f:
        json.dump({
            "best_model_name": "ConstantPolicy",
            "constant_class_A_is_1": bool(const == 1),
            "threshold_days_for_A": THRESHOLD_DAYS_FOR_A,
            "hours_per_day": HOURS_PER_DAY,
            "standard_days_A": STANDARD_DAYS_A
        }, f, ensure_ascii=False, indent=2)

else:
    # preprocessing
    num_cols = []
    cat_cols = []
    for c in X.columns:
        if c in ["Genre", "Zone", "LevelTag"]:
            cat_cols.append(c)
        else:
            num_cols.append(c)

    preproc = ColumnTransformer(
        transformers=[
            ("num", Pipeline([
                ("imp", SimpleImputer(strategy="median")),
                ("sc", StandardScaler())
            ]), num_cols),
            ("cat", Pipeline([
                ("imp", SimpleImputer(strategy="most_frequent")),
                ("oh", OneHotEncoder(handle_unknown="ignore"))
            ]), cat_cols),
        ],
        remainder="drop"
    )

    candidates = {
        "RandomForestClassifier": Pipeline([
            ("prep", preproc),
            ("m", RandomForestClassifier(
                n_estimators=250, random_state=RANDOM_STATE,
                max_depth=12, n_jobs=1
            ))
        ]),
        "GradientBoostingClassifier": Pipeline([
            ("prep", preproc),
            ("m", GradientBoostingClassifier(
                random_state=RANDOM_STATE, n_estimators=250
            ))
        ]),
        "LogisticRegression": Pipeline([
            ("prep", preproc),
            ("m", LogisticRegression(max_iter=5000))
        ]),
        "SVC": Pipeline([
            ("prep", preproc),
            ("m", SVC(probability=True, random_state=RANDOM_STATE))
        ]),
        "XGBoostClassifier": Pipeline([
            ("prep", preproc),
            ("m", xgb.XGBClassifier(
                n_estimators=300, learning_rate=0.07, max_depth=5,
                subsample=0.9, colsample_bytree=0.9,
                random_state=RANDOM_STATE, n_jobs=1,
                eval_metric="logloss"
            ) if xgb_ok else HistGradientBoostingClassifier(random_state=RANDOM_STATE))
        ])
    }
    if not xgb_ok:
        del candidates["XGBoostClassifier"]
        candidates["HistGradientBoostingClassifier"] = Pipeline([
             ("prep", preproc),
             ("m", HistGradientBoostingClassifier(random_state=RANDOM_STATE))
        ])

    def safe_proba_of_class1(model, Xte):
        if hasattr(model, "predict_proba"):
            proba = model.predict_proba(Xte)
            proba = np.asarray(proba)
            if proba.ndim == 2 and proba.shape[1] >= 2:
                return proba[:, 1]
            if hasattr(model, "classes_") and len(getattr(model, "classes_", [])) == 1:
                only = int(model.classes_[0])
                return np.ones(len(Xte)) * float(only)
            return np.zeros(len(Xte), dtype=float)
        pred = model.predict(Xte)
        return pred.astype(float)

    def eval_policy_ml_cv(X, y, pipe, n_splits=5):
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)
        probs_all, preds_all, y_all = [], [], []
        for tr, te in skf.split(X, y):
            m = clone(pipe)
            m.fit(X.iloc[tr], y.iloc[tr])

            pr = safe_proba_of_class1(m, X.iloc[te])
            pr = np.asarray(pr).reshape(-1)

            pd_ = (pr >= 0.5).astype(int)
            probs_all.extend(pr.tolist())
            preds_all.extend(pd_.tolist())
            y_all.extend(y.iloc[te].tolist())

        probs_all = np.array(probs_all)
        preds_all = np.array(preds_all)
        y_all = np.array(y_all)

        auc = np.nan
        if len(np.unique(y_all)) > 1:
            try:
                auc = float(roc_auc_score(y_all, probs_all))
            except Exception:
                auc = np.nan

        return {
            "AUC": auc,
            "F1": float(f1_score(y_all, preds_all, zero_division=0)),
            "ACC": float(accuracy_score(y_all, preds_all))
        }

    print("\n==================== SECTION 4 — Model selection for π_ML(a|s,features) ====================")
    rows = []
    for name, pipe in candidates.items():
        sc = eval_policy_ml_cv(X, y, pipe, n_splits=5)
        rows.append({"Model": name,
                     "AUC": (round(sc["AUC"], 4) if isinstance(sc["AUC"], float) and not np.isnan(sc["AUC"]) else sc["AUC"]),
                     "F1": round(sc["F1"], 4),
                     "ACC": round(sc["ACC"], 4)})

    df_sel = pd.DataFrame(rows)

    def _auc_key(v):
        try:
            return float(v)
        except Exception:
            return -np.inf

    df_sel["_AUC_key"] = df_sel["AUC"].apply(lambda v: _auc_key(v) if v is not None and not (isinstance(v, float) and np.isnan(v)) else -np.inf)
    df_sel = df_sel.sort_values(["_AUC_key", "F1", "ACC"], ascending=False).drop(columns=["_AUC_key"])

    print(df_sel.to_string(index=False))
    df_sel.to_csv(out_path("SECTION4_policyML_model_selection.csv"), index=False, encoding="utf-8-sig")

    best_model_name = df_sel.iloc[0]["Model"]
    best_pipe = candidates[best_model_name]
    best_pipe.fit(X, y)

    print(f"\nBest π_ML model = {best_model_name}")

    if joblib_ok:
        joblib.dump(best_pipe, out_path("SECTION4_best_policyML_model.joblib"))
        with open(out_path("SECTION4_best_policyML_model_meta.json"), "w", encoding="utf-8") as f:
            json.dump({
                "best_model_name": best_model_name,
                "threshold_days_for_A": THRESHOLD_DAYS_FOR_A,
                "hours_per_day": HOURS_PER_DAY,
                "standard_days_A": STANDARD_DAYS_A
            }, f, ensure_ascii=False, indent=2)
    else:
        print("[WARN] joblib indisponible: le modèle n'est pas sérialisé.")

    # ============================================================
    # 5) PRODUITS: π_ML prédictions + analyse hypothèse sur π_ML
    # ============================================================
    pA = safe_proba_of_class1(best_pipe, X)
    pA = np.asarray(pA).reshape(-1)

    df["piML_P(A)"] = pA
    df["piML_P_A"] = df["piML_P(A)"]
    df["piML_Action_hat"] = np.where(df["piML_P(A)"] >= 0.5, A, B)

    pred_cols = ["Pretest_i", "HoursTotal", "DaysTotal", "Action_obs", "piML_P(A)", "piML_Action_hat"]
    for c in ["Age", "Genre", "Zone", "LevelTag"]:
        if c in df.columns:
            pred_cols.insert(1, c)

    df[pred_cols].to_csv(out_path("SECTION4_policyML_predictions.csv"), index=False, encoding="utf-8-sig")

    df_hyp_ml = summarize_action_by_level(df, label_col="piML_Action_hat")
    print("\n[Hypothèse Elysa] Décision π_ML (Action_hat) par niveau Pretest:")
    print(df_hyp_ml.to_string(index=False))
    df_hyp_ml.to_csv(out_path("SECTION4_hypothesis_piML_by_level.csv"), index=False, encoding="utf-8-sig")

    support_rows = []
    for group_name, levels, expect_A in [("Low (1-2)", [1, 2], True), ("High (3-5)", [3, 4, 5], False)]:
        g = df[df["Pretest_i"].isin(levels)]
        if len(g) == 0:
            continue
        pA_obs = float((g["Action_obs"] == A).mean())
        pA_ml = float((g["piML_Action_hat"] == A).mean())
        support_rows.append({
            "Group": group_name,
            "N": int(len(g)),
            "Obs P(A)": round(pA_obs, 4),
            "π_ML P(A_hat)": round(pA_ml, 4),
            "Expected": ("A" if expect_A else "B")
        })

    df_support = pd.DataFrame(support_rows)
    print("\n[Hypothèse Elysa] Synthèse groupée:")
    print(df_support.to_string(index=False))
    df_support.to_csv(out_path("SECTION4_hypothesis_support_summary.csv"), index=False, encoding="utf-8-sig")

print("\nFichiers produits (Section 4):")
print(" -", out_path("SECTION4_L3_with_action_labels.csv"))
print(" -", out_path("SECTION4_policyML_model_selection.csv"))
print(" -", out_path("SECTION4_policyML_predictions.csv"))
print(" -", out_path("SECTION4_hypothesis_obs_by_level.csv"))
print(" -", out_path("SECTION4_hypothesis_piML_by_level.csv"))
print(" -", out_path("SECTION4_hypothesis_support_summary.csv"))
if joblib_ok and os.path.exists(out_path("SECTION4_best_policyML_model.joblib")):
    print(" -", out_path("SECTION4_best_policyML_model.joblib"))


HOURS_PER_DAY = 2.0h (séquence journalière), STANDARD_HOURS_A≈20.0h (10 jours)
Règle de labellisation observée: Action=A si DaysTotal >= 9.0 jours (soit >= 18.0h), sinon B

Aperçu HoursTotal/DaysTotal/Action_obs (head 15):
 HoursTotal  DaysTotal Action_obs
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A
         60       30.0          A

Répartition Action_obs:
Action_obs
A    813

[Hypothèse Elysa] Répartition observée (Action_obs) par niveau Pretest:
 Level   N  P(A)  P(B)
     1   4   1.0   0.0
     2  66   1.0   0.0
     3 166   1.0   0.0
     4 230   1.0   0.0

In [48]:
# -*- coding: utf-8 -*-
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os, json
import numpy as np
import pandas as pd

EPS = 1e-12
A, B = "A", "B"
LEVELS = [1, 2, 3, 4, 5]
TRANSIENT = [1, 2, 3, 4]
ABSORBING = 5
N_LEVELS = 5

# ------------------------------------------------------------
# CONFIG TEMPS (CORRIGÉE)
# ------------------------------------------------------------
# Séquence journalière = 4 activités = 2h TOTAL
HOURS_PER_DAY = 2.0
ACTIVITIES_PER_DAY = 4.0

# ------------------------------------------------------------
# PATHS
# ------------------------------------------------------------
OUT_DIR = "./out/202602/out_pomdp"   # adapte si besoin
os.makedirs(OUT_DIR, exist_ok=True)

def out_path(fname: str) -> str:
    return os.path.join(OUT_DIR, f"L5-{fname}")

L3_PATH = "./data/2025-08/L3.features_by_student.byPretest.csv"  # adapte si besoin
assert os.path.exists(L3_PATH), f"Introuvable : {L3_PATH}"

# Si tu as déjà exporté T_A/T_B dans ton code principal
TA_CSV = os.path.join(OUT_DIR, "L4-AMC_TA_standard.csv")   # from principal
TB_CSV = os.path.join(OUT_DIR, "L4-AMC_TB_intensive.csv")  # from principal

# ------------------------------------------------------------
# UTILITAIRES
# ------------------------------------------------------------
def cap_level(x):
    try:
        v = int(x)
    except Exception:
        return np.nan
    return min(5, max(1, v))

def canonical_QR(T: np.ndarray):
    Q = T[:len(TRANSIENT), :len(TRANSIENT)]
    R = T[:len(TRANSIENT), len(TRANSIENT):]
    return Q, R

def fundamental_matrix_N(Q: np.ndarray):
    I = np.eye(Q.shape[0], dtype=float)
    M = I - Q
    try:
        return np.linalg.inv(M)
    except np.linalg.LinAlgError:
        return np.linalg.pinv(M)

def expected_time_with_costs(T: np.ndarray, cost_vec_transient: np.ndarray):
    """
    Expected cumulative cost until absorption:
      m_cost = N * c
    cost_vec_transient: c_i (heures) pour i=1..4
    """
    Q, _ = canonical_QR(T)
    N = fundamental_matrix_N(Q)
    m = N.dot(cost_vec_transient.reshape(-1, 1)).flatten()
    return {"Q": Q, "N": N, "m": m, "J": float(m.sum())}

def build_T_pi(policy: dict, TA: np.ndarray, TB: np.ndarray) -> np.ndarray:
    T = np.zeros((N_LEVELS, N_LEVELS), dtype=float)
    for s in LEVELS:
        if s == ABSORBING:
            T[s-1, :] = 0.0
            T[s-1, ABSORBING-1] = 1.0
        else:
            act = policy.get(s, A)
            T[s-1, :] = TA[s-1, :] if act == A else TB[s-1, :]
    return T

def policy_all_A():
    return {s: A for s in TRANSIENT}

def bellman_value_iteration_time_cost(TA: np.ndarray, TB: np.ndarray, cost_hours: np.ndarray,
                                      max_iter=20000, tol=1e-12):
    """
    Bellman en HEURES :
      V(5)=0
      V(s)=c(s) + min_a sum_{s'} P(s'|s,a) V(s')
    cost_hours: length 5, cost_hours[4]=0
    """
    V = np.zeros(N_LEVELS, dtype=float)
    # Initialisation pessimiste mais pas infinie
    V[:4] = float(np.max(cost_hours[:4])) * 10.0
    pol = {s: A for s in TRANSIENT}

    for _ in range(max_iter):
        V_old = V.copy()
        for s in TRANSIENT:
            c = float(cost_hours[s-1])
            qA = c + float(np.dot(TA[s-1, :], V_old))
            qB = c + float(np.dot(TB[s-1, :], V_old))
            if qB < qA:
                V[s-1] = qB
                pol[s] = B
            else:
                V[s-1] = qA
                pol[s] = A
        V[ABSORBING-1] = 0.0
        if np.max(np.abs(V - V_old)) < tol:
            break
    return V, pol

def policy_from_piML_predictions(df_with_piML: pd.DataFrame,
                                level_col="Pretest_i",
                                probaA_col="piML_P(A)",
                                threshold=0.5):
    """
    π_ML_state(s) = A si mean(P(A|x)) >= threshold sur les élèves du niveau s, sinon B.
    """
    pol = {}
    for s in TRANSIENT:
        g = df_with_piML[df_with_piML[level_col] == s]
        if len(g) == 0:
            pol[s] = A
            continue
        pA = float(g[probaA_col].mean())
        pol[s] = A if pA >= threshold else B
    return pol

# ------------------------------------------------------------
# (0) LOAD DF
# ------------------------------------------------------------
df = pd.read_csv(L3_PATH)

if "Pretest_i" not in df.columns:
    if "Pretest" in df.columns:
        df["Pretest_i"] = df["Pretest"].apply(cap_level)
    else:
        raise ValueError("L3 doit contenir Pretest ou Pretest_i")

if "HoursTotal" not in df.columns:
    raise ValueError("L3 doit contenir HoursTotal")

# ------------------------------------------------------------
# (1) DEFINIR / CHARGER T_A, T_B
# ------------------------------------------------------------
def empirical_transition_matrix_from_pretest_final(df: pd.DataFrame) -> np.ndarray:
    """
    Estimation empirique de P(Final=j | Pretest=i) (Action A — Standard).
    """
    if "Final_i" not in df.columns:
        if "Final" in df.columns:
            df = df.copy()
            df["Final_i"] = df["Final"].apply(cap_level)
        else:
            raise ValueError("L3 doit contenir Final ou Final_i pour estimer T_A empirique.")

    mat_counts = np.zeros((N_LEVELS, N_LEVELS), dtype=float)
    dfx = df.dropna(subset=["Pretest_i", "Final_i"]).copy()

    for _, r in dfx.iterrows():
        i = int(r["Pretest_i"]) - 1
        j = int(r["Final_i"]) - 1
        mat_counts[i, j] += 1.0

    T = np.zeros_like(mat_counts)
    for i in range(N_LEVELS):
        s = mat_counts[i, :].sum()
        if s > 0:
            T[i, :] = mat_counts[i, :] / s
        else:
            T[i, :] = 1.0 / N_LEVELS

    # force absorbing state 5
    T[ABSORBING-1, :] = 0.0
    T[ABSORBING-1, ABSORBING-1] = 1.0
    return T

def make_intensive_matrix_from_standard(TA: np.ndarray,
                                     diag_shrink: float = 0.80,
                                     regress_shrink: float = 0.70,
                                     boost_to_absorb: float = 1.25) -> np.ndarray:
    """
    Action B — Intensive : contre-factuel normatif (non causal)
    """
    TB = TA.copy().astype(float)
    for i in range(N_LEVELS):
        if i == ABSORBING-1:
            continue
        for j in range(N_LEVELS):
            if j == i:
                TB[i, j] *= diag_shrink
            elif j < i:
                TB[i, j] *= regress_shrink
        TB[i, ABSORBING-1] *= boost_to_absorb
        s = TB[i, :].sum()
        TB[i, :] = (TB[i, :] / s) if s > 0 else (1.0 / N_LEVELS)

    TB[ABSORBING-1, :] = 0.0
    TB[ABSORBING-1, ABSORBING-1] = 1.0
    return TB

def load_T_from_csv(path: str) -> np.ndarray:
    T = pd.read_csv(path, index_col=0)
    arr = T.values.astype(float)
    if arr.shape != (5, 5):
        raise ValueError(f"Matrice {path} doit être 5x5, reçu {arr.shape}")
    return arr

# Priorité: charger depuis CSV L4 si dispo, sinon reconstruire depuis df
if os.path.exists(TA_CSV) and os.path.exists(TB_CSV):
    T_A = load_T_from_csv(TA_CSV)
    T_B = load_T_from_csv(TB_CSV)
    print("\n[INFO] T_A/T_B chargées depuis CSV L4:", TA_CSV, TB_CSV)
else:
    print("\n[INFO] CSV L4 introuvables, reconstruction de T_A/T_B depuis df (L3).")
    T_A = empirical_transition_matrix_from_pretest_final(df)
    T_B = make_intensive_matrix_from_standard(T_A)

# ------------------------------------------------------------
# (2) CALIBRER COÛTS EN HEURES PAR ÉTAT (option 1 fixe, option 2 data)
# ------------------------------------------------------------
# Correction : Coût fixe = 2h (un jour)
cost_hours_fixed = np.array([HOURS_PER_DAY, HOURS_PER_DAY, HOURS_PER_DAY, HOURS_PER_DAY, 0.0], dtype=float)

def calibrate_hours_total_by_level(df: pd.DataFrame):
    stats = (
        df.dropna(subset=["Pretest_i", "HoursTotal"])
          .groupby("Pretest_i")["HoursTotal"]
          .agg(["count", "mean", "median"])
          .reset_index()
          .rename(columns={"Pretest_i": "Level", "count": "N", "mean": "HoursMean", "median": "HoursMedian"})
    )
    return stats.sort_values("Level")

def derive_cost_hours_by_state_from_data(df: pd.DataFrame, T_under_policyA: np.ndarray):
    """
    c_i ≈ mean(HoursTotal | Pretest=i) / t_i(π_A)  (heures par 'étape')
    où t_i(π_A) vient de N=(I-Q)^-1 sous π_A, en unités “étapes”.
    """
    Q, _ = canonical_QR(T_under_policyA)
    N = fundamental_matrix_N(Q)
    t_steps = N.sum(axis=1)  # length 4

    stats = calibrate_hours_total_by_level(df)
    c = np.array([HOURS_PER_DAY]*4, dtype=float)

    for idx, s in enumerate(TRANSIENT):
        row = stats[stats["Level"] == s]
        if len(row) == 0:
            continue
        mean_hours = float(row.iloc[0]["HoursMean"])
        denom = float(t_steps[idx])
        if denom > EPS and mean_hours > EPS:
            c[idx] = mean_hours / denom

    # Correction clip : avec un jour de 2h, on permet des variations larges
    # ex: 0.5h (très rapide) à 10h (5 jours bloqué sur l'étape)
    c = np.clip(c, 0.5, 10.0)
    return np.array([c[0], c[1], c[2], c[3], 0.0], dtype=float), stats, t_steps

# Policy π_A
piA = policy_all_A()
T_piA = build_T_pi(piA, T_A, T_B)

cost_hours_calib, stats_hours, t_steps_piA = derive_cost_hours_by_state_from_data(df, T_piA)

# ------------------------------------------------------------
# (3) CALCULS : π_A, π*_hours, π_ML_state
# ------------------------------------------------------------
print("\n==================== SECTION 5 — Expected time-to-absorption (hours/days) (CORRIGÉ) ====================")
print(f"HOURS_PER_DAY = {HOURS_PER_DAY:.1f}h (séquence journalière)")
print("\n--- Coûts c(s) en heures (par étape) ---")
print("Option FIXE ( 2h):", {s: float(cost_hours_fixed[s-1]) for s in LEVELS})
print("Option CALIBRÉE (data):", {s: float(cost_hours_calib[s-1]) for s in LEVELS})

print("\nHeures observées (HoursTotal) par niveau initial (L3):")
print(stats_hours.to_string(index=False))

res_piA_fixed = expected_time_with_costs(T_piA, cost_hours_fixed[:4])
res_piA_calib = expected_time_with_costs(T_piA, cost_hours_calib[:4])

def _print_expected(res, label, cost_hours_vec):
    print(f"\n[{label}] Temps attendu jusqu’à maîtrise (état 5) — par niveau initial")
    rows = []
    for i, s in enumerate(TRANSIENT):
        hours = float(res["m"][i])
        # Correction : on divise par 2.0 pour avoir les jours
        days = hours / HOURS_PER_DAY
        rows.append({
            "StartLevel": s,
            "ExpectedHours": round(hours, 2),
            "ExpectedDays": round(days, 2),
            "CostHoursPerStep": round(float(cost_hours_vec[s-1]), 2)
        })
    df_out = pd.DataFrame(rows)
    print(df_out.to_string(index=False))
    return df_out

df_piA_fixed = _print_expected(res_piA_fixed, "π_A (A partout) + coût FIXE 2h", cost_hours_fixed)
df_piA_calib = _print_expected(res_piA_calib, "π_A (A partout) + coût CALIBRÉ", cost_hours_calib)

# Bellman optimal en HEURES => π*_hours
V_hours_star, pi_hours_star = bellman_value_iteration_time_cost(T_A, T_B, cost_hours_calib)
T_pi_hours_star = build_T_pi(pi_hours_star, T_A, T_B)
res_pi_star_hours = expected_time_with_costs(T_pi_hours_star, cost_hours_calib[:4])

print("\n--- Politique optimale π*_hours (Bellman en heures) ---")
print("π*_hours (state->A/B):", pi_hours_star)
print("V*_hours (états 1..5):", [round(float(x), 3) for x in V_hours_star])

df_pi_star_hours = _print_expected(res_pi_star_hours, "π*_hours (optimal temps) + coût CALIBRÉ", cost_hours_calib)

# π_ML_state : agrégée par niveau si Section 4 a été exécutée et a produit piML_P(A)
pi_ml_state, df_pi_ml = None, None
if "piML_P(A)" in df.columns:
    pi_ml_state = policy_from_piML_predictions(df, level_col="Pretest_i", probaA_col="piML_P(A)", threshold=0.5)
    T_pi_ml_state = build_T_pi(pi_ml_state, T_A, T_B)
    res_pi_ml = expected_time_with_costs(T_pi_ml_state, cost_hours_calib[:4])

    print("\n--- Politique π_ML_state (agrégée par niveau) ---")
    print("π_ML_state (state->A/B):", pi_ml_state)
    df_pi_ml = _print_expected(res_pi_ml, "π_ML_state + coût CALIBRÉ", cost_hours_calib)
else:
    print("\n[INFO] df ne contient pas piML_P(A). Exécute Section 4 avant si tu veux π_ML_state.")

# ------------------------------------------------------------
# (4) EXPORTS
# ------------------------------------------------------------
export = {
    "HOURS_PER_DAY": HOURS_PER_DAY,
    "cost_hours_fixed": cost_hours_fixed.tolist(),
    "cost_hours_calibrated": cost_hours_calib.tolist(),
    "piA": piA,
    "pi_hours_star": pi_hours_star,
    "V_hours_star": V_hours_star.tolist(),
    "pi_ml_state": (pi_ml_state if pi_ml_state is not None else None),
    "expected_piA_fixed_hours": df_piA_fixed.to_dict(orient="records"),
    "expected_piA_calib_hours": df_piA_calib.to_dict(orient="records"),
    "expected_pi_star_hours": df_pi_star_hours.to_dict(orient="records"),
    "expected_pi_ml_hours": (df_pi_ml.to_dict(orient="records") if df_pi_ml is not None else None)
}

with open(out_path("expected_time_to_absorption_hours_days.json"), "w", encoding="utf-8") as f:
    json.dump(export, f, ensure_ascii=False, indent=2)

pd.DataFrame(export["expected_piA_fixed_hours"]).to_csv(out_path("expected_piA_fixed.csv"), index=False, encoding="utf-8-sig")
pd.DataFrame(export["expected_piA_calib_hours"]).to_csv(out_path("expected_piA_calib.csv"), index=False, encoding="utf-8-sig")
pd.DataFrame(export["expected_pi_star_hours"]).to_csv(out_path("expected_pi_star_hours.csv"), index=False, encoding="utf-8-sig")
if export["expected_pi_ml_hours"] is not None:
    pd.DataFrame(export["expected_pi_ml_hours"]).to_csv(out_path("expected_pi_ml_hours.csv"), index=False, encoding="utf-8-sig")

print("\n Exports (Section 5):")
print(" -", out_path("expected_time_to_absorption_hours_days.json"))
print(" -", out_path("expected_piA_fixed.csv"))
print(" -", out_path("expected_piA_calib.csv"))
print(" -", out_path("expected_pi_star_hours.csv"))
if export["expected_pi_ml_hours"] is not None:
    print(" -", out_path("expected_pi_ml_hours.csv"))


[INFO] CSV L4 introuvables, reconstruction de T_A/T_B depuis df (L3).

HOURS_PER_DAY = 2.0h (séquence journalière)

--- Coûts c(s) en heures (par étape) ---
Option FIXE ( 2h): {1: 2.0, 2: 2.0, 3: 2.0, 4: 2.0, 5: 0.0}
Option CALIBRÉE (data): {1: 10.0, 2: 10.0, 3: 10.0, 4: 10.0, 5: 0.0}

Heures observées (HoursTotal) par niveau initial (L3):
 Level   N  HoursMean  HoursMedian
     1   4  55.000000         60.0
     2  66  50.909091         60.0
     3 166  51.445783         60.0
     4 230  35.565217         20.0
     5 347  47.377522         60.0

[π_A (A partout) + coût FIXE 2h] Temps attendu jusqu’à maîtrise (état 5) — par niveau initial
 StartLevel  ExpectedHours  ExpectedDays  CostHoursPerStep
          1           7.47          3.73               2.0
          2           6.94          3.47               2.0
          3           4.84          2.42               2.0
          4           4.88          2.44               2.0

[π_A (A partout) + coût CALIBRÉ] Temps attendu jusqu’à m

In [49]:
# -*- coding: utf-8 -*-
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os, json
import numpy as np
import pandas as pd

# ------------------------------------------------------------
# CONSTANTES & CONFIGURATION
# ------------------------------------------------------------
EPS = 1e-12
A, B = "A", "B"

LEVELS = [1, 2, 3, 4, 5]
TRANSIENT = [1, 2, 3, 4]
ABSORBING = 5
N_LEVELS = 5

MAX_DAYS_CAP = 60  # sécurité anti-boucle infinie

# ------------------------------------------------------------
# CONFIG TEMPS
# ------------------------------------------------------------
HOURS_PER_DAY = 2.0
ACTIVITIES_PER_DAY = 4.0

# ------------------------------------------------------------
# PATHS OUTPUT
# ------------------------------------------------------------
OUT_DIR = "./out/202602/out_pomdp"
os.makedirs(OUT_DIR, exist_ok=True)

def out_path(fname: str) -> str:
    return os.path.join(OUT_DIR, f"L6-{fname}")

# ------------------------------------------------------------
# INPUTS (L1/L3 + artefacts L4/L5)
# ------------------------------------------------------------
L1_XLSX_PATH = "./data/2025-08/L1.20250818-DataMathsElysa.xlsx"   # <<< IMPORTANT: chemin réel dans ton environnement
L3_PATH = "./data/2025-08/L3.features_by_student.byPretest.csv"

TA_CSV = os.path.join(OUT_DIR, "L4-AMC_TA_standard.csv")
TB_CSV = os.path.join(OUT_DIR, "L4-AMC_TB_intensive.csv")

SECTION5_JSON = os.path.join(OUT_DIR, "L5-expected_time_to_absorption_hours_days.json")

# ------------------------------------------------------------
# DEFAULTS si variables non définies
# ------------------------------------------------------------
START_LEVEL = int(globals().get("START_LEVEL", 1))
TARGET_LEVEL = int(globals().get("TARGET_LEVEL", 5))

BY_MODEL_NAME = globals().get("BY_MODEL_NAME", "PolicyTimeOptimal")
TECHNIQUE_MDP = globals().get("TECHNIQUE_MDP", "Baseline MDP (fully observed)")
TECHNIQUE_POMDP = globals().get("TECHNIQUE_POMDP", "POMDP approx (HMM belief + projection)")


# ------------------------------------------------------------
# UTILITAIRES GÉNÉRAUX
# ------------------------------------------------------------
def cap_level(x):
    try:
        v = int(x)
    except Exception:
        return np.nan
    return min(5, max(1, v))

def _safe_tag(s: str) -> str:
    return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in str(s))

def load_T_from_csv(path: str) -> np.ndarray:
    if not os.path.exists(path):
        return None
    T = pd.read_csv(path, index_col=0)
    arr = T.values.astype(float)
    if arr.shape != (5, 5):
        return None
    return arr

def load_T_from_csv_soft(path: str) -> np.ndarray:
    """Retourne None si absent ou mauvaise taille (fallback safe)."""
    if not os.path.exists(path):
        return None
    try:
        T = pd.read_csv(path, index_col=0)
        arr = T.values.astype(float)
        if arr.shape != (5, 5):
            return None
        return arr
    except Exception:
        return None

def empirical_transition_matrix_from_pretest_final(df: pd.DataFrame) -> np.ndarray:
    """Estimation empirique de P(Final=j | Pretest=i) (Action A — Standard)."""
    if "Final_i" not in df.columns:
        if "Final" in df.columns:
            df = df.copy()
            df["Final_i"] = df["Final"].apply(cap_level)
        else:
            print("[WARN] Pas de colonne Final/Final_i. Matrice identité.")
            return np.eye(N_LEVELS)

    if "Pretest_i" not in df.columns:
        if "Pretest" in df.columns:
            df = df.copy()
            df["Pretest_i"] = df["Pretest"].apply(cap_level)
        else:
            print("[WARN] Pas de colonne Pretest/Pretest_i. Matrice identité.")
            return np.eye(N_LEVELS)

    mat_counts = np.zeros((N_LEVELS, N_LEVELS), dtype=float)
    dfx = df.dropna(subset=["Pretest_i", "Final_i"]).copy()

    for _, r in dfx.iterrows():
        i = int(r["Pretest_i"]) - 1
        j = int(r["Final_i"]) - 1
        if 0 <= i < N_LEVELS and 0 <= j < N_LEVELS:
            mat_counts[i, j] += 1.0

    T = np.zeros_like(mat_counts)
    for i in range(N_LEVELS):
        s = mat_counts[i, :].sum()
        if s > 0:
            T[i, :] = mat_counts[i, :] / s
        else:
            T[i, :] = 1.0 / N_LEVELS

    # état 5 absorbant
    T[ABSORBING-1, :] = 0.0
    T[ABSORBING-1, ABSORBING-1] = 1.0
    return T

def make_intensive_matrix_from_standard(TA: np.ndarray,
                                        diag_shrink: float = 0.80,
                                        regress_shrink: float = 0.70,
                                        boost_to_absorb: float = 1.25) -> np.ndarray:
    """Action B — Intensive : contre-factuel normatif (non causal)."""
    TB = TA.copy().astype(float)
    for i in range(N_LEVELS):
        if i == ABSORBING-1:
            continue
        for j in range(N_LEVELS):
            if j == i:
                TB[i, j] *= diag_shrink
            elif j < i:
                TB[i, j] *= regress_shrink
        TB[i, ABSORBING-1] *= boost_to_absorb
        s = TB[i, :].sum()
        TB[i, :] = (TB[i, :] / s) if s > 0 else (1.0 / N_LEVELS)

    TB[ABSORBING-1, :] = 0.0
    TB[ABSORBING-1, ABSORBING-1] = 1.0
    return TB

def bellman_value_iteration_time_cost(TA: np.ndarray, TB: np.ndarray, cost_hours: np.ndarray,
                                      max_iter=20000, tol=1e-12):
    """
    Bellman en HEURES :
      V(5)=0
      V(s)=c(s) + min_a sum_{s'} P(s'|s,a) V(s')
    """
    V = np.zeros(N_LEVELS, dtype=float)
    V[:4] = float(np.max(cost_hours[:4])) * 10.0
    pol = {s: A for s in TRANSIENT}

    for _ in range(max_iter):
        V_old = V.copy()
        for s in TRANSIENT:
            c = float(cost_hours[s-1])
            qA = c + float(np.dot(TA[s-1, :], V_old))
            qB = c + float(np.dot(TB[s-1, :], V_old))
            if qB < qA:
                V[s-1] = qB
                pol[s] = B
            else:
                V[s-1] = qA
                pol[s] = A
        V[ABSORBING-1] = 0.0
        if np.max(np.abs(V - V_old)) < tol:
            break
    return V, pol

def choose_action(level: int, policy: dict) -> str:
    return policy.get(int(level), A)

def next_state_sample(level: int, action: str, TA: np.ndarray, TB: np.ndarray, rng: np.random.Generator) -> int:
    row = TA[level-1, :] if action == A else TB[level-1, :]
    row = np.clip(row, 0.0, 1.0)
    s = float(row.sum())
    row = (row / s) if s > EPS else (np.ones_like(row) / len(row))
    return int(rng.choice(np.arange(1, N_LEVELS+1), p=row))

def next_state_most_likely(level: int, action: str, TA: np.ndarray, TB: np.ndarray) -> int:
    row = TA[level-1, :] if action == A else TB[level-1, :]
    return int(np.argmax(row) + 1)


# ------------------------------------------------------------
# UTILITAIRES GÉNÉRAUX
# ------------------------------------------------------------
def cap_level(x):
    try:
        v = int(x)
    except Exception:
        return np.nan
    return min(5, max(1, v))

def _safe_tag(s: str) -> str:
    return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in str(s))

def load_T_from_csv_soft(path: str) -> np.ndarray:
    """Retourne None si absent ou mauvaise taille (fallback safe)."""
    if not os.path.exists(path):
        return None
    try:
        T = pd.read_csv(path, index_col=0)
        arr = T.values.astype(float)
        if arr.shape != (5, 5):
            return None
        return arr
    except Exception:
        return None

def empirical_transition_matrix_from_pretest_final(df: pd.DataFrame) -> np.ndarray:
    """Estimation empirique de P(Final=j | Pretest=i) (Action A — Standard)."""
    if "Final_i" not in df.columns:
        if "Final" in df.columns:
            df = df.copy()
            df["Final_i"] = df["Final"].apply(cap_level)
        else:
            print("[WARN] Pas de colonne Final/Final_i. Matrice identité.")
            return np.eye(N_LEVELS)

    if "Pretest_i" not in df.columns:
        if "Pretest" in df.columns:
            df = df.copy()
            df["Pretest_i"] = df["Pretest"].apply(cap_level)
        else:
            print("[WARN] Pas de colonne Pretest/Pretest_i. Matrice identité.")
            return np.eye(N_LEVELS)

    mat_counts = np.zeros((N_LEVELS, N_LEVELS), dtype=float)
    dfx = df.dropna(subset=["Pretest_i", "Final_i"]).copy()

    for _, r in dfx.iterrows():
        i = int(r["Pretest_i"]) - 1
        j = int(r["Final_i"]) - 1
        if 0 <= i < N_LEVELS and 0 <= j < N_LEVELS:
            mat_counts[i, j] += 1.0

    T = np.zeros_like(mat_counts)
    for i in range(N_LEVELS):
        s = mat_counts[i, :].sum()
        if s > 0:
            T[i, :] = mat_counts[i, :] / s
        else:
            T[i, :] = 1.0 / N_LEVELS

    # état 5 absorbant
    T[ABSORBING-1, :] = 0.0
    T[ABSORBING-1, ABSORBING-1] = 1.0
    return T

def make_intensive_matrix_from_standard(TA: np.ndarray,
                                        diag_shrink: float = 0.80,
                                        regress_shrink: float = 0.70,
                                        boost_to_absorb: float = 1.25) -> np.ndarray:
    """Action B — Intensive : contre-factuel normatif (non causal)."""
    TB = TA.copy().astype(float)
    for i in range(N_LEVELS):
        if i == ABSORBING-1:
            continue
        for j in range(N_LEVELS):
            if j == i:
                TB[i, j] *= diag_shrink
            elif j < i:
                TB[i, j] *= regress_shrink
        TB[i, ABSORBING-1] *= boost_to_absorb
        s = TB[i, :].sum()
        TB[i, :] = (TB[i, :] / s) if s > 0 else (1.0 / N_LEVELS)

    TB[ABSORBING-1, :] = 0.0
    TB[ABSORBING-1, ABSORBING-1] = 1.0
    return TB

def bellman_value_iteration_time_cost(TA: np.ndarray, TB: np.ndarray, cost_hours: np.ndarray,
                                      max_iter=20000, tol=1e-12):
    """
    Bellman en HEURES :
      V(5)=0
      V(s)=c(s) + min_a sum_{s'} P(s'|s,a) V(s')
    """
    V = np.zeros(N_LEVELS, dtype=float)
    V[:4] = float(np.max(cost_hours[:4])) * 10.0
    pol = {s: A for s in TRANSIENT}

    for _ in range(max_iter):
        V_old = V.copy()
        for s in TRANSIENT:
            c = float(cost_hours[s-1])
            qA = c + float(np.dot(TA[s-1, :], V_old))
            qB = c + float(np.dot(TB[s-1, :], V_old))
            if qB < qA:
                V[s-1] = qB
                pol[s] = B
            else:
                V[s-1] = qA
                pol[s] = A
        V[ABSORBING-1] = 0.0
        if np.max(np.abs(V - V_old)) < tol:
            break
    return V, pol

def choose_action(level: int, policy: dict) -> str:
    return policy.get(int(level), A)

def next_state_sample(level: int, action: str, TA: np.ndarray, TB: np.ndarray, rng: np.random.Generator) -> int:
    row = TA[level-1, :] if action == A else TB[level-1, :]
    row = np.clip(row, 0.0, 1.0)
    s = float(row.sum())
    row = (row / s) if s > EPS else (np.ones_like(row) / len(row))
    return int(rng.choice(np.arange(1, N_LEVELS+1), p=row))

def next_state_most_likely(level: int, action: str, TA: np.ndarray, TB: np.ndarray) -> int:
    row = TA[level-1, :] if action == A else TB[level-1, :]
    return int(np.argmax(row) + 1)


# ------------------------------------------------------------
# L1 — ACTIVITÉS : vecteur FIXE de 10 activités par niveau + vecteur par jour
# ------------------------------------------------------------

LEVEL_PREFIX = {
    1: "Deb",
    2: "Unchiffe",
    3: "Deuxchiffres",
    4: "TroisA",
    5: "TroisB",
}

DF_L1_ACT = None              # dataframe onglet "Activités"
LEVEL_ACT_COLS_10 = {}        # level -> liste 10 colonnes activités

def _pick_10_activity_cols_for_level(df_act: pd.DataFrame, level: int):
    """
    Retourne 10 colonnes d'activités pour un niveau, dans l'ordre du fichier.
    Si <10 => padding avec "Unused".
    """
    pref = LEVEL_PREFIX[int(level)]
    cols = [c for c in df_act.columns if c not in ["Semaine", "Jour"]]
    cols = [_normalize_colname(c) for c in cols]

    # IMPORTANT: pandas garde les noms originaux; on va mapper normalisé -> original
    norm_to_orig = {_normalize_colname(c): c for c in df_act.columns}

    # récupérer colonnes commençant par le prefix
    all_norm = [c for c in cols if c.startswith(pref)]
    chosen_norm = all_norm[:10]

    chosen = [norm_to_orig[c] for c in chosen_norm if c in norm_to_orig]
    # padding si nécessaire
    if len(chosen) < 10:
        for k in range(len(chosen)+1, 11):
            chosen.append(f"{pref}_Unused{k}")  # colonne fictive (affichage)
    return chosen

def load_L1_activities_or_fallback():
    """
    Charge l'onglet 'Activités' du fichier L1 et prépare 10 colonnes par niveau.
    """
    global DF_L1_ACT, LEVEL_ACT_COLS_10
    DF_L1_ACT = None
    LEVEL_ACT_COLS_10 = {}

    loaded = False
    if os.path.exists(L1_XLSX_PATH):
        try:
            DF_L1_ACT = pd.read_excel(L1_XLSX_PATH, sheet_name="Activités")
            # normalise colonnes
            DF_L1_ACT.columns = [_normalize_colname(c) for c in DF_L1_ACT.columns]

            # convert Jour en int
            if "Jour" in DF_L1_ACT.columns:
                DF_L1_ACT["Jour"] = DF_L1_ACT["Jour"].apply(lambda x: int(x) if pd.notna(x) else x)

            for lvl in LEVELS:
                LEVEL_ACT_COLS_10[int(lvl)] = _pick_10_activity_cols_for_level(DF_L1_ACT, int(lvl))

            loaded = True
        except Exception as e:
            DF_L1_ACT = None
            LEVEL_ACT_COLS_10 = {}
            print(f"[WARN] Lecture L1 échouée (sheet Activités). Err={e}")

    return loaded

def activities10_for_level(level: int):
    """
    Retourne la liste FIXE de 10 activités pour ce niveau.
    """
    level = int(level)
    if DF_L1_ACT is None:
        return [f"GenAct{j}_Lv{level}" for j in range(1, 11)]
    return list(LEVEL_ACT_COLS_10[level])

def _day_to_L1_day(day: int) -> int:
    """
    L1 ne contient que 10 jours.
    Pour 60 jours: on boucle => 1..10,1..10,...
    """
    day = int(day)
    return ((day - 1) % 10) + 1

def vector10_for_level_day(level: int, day: int):
    """
    Construit le vecteur taille 10 pour (niveau, day),
    en utilisant le jour équivalent dans L1 (cycle 10 jours).
    """
    level = int(level)
    day_eff = _day_to_L1_day(day)

    acts10 = activities10_for_level(level)
    v = [0] * 10

    if DF_L1_ACT is None:
        DISPERSED_PATTERNS = [
            [0, 3, 6, 9],
            [1, 4, 7, 0],
            [2, 5, 8, 1],
            [3, 6, 9, 2],
            [4, 7, 0, 3],
            [5, 8, 1, 4],
            [6, 9, 2, 5],
            [7, 0, 3, 6],
            [8, 1, 4, 7],
            [9, 2, 5, 8],
        ]
        ORDER_PATTERNS = [
            [1, 2, 3, 4],
            [4, 1, 3, 2],
            [2, 4, 1, 3],
            [3, 1, 4, 2],
            [2, 1, 4, 3],
            [4, 3, 2, 1],
            [1, 3, 2, 4],
            [3, 4, 1, 2],
            [1, 4, 2, 3],
            [2, 3, 4, 1],
        ]


        # fallback : change quand même avec day (petite variation)
        # (ça évite 60 jours identiques même en fallback)
        # index pattern selon day_eff (1..10)
        idxs = DISPERSED_PATTERNS[(day_eff - 1) % len(DISPERSED_PATTERNS)]
        order = ORDER_PATTERNS[(day_eff - 1) % len(ORDER_PATTERNS)]

        # appliquer positions selon l'ordre choisi
        for col_idx, pos in zip(idxs, order):
            v[col_idx] = pos

        return v, acts10, day_eff

    # ligne du jour (jour 1..10 dans DF)
    row = DF_L1_ACT.loc[DF_L1_ACT["Jour"] == day_eff]
    if len(row) == 0:
        return v, acts10, day_eff
    row = row.iloc[0]

    # remplir v via les 10 colonnes
    for i, col in enumerate(acts10):
        if col not in DF_L1_ACT.columns:
            continue
        val = row.get(col, np.nan)
        if pd.isna(val):
            continue
        try:
            iv = int(val)
        except Exception:
            continue
        if 1 <= iv <= 4:
            v[i] = iv

    return v, acts10, day_eff

def action_to_sequence(level: int, day: int, action: str):
    """
    Renvoie pos1..pos4 (noms) du planning L1 pour (niveau, day).
    NB: L1 ne dépend pas de A/B, mais on garde action dans la signature (cohérence).
    """
    v, acts10, day_eff = vector10_for_level_day(level, day)
    pos_to_act = {}
    for idx, p in enumerate(v):
        if p in [1, 2, 3, 4]:
            pos_to_act[int(p)] = acts10[idx]

    return {
        "pos1": pos_to_act.get(1, ""),
        "pos2": pos_to_act.get(2, ""),
        "pos3": pos_to_act.get(3, ""),
        "pos4": pos_to_act.get(4, ""),
        "_day_eff_L1": day_eff,  # debug utile
    }

# Charger L1 maintenant
loaded = load_L1_activities_or_fallback()

# ------------------------------------------------------------
# (3) POMDP auto-stop — Utilitaires et Moteur
# ------------------------------------------------------------
def normalize(p: np.ndarray) -> np.ndarray:
    s = float(p.sum())
    if s <= 0:
        return np.ones_like(p) / len(p)
    return p / s

def belief_init_from_observation(obs_level: int, O_emit: np.ndarray, prior=None) -> np.ndarray:
    if prior is None:
        prior = np.ones(O_emit.shape[1], dtype=float) / O_emit.shape[1]
    b = prior * O_emit[obs_level-1, :]
    return normalize(b)

def belief_predict(b: np.ndarray, Th: np.ndarray) -> np.ndarray:
    return normalize(b.dot(Th))

def belief_update(b_pred: np.ndarray, obs_level: int, O_emit: np.ndarray) -> np.ndarray:
    b_new = b_pred * O_emit[obs_level-1, :]
    return normalize(b_new)

def most_likely_observation_from_belief(b: np.ndarray, O_emit: np.ndarray) -> int:
    p_obs = O_emit.dot(b)
    return int(np.argmax(p_obs) + 1)

def build_plan_pomdp_auto_stop(start_obs_level: int,
                               O_emit: np.ndarray,
                               T_hidden_A: np.ndarray,
                               T_hidden_B: np.ndarray,
                               pi_hidden_star: dict,
                               hidden_names=("Faible","Maitrise"),
                               mode_obs: str = "most_likely",
                               seed: int = 42,
                               max_days: int = MAX_DAYS_CAP):
    rng = np.random.default_rng(seed)
    H0, H1 = 0, 1

    b = belief_init_from_observation(int(start_obs_level), O_emit)
    plan = []
    obs_level = int(start_obs_level)

    for day in range(1, max_days+1):
        z_hat = int(np.argmax(b))
        if z_hat == H1:
            break

        action = pi_hidden_star.get(z_hat, A)

        #  FIX: signature correct (level, day, action)
        seq = action_to_sequence(obs_level, day, action)

        plan.append({
            "Day": day,
            "ObsLevel": obs_level,
            "z_hat": hidden_names[z_hat],
            "belief_Faible": float(b[H0]),
            "belief_Maitrise": float(b[H1]),
            "Action": action,
            "pos1": seq.get("pos1"),
            "pos2": seq.get("pos2"),
            "pos3": seq.get("pos3"),
            "pos4": seq.get("pos4"),
        })

        Th = T_hidden_A if action == A else T_hidden_B
        b_pred = belief_predict(b, Th)

        if mode_obs == "sample":
            p_obs = O_emit.dot(b_pred)
            p_obs = p_obs / max(EPS, float(p_obs.sum()))
            obs_level = int(rng.choice(np.arange(1, N_LEVELS+1), p=p_obs))
        else:
            obs_level = most_likely_observation_from_belief(b_pred, O_emit)

        b = belief_update(b_pred, obs_level, O_emit)

    reached = (int(np.argmax(b)) == H1)
    return plan, reached


def print_and_save_plan_pomdp_variable(plan_rows,
                                       technique: str,
                                       by_model: str,
                                       start_obs_level: int,
                                       target_level: int,
                                       filename_prefix: str,
                                       mode_label: str):
    X = len(plan_rows)
    title = (f"Best sequences of activities selected for {X} days (auto-stop) "
             f"(Start {start_obs_level}, Target {target_level}) :  {technique} |  byModel={by_model} |  mode={mode_label}")
    print(title)

    acts10_header = activities10_for_level(start_obs_level)
    print("Activities(10) fixed order:")
    for k, a in enumerate(acts10_header, start=1):
        print(f"  {k:02d}. {a}")

    lines = [title, "Activities(10) fixed order:"]
    lines += [f"  {k:02d}. {a}" for k, a in enumerate(acts10_header, start=1)]

    out_rows = []
    for r in plan_rows:
        day = int(r.get("Day", 0))
        obs_level = int(r.get("ObsLevel", start_obs_level))

        v10, _, l1_day = vector10_for_level_day(obs_level, day)

        print(f"Day {day} (L1-Day {l1_day}) [ObsLevel={obs_level} z_hat={r.get('z_hat','')} Action={r.get('Action','')}] : {v10}")
        lines.append(f"Day {day} (L1-Day {l1_day}) [ObsLevel={obs_level} z_hat={r.get('z_hat','')} Action={r.get('Action','')}] : {v10}")

        out_rows.append({
            "Day": day,
            "L1_DayUsed": l1_day,
            "StartObsLevel": start_obs_level,
            "TargetLevel": target_level,
            "Technique": technique,
            "ByModel": by_model,
            "Mode": mode_label,
            "ObsLevel": obs_level,
            "z_hat": r.get("z_hat",""),
            "belief_Faible": r.get("belief_Faible",""),
            "belief_Maitrise": r.get("belief_Maitrise",""),
            "Action": r.get("Action",""),
            "pos1": r.get("pos1",""),
            "pos2": r.get("pos2",""),
            "pos3": r.get("pos3",""),
            "pos4": r.get("pos4",""),
            "Vector10": json.dumps(v10, ensure_ascii=False),
            "HoursPerDay": HOURS_PER_DAY,
            "CumHoursIfCompleted": round(day * HOURS_PER_DAY, 2),
        })

    safe_model = _safe_tag(by_model)
    out_txt = out_path(f"{filename_prefix}_autoStop_Start{start_obs_level}_Target{target_level}__byModel-{safe_model}__{mode_label}.txt")
    out_csv = out_path(f"{filename_prefix}_autoStop_Start{start_obs_level}_Target{target_level}__byModel-{safe_model}__{mode_label}.csv")

    with open(out_txt, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
        f.write("\n(0 = non sélectionnée ce jour ; 1..4 = position de l’activité dans la journée)\n")

    pd.DataFrame(out_rows).to_csv(out_csv, index=False, encoding="utf-8-sig")
    return out_txt, out_csv, X


# ============================================================
# (4) CHARGEMENTS / POLITIQUES — Exécution
# ============================================================
print("\n==================== SECTION 6 — AUTO-STOP PLANNING (L1=10 activités FIXES) ====================")
print(f"OUT_DIR = {OUT_DIR}")
print(f"HOURS_PER_DAY = {HOURS_PER_DAY}h")
print(f"START_LEVEL={START_LEVEL}, TARGET_LEVEL={TARGET_LEVEL}, MAX_DAYS_CAP={MAX_DAYS_CAP}")
print(f"L1_XLSX_PATH={L1_XLSX_PATH} | loaded={DF_L1_ACT is not None}")

# (A) Matrices T_A / T_B (L4 sinon reconstruction L3)
T_A = load_T_from_csv_soft(TA_CSV)
T_B = load_T_from_csv_soft(TB_CSV)

if (T_A is None) or (T_B is None):
    if os.path.exists(L3_PATH):
        df = pd.read_csv(L3_PATH)
        if "Pretest_i" not in df.columns and "Pretest" in df.columns:
            df["Pretest_i"] = df["Pretest"].apply(cap_level)
        print("[INFO] L4 CSV introuvables -> reconstruction de T_A/T_B depuis L3.")
        T_A = empirical_transition_matrix_from_pretest_final(df)
        T_B = make_intensive_matrix_from_standard(T_A)
    else:
        print("[WARN] L3 introuvable et L4 absent -> matrices identité fictives.")
        T_A = np.eye(N_LEVELS)
        T_B = np.eye(N_LEVELS)
else:
    print("[INFO] T_A/T_B chargées depuis les artefacts L4 CSV.")

# (B) Politiques (SECTION5 JSON sinon Bellman local) : Charger policy Section 5
pi_hours_star = None
pi_ml_state = None
SECTION5_data = {}

if os.path.exists(SECTION5_JSON):
    try:
        with open(SECTION5_JSON, "r", encoding="utf-8") as f:
            SECTION5_data = json.load(f)

        if isinstance(SECTION5_data.get("pi_hours_star", None), dict):
            pi_hours_star = {int(k): v for k, v in SECTION5_data["pi_hours_star"].items()}
            print("[INFO] pi_hours_star chargée depuis SECTION5 JSON.")

        if isinstance(SECTION5_data.get("pi_ml_state", None), dict):
            pi_ml_state = {int(k): v for k, v in SECTION5_data["pi_ml_state"].items()}
            print("[INFO] pi_ml_state chargée depuis SECTION5 JSON.")

    except Exception as e:
        print(f"[WARN] Impossible de lire SECTION5 JSON : {e}")

if pi_hours_star is None:
    print("[INFO] pi_hours_star absente -> Bellman local (coût=2h/étape).")
    cost_hours_vec = np.array([HOURS_PER_DAY]*4 + [0.0], dtype=float)
    _, pi_hours_star = bellman_value_iteration_time_cost(T_A, T_B, cost_hours_vec)

print("\n--- Synthèse des Politiques Décisionnelles ---")
print(f"π*_hours (Optimal Temps) : {pi_hours_star}")
print(f"π_ML_state (ML Observé)  : {pi_ml_state}")

# ============================================================
# (5) RUN MDP auto-stop
# ============================================================
policy_opt = pi_hours_star

# ------------------------------------------------------------
# IMPORTANT : redéfinir build_plan_mdp_auto_stop ICI
# pour éviter NameError si cellules non exécutées
# ------------------------------------------------------------
def build_plan_mdp_auto_stop(start_level: int,
                            policy: dict,
                            TA: np.ndarray, TB: np.ndarray,
                            mode: str = "most_likely",
                            seed: int = 42,
                            stop_at: int = 5,
                            max_days: int = MAX_DAYS_CAP):
    """
    Simule une trajectoire MDP jour par jour.
    La séquence affichée provient de L1 (10 activités fixes) par (niveau, jour).
    """
    rng = np.random.default_rng(seed)
    current = int(start_level)
    plan = []

    for day in range(1, max_days+1):
        if current >= stop_at:
            break

        action = choose_action(current, policy)

        # IMPORTANT: on fournit day aussi
        seq = action_to_sequence(current, day, action)

        plan.append({
            "Day": day,
            "Level": current,
            "Action": action,
            "L1_day_used": seq.get("_day_eff_L1", None),
            "pos1": seq.get("pos1"),
            "pos2": seq.get("pos2"),
            "pos3": seq.get("pos3"),
            "pos4": seq.get("pos4"),
            "vector10": vector10_for_level_day(current, day)[0],
            "acts10": activities10_for_level(current),
        })

        if mode == "sample":
            current = next_state_sample(current, action, TA, TB, rng)
        else:
            current = next_state_most_likely(current, action, TA, TB)

    reached = (current >= stop_at)
    return plan, reached, current

# Scénario 1 : most_likely
print("\n[SCÉNARIO] Baseline MDP — POLICY=π*_hours (Most Likely) :\n")
plan_opt_ml, reached_opt, final_state_opt = build_plan_mdp_auto_stop(
    start_level=START_LEVEL,
    policy=policy_opt,
    TA=T_A, TB=T_B,
    mode="most_likely",
    seed=42,
    stop_at=ABSORBING,
    max_days=MAX_DAYS_CAP
)

def print_and_save_plan_variable_days(plan_rows,
                                      technique: str,
                                      by_model: str,
                                      start_level: int,
                                      target_level: int,
                                      filename_prefix: str,
                                      mode_label: str):
    X = len(plan_rows)
    title = (f"Best sequences of activities selected for {X} days (auto-stop) "
             f"(Start {start_level}, Target {target_level}) :  {technique} | byModel={by_model} | mode={mode_label}")
    print(title)

    lines = [title]
    out_rows = []

    for i, r in enumerate(plan_rows, start=1):
        lvl = int(r.get("Level", start_level))  # <= IMPORTANT: niveau du jour
        base_acts10 = activities10_for_level(lvl)

        seq = {"pos1": r.get("pos1"), "pos2": r.get("pos2"), "pos3": r.get("pos3"), "pos4": r.get("pos4")}
        v = [0] * 10

        for pos in [1, 2, 3, 4]:
            a = seq.get(f"pos{pos}")
            if a in base_acts10:
                v[base_acts10.index(a)] = pos

        print(f"Day {i}: {v}")
        lines.append(f"Day {i}: {v}")

        out_rows.append({
            "Day": i,
            "StartLevel": start_level,
            "TargetLevel": target_level,
            "Technique": technique,
            "ByModel": by_model,
            "Mode": mode_label,
            "LevelUsedForDecision": lvl,
            "Action": r.get("Action", ""),
            "L1_day_used": r.get("L1_day_used", ""),
            "ActivityVector10": json.dumps(v, ensure_ascii=False),
            "Activities10_Order": json.dumps(base_acts10, ensure_ascii=False),
            "pos1": seq.get("pos1", ""),
            "pos2": seq.get("pos2", ""),
            "pos3": seq.get("pos3", ""),
            "pos4": seq.get("pos4", ""),
            "HoursPerDay": HOURS_PER_DAY,
            "CumHoursIfCompleted": round(i * HOURS_PER_DAY, 2),
        })

    safe_model = _safe_tag(by_model)
    out_txt = out_path(f"{filename_prefix}_autoStop_Start{start_level}_Target{target_level}__byModel-{safe_model}__{mode_label}.txt")
    out_csv = out_path(f"{filename_prefix}_autoStop_Start{start_level}_Target{target_level}__byModel-{safe_model}__{mode_label}.csv")

    with open(out_txt, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
        f.write("\n(0 = non sélectionnée ce jour ; 1..4 = position de l’activité)\n")

    pd.DataFrame(out_rows).to_csv(out_csv, index=False, encoding="utf-8-sig")
    return out_txt, out_csv, X

print("\n[SCÉNARIO] Baseline MDP — POLICY=π*_hours (Most Likely) :\n")
opt_txt, opt_csv, opt_X = print_and_save_plan_variable_days(
    plan_opt_ml,
    technique=TECHNIQUE_MDP,
    by_model=BY_MODEL_NAME,
    start_level=START_LEVEL,
    target_level=ABSORBING,
    filename_prefix="best_sequences_MDP_OPT",
    mode_label="most_likely"
)

print(f"\n[INFO] Absorption atteinte ? {reached_opt} | État final : {final_state_opt} | "
      f"X_days : {opt_X} | Heures totales : {opt_X*HOURS_PER_DAY:.1f}h")

# Scénario 2 : sample
plan_opt_sample, reached_opt_s, final_state_opt_s = build_plan_mdp_auto_stop(
    start_level=START_LEVEL,
    policy=policy_opt,
    TA=T_A, TB=T_B,
    mode="sample",
    seed=42,
    stop_at=ABSORBING,
    max_days=MAX_DAYS_CAP
)

print("\n[SCÉNARIO] Baseline MDP — POLICY=π*_hours (Sample) :\n")
optS_txt, optS_csv, optS_X = print_and_save_plan_variable_days(
    plan_opt_sample,
    technique=TECHNIQUE_MDP,
    by_model=BY_MODEL_NAME,
    start_level=START_LEVEL,
    target_level=ABSORBING,
    filename_prefix="best_sequences_MDP_OPT",
    mode_label="sample_seed42"
)

print(f"\n[INFO] (Stochastique) Absorption atteinte ? {reached_opt_s} | "
      f"X_days : {optS_X} | Heures totales : {optS_X*HOURS_PER_DAY:.1f}h")

# Scénario 3 : politique ML si disponible
ml_txt = ml_csv = None
ml_X = None
reached_ml = None
final_state_ml = None

if isinstance(pi_ml_state, dict) and len(pi_ml_state) > 0:
    plan_ml, reached_ml, final_state_ml = build_plan_mdp_auto_stop(
        start_level=START_LEVEL,
        policy=pi_ml_state,
        TA=T_A, TB=T_B,
        mode="most_likely",
        seed=42,
        stop_at=ABSORBING,
        max_days=MAX_DAYS_CAP
    )

    print("\n[SCÉNARIO] Baseline MDP — POLICY=π_ML_state (Most Likely) :\n")
    ml_txt, ml_csv, ml_X = print_and_save_plan_variable_days(
        plan_ml,
        technique=TECHNIQUE_MDP,
        by_model=BY_MODEL_NAME,
        start_level=START_LEVEL,
        target_level=ABSORBING,
        filename_prefix="best_sequences_MDP_piML",
        mode_label="most_likely"
    )

    print(f"\n[INFO] π_ML_state atteint maîtrise ? {reached_ml} | "
          f"X_days : {ml_X} | Heures totales : {ml_X*HOURS_PER_DAY:.1f}h")
else:
    print("\n[INFO] Politique π_ML_state non disponible (Section 4/5).")

# ============================================================
# (6) RUN POMDP auto-stop (si matrices latentes disponibles)
# ============================================================
p_txt = p_csv = None
p_X = None
reached_p = None

if all(k in globals() for k in ["O_emit", "T_hidden_A", "T_hidden_B", "pi_hidden_star"]):
    if isinstance(pi_hidden_star, dict) and len(pi_hidden_star) > 0:
        any_key = list(pi_hidden_star.keys())[0]
        if not isinstance(any_key, int):
            pi_hidden_star_int = {0: pi_hidden_star.get("Faible", A), 1: pi_hidden_star.get("Maitrise", A)}
        else:
            pi_hidden_star_int = pi_hidden_star
    else:
        pi_hidden_star_int = {0: A, 1: A}

    plan_pomdp_ml, reached_p = build_plan_pomdp_auto_stop(
        start_obs_level=START_LEVEL,
        O_emit=O_emit,
        T_hidden_A=T_hidden_A,
        T_hidden_B=T_hidden_B,
        pi_hidden_star=pi_hidden_star_int,
        hidden_names=("Faible", "Maitrise"),
        mode_obs="most_likely",
        seed=42,
        max_days=MAX_DAYS_CAP
    )

    print("\n[SCÉNARIO] POMDP approx (HMM belief + projection) :\n")
    p_txt, p_csv, p_X = print_and_save_plan_pomdp_variable(
        plan_pomdp_ml,
        technique=TECHNIQUE_POMDP,
        by_model=BY_MODEL_NAME,
        start_obs_level=START_LEVEL,
        target_level=ABSORBING,
        filename_prefix="best_sequences_POMDP_OPT",
        mode_label="most_likely"
    )

    print(f"\n[INFO] POMDP maîtrise latente atteinte ? {reached_p} | "
          f"X_days : {p_X} | Heures totales : {p_X*HOURS_PER_DAY:.1f}h")
else:
    print("\n[INFO] POMDP ignoré : matrices latentes manquantes.")

# ============================================================
# (7) EXPORT SUMMARY JSON
# ============================================================
summary6 = {
    "OUT_DIR": OUT_DIR,
    "MAX_DAYS_CAP": MAX_DAYS_CAP,
    "HOURS_PER_DAY": HOURS_PER_DAY,
    "START_LEVEL": START_LEVEL,
    "TARGET_LEVEL": TARGET_LEVEL,
    "pi_hours_star": pi_hours_star,
    "pi_ml_state": (pi_ml_state if isinstance(pi_ml_state, dict) else None),

    "MDP_OPT_most_likely": {
        "X_days": opt_X, "total_hours": float(opt_X * HOURS_PER_DAY) if opt_X else 0,
        "reached": bool(reached_opt), "final_state": int(final_state_opt),
        "txt": opt_txt, "csv": opt_csv
    },
    "MDP_OPT_sample_seed42": {
        "X_days": optS_X, "total_hours": float(optS_X * HOURS_PER_DAY) if optS_X else 0,
        "reached": bool(reached_opt_s), "final_state": int(final_state_opt_s),
        "txt": optS_txt, "csv": optS_csv
    },
    "MDP_piML_most_likely": (
        {
            "X_days": ml_X, "total_hours": float(ml_X * HOURS_PER_DAY) if ml_X else 0,
            "reached": bool(reached_ml), "final_state": int(final_state_ml),
            "txt": ml_txt, "csv": ml_csv
        } if ml_X is not None else None
    ),
    "POMDP_OPT_most_likely": (
        {
            "X_days": p_X, "total_hours": float(p_X * HOURS_PER_DAY) if p_X else 0,
            "reached": bool(reached_p),
            "txt": p_txt, "csv": p_csv
        } if p_X is not None else None
    )
}

with open(out_path("SECTION6_autostop_summary.json"), "w", encoding="utf-8") as f:
    json.dump(summary6, f, ensure_ascii=False, indent=2)

print("\n" + "="*80)
print(f" SECTION 6 TERMINÉE — Synthèse exportée : {out_path('SECTION6_autostop_summary.json')}")
print("="*80)

[WARN] Lecture L1 échouée (sheet Activités). Err=name '_normalize_colname' is not defined

OUT_DIR = ./out/202602/out_pomdp
HOURS_PER_DAY = 2.0h
START_LEVEL=1, TARGET_LEVEL=5, MAX_DAYS_CAP=60
L1_XLSX_PATH=./data/2025-08/L1.20250818-DataMathsElysa.xlsx | loaded=False
[INFO] L4 CSV introuvables -> reconstruction de T_A/T_B depuis L3.
[INFO] pi_hours_star chargée depuis SECTION5 JSON.

--- Synthèse des Politiques Décisionnelles ---
π*_hours (Optimal Temps) : {1: 'B', 2: 'B', 3: 'B', 4: 'B'}
π_ML_state (ML Observé)  : None

[SCÉNARIO] Baseline MDP — POLICY=π*_hours (Most Likely) :


[SCÉNARIO] Baseline MDP — POLICY=π*_hours (Most Likely) :

Best sequences of activities selected for 60 days (auto-stop) (Start 1, Target 5) :  Baseline MDP (fully observed) | byModel=PolicyTimeOptimal | mode=most_likely
Day 1: [1, 0, 0, 2, 0, 0, 3, 0, 0, 4]
Day 2: [2, 4, 0, 0, 1, 0, 0, 3, 0, 0]
Day 3: [0, 3, 2, 0, 0, 4, 0, 0, 1, 0]
Day 4: [0, 0, 2, 3, 0, 0, 1, 0, 0, 4]
Day 5: [4, 0, 0, 3, 2, 0, 0, 1, 0, 0]
Day

In [50]:
# -*- coding: utf-8 -*-
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import json
import numpy as np
import pandas as pd

EPS = 1e-12
A, B = "A", "B"
LEVELS = [1,2,3,4,5]
TRANSIENT = [1,2,3,4]
ABSORBING = 5
N_LEVELS = 5

#  : 4 activités/jour, 2h/activité
HOURS_PER_ACTIVITY = 2.0
ACTIVITIES_PER_DAY = 4.0
HOURS_PER_DAY = HOURS_PER_ACTIVITY * ACTIVITIES_PER_DAY  # 8h

# Monte-Carlo
MC_N = 2000
MC_MAX_DAYS = 120
MC_SEED = 42

# ------------------------------------------------------------
# (0) Helpers
# ------------------------------------------------------------
def _ensure_prob_row(row: np.ndarray) -> np.ndarray:
    row = np.array(row, dtype=float)
    row = np.clip(row, 0.0, 1.0)
    s = float(row.sum())
    if s <= EPS:
        return np.ones_like(row) / len(row)
    return row / s

def q_hours(level: int, action: str, V_hours: np.ndarray, TA: np.ndarray, TB: np.ndarray) -> float:
    """
    Q_hours(s,a) = cost(hours_per_day) + sum_{s'} P(s'|s,a) * V_hours(s')
    V_hours(5)=0
    """
    row = TA[level-1, :] if action == A else TB[level-1, :]
    row = _ensure_prob_row(row)
    return float(HOURS_PER_DAY + np.dot(row, V_hours))

def pick_best_action_by_q(level: int, V_hours: np.ndarray, TA: np.ndarray, TB: np.ndarray) -> dict:
    qA = q_hours(level, A, V_hours, TA, TB)
    qB = q_hours(level, B, V_hours, TA, TB)
    best = A if qA <= qB else B
    return {"qA_hours": qA, "qB_hours": qB, "best_action": best}

def mc_absorption_time_hours(start_level: int, action_fixed: str, TA: np.ndarray, TB: np.ndarray,
                             n: int = 1000, seed: int = 42, max_days: int = 120) -> np.ndarray:
    """
    Simule n trajectoires jusqu’à absorption (5), en appliquant action_fixed à chaque étape.
    Retourne un vecteur des temps (heures).
    """
    rng = np.random.default_rng(seed)
    times = []

    for _ in range(n):
        s = int(start_level)
        days = 0
        while s != ABSORBING and days < max_days:
            row = TA[s-1, :] if action_fixed == A else TB[s-1, :]
            row = _ensure_prob_row(row)
            s = int(rng.choice(np.arange(1, N_LEVELS+1), p=row))
            days += 1

        times.append(float(days * HOURS_PER_DAY))

    return np.array(times, dtype=float)

def summarize_mc(x: np.ndarray) -> dict:
    x = np.array(x, dtype=float)
    return {
        "mean": float(np.mean(x)),
        "median": float(np.median(x)),
        "p10": float(np.quantile(x, 0.10)),
        "p90": float(np.quantile(x, 0.90)),
        "min": float(np.min(x)),
        "max": float(np.max(x)),
    }

# ------------------------------------------------------------
# (1) Choisir V_hours à utiliser
# ------------------------------------------------------------
if "V_hours_star" in globals():
    Vh = np.array(V_hours_star, dtype=float)
    used_V_name = "V_hours_star"
elif "V_star" in globals():
    # fallback : V_star est en “pas” (jours). On approx en heures : pas * 8h
    Vh = np.array(V_star, dtype=float) * HOURS_PER_DAY
    used_V_name = "V_star_scaled_to_hours"
else:
    # dernier recours
    Vh = np.zeros(N_LEVELS, dtype=float)
    used_V_name = "none"

# ------------------------------------------------------------
# (2) Hypothesis check — via Q_hours (Bellman local)
# ------------------------------------------------------------
rows = []
for s in TRANSIENT:
    d = pick_best_action_by_q(s, Vh, T_A, T_B)
    rows.append({
        "Level": s,
        "BestAction_by_Qhours": d["best_action"],
        "Q_A_hours": round(d["qA_hours"], 3),
        "Q_B_hours": round(d["qB_hours"], 3),
        "Delta_B_minus_A_hours": round(d["qB_hours"] - d["qA_hours"], 3),  # <0 => B better
    })

df_q = pd.DataFrame(rows).sort_values("Level")
df_q.to_csv(out_path("SECTION7_hypothesis_level_summary.csv"), index=False, encoding="utf-8-sig")

print("\n==================== SECTION 7 — Hypothesis (Bellman/Q in hours) ====================")
print(f"[INFO] Value used: {used_V_name}")
print(df_q.to_string(index=False))
print("\nLecture  rapide :")
print("- Delta_B_minus_A_hours < 0  => B réduit l’espérance de temps (meilleur).")
print("- Delta_B_minus_A_hours > 0  => A est meilleur.")
print("=> Compare niveaux 1–2 vs 3–4 pour confirmer/infirmer l’hypothèse.\n")

# ------------------------------------------------------------
# (3) Hypothesis check — Monte-Carlo (A fixe vs B fixe)
# ------------------------------------------------------------
mc_rows = []
for s in TRANSIENT:
    tA = mc_absorption_time_hours(s, A, T_A, T_B, n=MC_N, seed=MC_SEED+s, max_days=MC_MAX_DAYS)
    tB = mc_absorption_time_hours(s, B, T_A, T_B, n=MC_N, seed=MC_SEED+100+s, max_days=MC_MAX_DAYS)

    sA = summarize_mc(tA)
    sB = summarize_mc(tB)

    mc_rows.append({
        "Level": s,
        "MC_N": MC_N,
        "A_mean_h": round(sA["mean"], 2),
        "B_mean_h": round(sB["mean"], 2),
        "A_median_h": round(sA["median"], 2),
        "B_median_h": round(sB["median"], 2),
        "B_minus_A_mean_h": round(sB["mean"] - sA["mean"], 2),   # <0 => B better
        "B_minus_A_median_h": round(sB["median"] - sA["median"], 2),
        "A_p10_h": round(sA["p10"], 2),
        "A_p90_h": round(sA["p90"], 2),
        "B_p10_h": round(sB["p10"], 2),
        "B_p90_h": round(sB["p90"], 2),
    })

df_mc = pd.DataFrame(mc_rows).sort_values("Level")
df_mc.to_csv(out_path("SECTION7_mc_summary.csv"), index=False, encoding="utf-8-sig")

print("\n==================== SECTION 7 — Hypothesis (Monte-Carlo absorption time) ====================")
print(df_mc.to_string(index=False))
print("\nLecture  rapide :")
print("- B_minus_A_mean_h < 0  => en moyenne, B mène plus vite à la maîtrise (niveau 5).")
print("- Les quantiles p10/p90 donnent l’incertitude (variabilité des trajectoires).\n")

# ------------------------------------------------------------
# (4) Données réelles — HoursTotal par niveau (descriptif)
# ------------------------------------------------------------
df_real = df.dropna(subset=["Pretest_i", "HoursTotal"]).copy()
df_real["Pretest_i"] = df_real["Pretest_i"].astype(int)
df_real["HoursTotal"] = pd.to_numeric(df_real["HoursTotal"], errors="coerce")

real_rows = []
for s in TRANSIENT:
    dfi = df_real[df_real["Pretest_i"] == s]
    if len(dfi) == 0:
        continue
    x = dfi["HoursTotal"].dropna().values.astype(float)
    if len(x) == 0:
        continue
    real_rows.append({
        "Level": s,
        "n_students": int(len(x)),
        "HoursTotal_mean": round(float(np.mean(x)), 2),
        "HoursTotal_median": round(float(np.median(x)), 2),
        "HoursTotal_p10": round(float(np.quantile(x, 0.10)), 2),
        "HoursTotal_p90": round(float(np.quantile(x, 0.90)), 2),
        "HoursTotal_min": round(float(np.min(x)), 2),
        "HoursTotal_max": round(float(np.max(x)), 2),
    })

df_real_sum = pd.DataFrame(real_rows).sort_values("Level")
df_real_sum.to_csv(out_path("SECTION7_real_hours_by_level.csv"), index=False, encoding="utf-8-sig")

print("\n==================== SECTION 7 — Réel (HoursTotal) par niveau ====================")
if len(df_real_sum) > 0:
    print(df_real_sum.to_string(index=False))
    print("\nLecture  :")
    print("- Ceci décrit le temps de remediation OBSERVÉ (heures) selon le niveau initial.")
    print("- Ça ne prouve pas A vs B si on n’a pas de label 'action réelle', mais ça contextualise la difficulté par niveau.\n")
else:
    print("[WARN] Pas de données HoursTotal exploitables.\n")

# ------------------------------------------------------------
# (5) π_ML : distribution des actions prédites par niveau (si dispo)
# ------------------------------------------------------------
ml_exported = False
if "pi_ml_state" in globals() and isinstance(pi_ml_state, dict):
    # pi_ml_state est une “politique stationnaire” (par niveau) => simple tableau
    ml_rows = []
    for s in TRANSIENT:
        ml_rows.append({
            "Level": s,
            "pi_ml_state_action": pi_ml_state.get(s, A)
        })
    df_ml = pd.DataFrame(ml_rows)
    df_ml.to_csv(out_path("SECTION7_ml_policy_by_level.csv"), index=False, encoding="utf-8-sig")
    ml_exported = True

    print("\n==================== SECTION 7 — π_ML_state (stationnaire) ====================")
    print(df_ml.to_string(index=False))
    print("\nLecture  :")
    print("- π_ML_state dit : 'si un élève est au niveau s, l’action la plus probable observée/prévue est A ou B'.\n")

# Si tu as une fonction de proba par ligne (Section 5), on peut aussi faire : moyenne P(B|features) par niveau
if "predict_action_proba_row" in globals() and callable(predict_action_proba_row):
    dfx = df.dropna(subset=["Pretest_i"]).copy()
    dfx["Pretest_i"] = dfx["Pretest_i"].astype(int)

    prows = []
    for s in TRANSIENT:
        dfi = dfx[dfx["Pretest_i"] == s].head(500).copy()  # cap sécurité
        if len(dfi) == 0:
            continue
        pB_list = []
        for _, r in dfi.iterrows():
            pr = predict_action_proba_row(r)  # expected dict or tuple
            # on accepte plusieurs formats robustes :
            if isinstance(pr, dict) and "pB" in pr:
                pB_list.append(float(pr["pB"]))
            elif isinstance(pr, (list, tuple)) and len(pr) >= 1:
                pB_list.append(float(pr[0]))
        if len(pB_list) == 0:
            continue

        prows.append({
            "Level": s,
            "n_rows_used": int(len(pB_list)),
            "mean_pB": round(float(np.mean(pB_list)), 4),
            "median_pB": round(float(np.median(pB_list)), 4),
        })

    if len(prows) > 0:
        df_pB = pd.DataFrame(prows).sort_values("Level")
        df_pB.to_csv(out_path("SECTION7_ml_probB_by_level.csv"), index=False, encoding="utf-8-sig")

        print("\n==================== SECTION 7 — π_ML(features): proba(B) par niveau ====================")
        print(df_pB.to_string(index=False))
        print("\nLecture  :")
        print("- mean_pB proche de 1 => le ML pense que B est souvent adapté pour ce niveau/ces profils.")
        print("- mean_pB proche de 0 => plutôt A.\n")

# ------------------------------------------------------------
# (6) Verdict automatique sur l’hypothèse (résumé)
# ------------------------------------------------------------
# Hypothèse : 1-2 => A ; 3-4 => B
hyp_ok_q = True
for s in [1,2]:
    act = df_q[df_q["Level"]==s]["BestAction_by_Qhours"].values
    if len(act) and act[0] != A:
        hyp_ok_q = False
for s in [3,4]:
    act = df_q[df_q["Level"]==s]["BestAction_by_Qhours"].values
    if len(act) and act[0] != B:
        hyp_ok_q = False

hyp_ok_mc = True
for s in [1,2]:
    v = df_mc[df_mc["Level"]==s]["B_minus_A_mean_h"].values
    if len(v) and float(v[0]) < 0:  # B faster for level 1/2
        hyp_ok_mc = False
for s in [3,4]:
    v = df_mc[df_mc["Level"]==s]["B_minus_A_mean_h"].values
    if len(v) and float(v[0]) > 0:  # A faster for level 3/4
        hyp_ok_mc = False

verdict = {
    "hypothesis_statement": "Levels 1-2 => A, Levels 3-4 => B",
    "check_Qhours_pass": bool(hyp_ok_q),
    "check_MC_pass": bool(hyp_ok_mc),
    "notes": {
        "Qhours": "best action by minimizing one-step Bellman Q in HOURS",
        "MC": f"mean absorption time (hours) via Monte-Carlo with N={MC_N}"
    }
}

with open(out_path("SECTION7_hypothesis_summary.json"), "w", encoding="utf-8") as f:
    json.dump(verdict, f, ensure_ascii=False, indent=2)

print("\n==================== SECTION 7 — Verdict hypothèse ====================")
print(json.dumps(verdict, ensure_ascii=False, indent=2))
print("\n Exports Section 7 :")
print("-", out_path("SECTION7_hypothesis_level_summary.csv"))
print("-", out_path("SECTION7_mc_summary.csv"))
print("-", out_path("SECTION7_real_hours_by_level.csv"))
if ml_exported:
    print("-", out_path("SECTION7_ml_policy_by_level.csv"))
print("-", out_path("SECTION7_hypothesis_summary.json"))



[INFO] Value used: V_hours_star
 Level BestAction_by_Qhours  Q_A_hours  Q_B_hours  Delta_B_minus_A_hours
     1                    B     29.081     26.900                 -2.181
     2                    B     26.962     24.525                 -2.436
     3                    B     19.136     17.135                 -2.001
     4                    B     19.275     16.793                 -2.482

Lecture  rapide :
- Delta_B_minus_A_hours < 0  => B réduit l’espérance de temps (meilleur).
- Delta_B_minus_A_hours > 0  => A est meilleur.
=> Compare niveaux 1–2 vs 3–4 pour confirmer/infirmer l’hypothèse.


 Level  MC_N  A_mean_h  B_mean_h  A_median_h  B_median_h  B_minus_A_mean_h  B_minus_A_median_h  A_p10_h  A_p90_h  B_p10_h  B_p90_h
     1  2000     29.82     23.45        24.0        16.0             -6.37                -8.0      8.0     64.0      8.0     48.0
     2  2000     27.38     20.54        24.0        16.0             -6.83                -8.0      8.0     56.0      8.0     40.0

# SECTION 8 — Analyse absorbante (Perdikaris)

In [52]:
# -*- coding: utf-8 -*-

import os
import json
import numpy as np
import pandas as pd

# ============================================================
# 0) PARAMS / SAFE DEFAULTS
# ============================================================
EPS = 1e-12

OUT_DIR = globals().get("OUT_DIR", "./out/202602/out_pomdp")
os.makedirs(OUT_DIR, exist_ok=True)

# prend la valeur existante dans ton notebook ; fallback = 2h
HOURS_PER_DAY = float(globals().get("HOURS_PER_DAY", 2.0))

states = [1, 2, 3, 4, 5]
TRANSIENT = [1, 2, 3, 4]
ABSORBING = 5

def out_path(fname: str) -> str:
    return os.path.join(OUT_DIR, f"L0-{fname}")

def _as_df(mat, idx, cols):
    return pd.DataFrame(mat, index=idx, columns=cols)

def _clip_and_renorm_rows(P):
    """Sécurise P : clip [0,1], renormalise lignes, force absorbant état 5."""
    P = np.asarray(P, dtype=float).copy()
    P = np.clip(P, 0.0, 1.0)
    for i in range(P.shape[0]):
        s = float(P[i, :].sum())
        if s <= EPS:
            P[i, :] = 1.0 / P.shape[1]
        else:
            P[i, :] /= s
    P[ABSORBING-1, :] = 0.0
    P[ABSORBING-1, ABSORBING-1] = 1.0
    return P

def _fmt(x, nd=4):
    try:
        return float(np.round(float(x), nd))
    except Exception:
        return x

def _pct(x, nd=1):
    return f"{_fmt(100.0*float(x), nd)}%"

# ============================================================
# 1) CHOIX DE LA MATRICE P (TA / TB / POLICY)
# ============================================================
MODE_P = globals().get("SECTION8_MODE_P", "TA")  # "TA" | "TB" | "POLICY"

T_A = globals().get("T_A", None)
T_B = globals().get("T_B", None)
pi_hours_star = globals().get("pi_hours_star", None)  # dict {level: 'A'/'B'}

if MODE_P == "TB":
    if T_B is None:
        raise NameError("MODE_P='TB' mais T_B n'est pas défini. Exécute Section 4/5/6 avant.")
    P = T_B
    P_label = "P = T_B (Intensive)"
elif MODE_P == "POLICY":
    if T_A is None or T_B is None:
        raise NameError("MODE_P='POLICY' mais T_A/T_B n'est pas défini.")
    if not isinstance(pi_hours_star, dict):
        raise NameError("MODE_P='POLICY' mais pi_hours_star (dict) n'est pas défini.")
    P = np.zeros((5, 5), dtype=float)
    for s in states:
        if s == ABSORBING:
            P[s-1, :] = 0.0
            P[s-1, ABSORBING-1] = 1.0
        else:
            a = pi_hours_star.get(int(s), "A")
            P[s-1, :] = (T_A[s-1, :] if a == "A" else T_B[s-1, :])
    P_label = "P = Pπ (Policy-induced from π*_hours)"
else:
    if T_A is None:
        raise NameError("MODE_P='TA' mais T_A n'est pas défini. Exécute Section 4/5/6 avant.")
    P = T_A
    P_label = "P = T_A (Standard)"

P = _clip_and_renorm_rows(P)

print("\n==================== SECTION 8 ====================")
print(f"OUT_DIR = {OUT_DIR}")
print(f"HOURS_PER_DAY = {HOURS_PER_DAY:.1f}h")
print(f"MODE_P = {MODE_P} | {P_label}")

# ============================================================
# (P) Matrice de transition
# ============================================================
print("\n--- (P) Matrice de transition ---")
dfP = _as_df(P, states, states)
print(dfP.to_string())

# ============================================================
# (Q, R, I) Forme canonique
# ============================================================
Q = P[np.ix_([s-1 for s in TRANSIENT], [s-1 for s in TRANSIENT])]   # 4x4
R = P[np.ix_([s-1 for s in TRANSIENT], [ABSORBING-1])]              # 4x1
I = np.eye(len(TRANSIENT), dtype=float)                             # 4x4

print("\n--- (Q) Transitoire → Transitoire ---")
dfQ = _as_df(Q, TRANSIENT, TRANSIENT)
print(dfQ.to_string())

print("\n--- (R) Transitoire → Absorbant (L5) ---")
dfR = _as_df(R.reshape(-1, 1), TRANSIENT, [ABSORBING])
print(dfR.to_string())

print("\n--- (I) Matrice identité ---")
dfI = _as_df(I, TRANSIENT, TRANSIENT)
print(dfI.to_string())

# ============================================================
# (N) Matrice fondamentale
# ============================================================
IQ = I - Q
try:
    N = np.linalg.inv(IQ)
    inv_used = "inv"
except np.linalg.LinAlgError:
    N = np.linalg.pinv(IQ)
    inv_used = "pinv"

print("\n--- (N) Matrice fondamentale : N = (I - Q)^(-1) ---")
dfN = _as_df(N, TRANSIENT, TRANSIENT)
print(dfN.to_string())
print(f"\n[INFO] inversion utilisée = {inv_used}")

# ============================================================
# (C) Comptes attendus : C = N
# ============================================================
C_counts = N.copy()

print("\n--- (C) Comptes attendus (C = N) ---")
print(_as_df(C_counts, TRANSIENT, TRANSIENT).to_string())

# ============================================================
# (H) Horizon moyen avant maîtrise : H = N * 1
# ============================================================
H_horizon_days = (N @ np.ones((len(TRANSIENT), 1))).reshape(-1, 1)
dfH = _as_df(H_horizon_days, TRANSIENT, ["H (days)"])

print("\n--- (H) Horizon moyen avant maîtrise (jours) ---")
print(dfH.to_string())

# ============================================================
# (B) Probabilité d'absorption en L5 : B = N * R
# ============================================================
B = (N @ R.reshape(-1, 1)).reshape(-1, 1)
dfB = _as_df(B, TRANSIENT, ["P(absorb L5)"])

print("\n--- (B) Probabilité d'atteindre L5 à terme ---")
print(dfB.to_string())

# ============================================================
# (t) Temps moyen jusqu’à absorption : t = N * 1
# ============================================================
t_days = (N @ np.ones((len(TRANSIENT), 1))).reshape(-1, 1)
t_hours = t_days * HOURS_PER_DAY
df_t_days = _as_df(t_days, TRANSIENT, ["t (days)"])
df_t_hours = _as_df(t_hours, TRANSIENT, ["t (hours)"])

print("\n--- (t) Temps moyen jusqu'à maîtrise (jours) ---")
print(df_t_days.to_string())

print("\n--- Temps moyen jusqu'à maîtrise (heures) ---")
print(df_t_hours.to_string())

# ============================================================
# Visites moyennes : bottleneck
# ============================================================
visit_total_by_start = N.sum(axis=1).reshape(-1, 1)
visit_avg_by_state = N.mean(axis=0).reshape(-1, 1)

df_visit_total = _as_df(visit_total_by_start, TRANSIENT, ["Total expected visits"])
df_visit_avg = _as_df(visit_avg_by_state, TRANSIENT, ["Avg visits of state"])

print("\n--- Visites totales moyennes avant maîtrise (par niveau initial) ---")
print(df_visit_total.to_string())

print("\n--- Visites moyennes par état (où on passe le plus de temps) ---")
print(df_visit_avg.to_string())

bottleneck_idx = int(np.argmax(visit_avg_by_state.reshape(-1)))
bottleneck_level = TRANSIENT[bottleneck_idx]
bottleneck_val = float(visit_avg_by_state[bottleneck_idx, 0])

# ============================================================
# (A) Retour/visite relative avant absorption : A_return[i,j] = N[i,j] / N[j,j]
# ============================================================
A_return = np.zeros_like(N)
diagN = np.diag(N)

for j in range(len(TRANSIENT)):
    denom = float(diagN[j])
    if denom <= EPS:
        A_return[:, j] = 0.0
    else:
        A_return[:, j] = N[:, j] / denom

dfA = _as_df(A_return, TRANSIENT, TRANSIENT)

print("\n--- (A) Retour/visite relative avant maîtrise ---")
print(dfA.to_string())

# ============================================================
# Indicateur de retours en arrière (max A vers niveaux < j)
# ============================================================
rows_back = []
for j, lvl_j in enumerate(TRANSIENT):
    back_vals = []
    for i, lvl_i in enumerate(TRANSIENT):
        if lvl_i < lvl_j:
            back_vals.append(float(A_return[i, j]))
    back_max = float(np.max(back_vals)) if back_vals else 0.0
    rows_back.append({"FromAroundLevel": lvl_j, "MaxReturnToLower": _fmt(back_max, 4)})

df_back = pd.DataFrame(rows_back)

print("\n--- Indicateur de retours en arrière (max A vers niveaux < j) ---")
print(df_back.to_string(index=False))

# ============================================================
# EXPORTS (JSON + CSV)
# ============================================================
out_data = {
    "meta": {
        "MODE_P": MODE_P,
        "P_label": P_label,
        "hours_per_day": HOURS_PER_DAY,
        "states": states,
        "transient": TRANSIENT,
        "absorbing": ABSORBING,
        "inv_used": inv_used
    },
    "P": P.tolist(),
    "Q": Q.tolist(),
    "R": R.reshape(-1, 1).tolist(),
    "I": I.tolist(),
    "N": N.tolist(),
    "C_counts": C_counts.tolist(),
    "H_horizon_days": H_horizon_days.tolist(),
    "B": B.tolist(),
    "t_days": t_days.tolist(),
    "t_hours": t_hours.tolist(),
    "A_return": A_return.tolist(),
    "visit_total_by_start": visit_total_by_start.tolist(),
    "visit_avg_by_state": visit_avg_by_state.tolist(),
    "backward_return_summary": df_back.to_dict(orient="records")
}

json_path = os.path.join(OUT_DIR, "L0-SECTION8_perdikaris_components_inline_examples.json")
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(out_data, f, ensure_ascii=False, indent=2)

dfP.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_P.csv"), encoding="utf-8-sig")
dfQ.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_Q.csv"), encoding="utf-8-sig")
dfR.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_R.csv"), encoding="utf-8-sig")
dfI.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_I.csv"), encoding="utf-8-sig")
dfN.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_N.csv"), encoding="utf-8-sig")
_as_df(C_counts, TRANSIENT, TRANSIENT).to_csv(os.path.join(OUT_DIR, "L0-SECTION8_C_counts.csv"), encoding="utf-8-sig")
dfH.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_H_days.csv"), encoding="utf-8-sig")
dfB.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_B.csv"), encoding="utf-8-sig")
df_t_days.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_t_days.csv"), encoding="utf-8-sig")
df_t_hours.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_t_hours.csv"), encoding="utf-8-sig")
dfA.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_A_return.csv"), encoding="utf-8-sig")
df_back.to_csv(os.path.join(OUT_DIR, "L0-SECTION8_backward_return_summary.csv"), index=False, encoding="utf-8-sig")

print("\n[OK] Exports Section 8 :")
print(" -", json_path)
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_P.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_Q.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_R.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_I.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_N.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_C_counts.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_H_days.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_B.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_t_days.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_t_hours.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_A_return.csv"))
print(" -", os.path.join(OUT_DIR, "L0-SECTION8_backward_return_summary.csv"))

print("\n==================== FIN SECTION 8 ====================")



OUT_DIR = ./out/202602/out_pomdp
HOURS_PER_DAY = 8.0h
MODE_P = TA | P = T_A (Standard)

--- (P) Matrice de transition ---
          1         2         3         4         5
1  0.500000  0.250000  0.000000  0.000000  0.250000
2  0.075758  0.545455  0.075758  0.045455  0.257576
3  0.000000  0.030120  0.433735  0.108434  0.427711
4  0.004348  0.000000  0.347826  0.239130  0.408696
5  0.000000  0.000000  0.000000  0.000000  1.000000

--- (Q) Transitoire → Transitoire ---
          1         2         3         4
1  0.500000  0.250000  0.000000  0.000000
2  0.075758  0.545455  0.075758  0.045455
3  0.000000  0.030120  0.433735  0.108434
4  0.004348  0.000000  0.347826  0.239130

--- (R) Transitoire → Absorbant (L5) ---
          5
1  0.250000
2  0.257576
3  0.427711
4  0.408696

--- (I) Matrice identité ---
     1    2    3    4
1  1.0  0.0  0.0  0.0
2  0.0  1.0  0.0  0.0
3  0.0  0.0  1.0  0.0
4  0.0  0.0  0.0  1.0

--- (N) Matrice fondamentale : N = (I - Q)^(-1) ---
          1         2