In [12]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# sensors_only_fixed.py — simulation CAPTEURS (30 jours @15min) — SANS PLOT
# Sorties :
#   ~/DTE/jne_project/raw/sensors/YYYY-MM/zone_101_sensors.csv
#   ~/DTE/jne_project/raw/labels/YYYY-MM/labels_sensors.csv
#   ~/DTE/jne_project/raw/meta/YYYY-MM/sensors_manifest.json

from pathlib import Path
import argparse, json
import numpy as np
import pandas as pd

def main():
    """Simulate one zone's sensor readings, inject labelled anomalies, save them.

    Command-line options (options this parser does not declare are ignored):
      --start  first timestamp, localized to UTC (default "2025-03-01 00:00:00")
      --days   number of simulated days (default 30)
      --dtmin  sampling step in minutes (default 15)
      --seed   seed for NumPy's global random generator (default 42)
      --base   output root directory (default "~/DTE/jne_project/raw")

    Files written under <base>; YYYY-MM is taken from the first timestamp only:
      sensors/YYYY-MM/zone_101_sensors.csv
      labels/YYYY-MM/labels_sensors.csv   (header is written even with no label)
      meta/YYYY-MM/sensors_manifest.json

    Raises:
        ValueError: if --days or --dtmin is not strictly positive, or if
            together they yield no sample at all.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--start", type=str, default="2025-03-01 00:00:00")
    parser.add_argument("--days", type=int, default=30)
    parser.add_argument("--dtmin", type=int, default=15)
    parser.add_argument("--seed", type=int, default=42)
    # Default output root is pinned under ~/DTE.
    parser.add_argument("--base", type=str, default="~/DTE/jne_project/raw")
    args, _ = parser.parse_known_args()

    # Fail early with a clear message instead of a ZeroDivisionError or an
    # IndexError on an empty time axis further down.
    if args.days <= 0 or args.dtmin <= 0:
        raise ValueError("--days and --dtmin must be strictly positive")
    periods = int(args.days * 24 * 60 / args.dtmin)
    if periods < 1:
        raise ValueError("--days and --dtmin must yield at least one sample")

    np.random.seed(args.seed)

    # Time axis (UTC)
    ts = pd.date_range(pd.Timestamp(args.start, tz="UTC"),
                       periods=periods, freq=f"{args.dtmin}min")
    n = len(ts)
    h = ts.hour + ts.minute / 60.0          # hour of day as a float
    wday = ts.weekday                       # Monday == 0
    # 1-based day number counted from the first timestamp's calendar day.
    dnum = ((ts.normalize() - ts[0].normalize()) / pd.Timedelta(days=1)).astype(int) + 1

    # Simulated sensors. The order of the random draws below is part of the
    # output for a given seed: do not reorder these statements.
    in_shift = ((h >= 8) & (h <= 18) & (wday < 5)).astype(int)
    # Presence fires with probability 0.9 inside the weekday window, 0.1 outside.
    pir = (np.random.rand(n) < (0.9 * in_shift + 0.1 * (1 - in_shift))).astype(int)

    t_base = 22 + 2.0 * np.sin(2 * np.pi * (h - 14) / 24)
    temp_int_c = t_base + 0.6 * pir + np.random.normal(0, 0.15, n)
    rh_int_pct = np.clip(55 - 6 * np.sin(2 * np.pi * (h - 14) / 24) + np.random.normal(0, 1.2, n), 30, 80)
    co2_ppm = 450 + 350 * pir + 80 * np.maximum(0, np.sin(2 * np.pi * (h - 8) / 10)) * pir + np.random.normal(0, 25, n)
    co2_ppm = np.clip(co2_ppm, 400, None)
    ghi_shape = np.maximum(0, np.sin(np.pi * (h - 9) / 8))
    # Lighting is commanded on when presence is detected and ghi_shape is below 0.3.
    lighting_cmd = (pir & (ghi_shape < 0.3)).astype(int)
    power_total_kw = 1.5 + 0.6 * pir + 2.8 * lighting_cmd + 0.9 * np.maximum(0, (temp_int_c - 22)) + np.random.normal(0, 0.15, n)
    power_total_kw = np.clip(power_total_kw, 0, None)

    # DataFrame (column names kept as-is)
    df = pd.DataFrame({
        "ts": ts, "zone_id": "brick:Room_101",
        "temp_int_c": temp_int_c,
        "rh_int_pct": rh_int_pct,
        "co2_ppm": co2_ppm,
        "pir_bin": pir,
        "power_total_kw": power_total_kw,
    })

    # Anomalies + labels
    label_columns = ["start_ts", "end_ts", "sensor", "anomaly_type", "severity"]
    labels = []
    # Time of day of every row, computed once and shared by both mask helpers.
    time_of_day = df["ts"].dt.time

    def mask_window(day: int, start_hhmm: str, end_hhmm: str):
        """Rows of day number `day` whose time of day lies in [start, end]."""
        s = pd.to_datetime(start_hhmm).time()
        e = pd.to_datetime(end_hhmm).time()
        return (dnum == day) & (time_of_day >= s) & (time_of_day <= e)

    def mask_instant(day: int, hhmm: str):
        """Rows of day number `day` whose time of day equals `hhmm` exactly."""
        t = pd.to_datetime(hhmm).time()
        return (dnum == day) & (time_of_day == t)

    def add_label(rows, sensor: str, anomaly_type: str, severity: str):
        """Record one label spanning the first to the last selected timestamp."""
        stamps = df.loc[rows, "ts"]
        labels.append(dict(start_ts=stamps.iloc[0], end_ts=stamps.iloc[-1],
                           sensor=sensor, anomaly_type=anomaly_type, severity=severity))

    # A1: temperature frozen at its first value of the window, day 23, 14:00-16:00.
    m = mask_window(23, "14:00", "16:00")
    if m.any():
        df.loc[m, "temp_int_c"] = df.loc[m, "temp_int_c"].iloc[0]
        add_label(m, "temp_int_c", "A1_sensor_stuck", "med")

    # A2: linear temperature drift from 0 to +0.5 over the whole of day 19.
    m = (dnum == 19)
    if m.any():
        idx = df.index[m]
        drift = np.linspace(0, 0.5, idx.size)
        df.loc[idx, "temp_int_c"] = df.loc[idx, "temp_int_c"].to_numpy() + drift
        add_label(idx, "temp_int_c", "A2_sensor_drift", "low")

    # A3: power reading tripled at two instants of day 19.
    for hhmm in ["12:00", "15:00"]:
        m = mask_instant(19, hhmm)
        if m.any():
            df.loc[m, "power_total_kw"] = 3.0 * df.loc[m, "power_total_kw"]
            add_label(m, "power_total_kw", "A3_meter_spike", "high")

    # Explicit columns keep the CSV header present when no anomaly was injected
    # (for example when fewer than 19 days are simulated).
    labels_df = pd.DataFrame(labels, columns=label_columns)

    # Save + manifest
    base = Path(args.base).expanduser().resolve()
    month = f"{ts[0].year}-{ts[0].month:02d}"
    s_dir = base / "sensors" / month
    l_dir = base / "labels" / month
    m_dir = base / "meta" / month
    for d in (s_dir, l_dir, m_dir):
        d.mkdir(parents=True, exist_ok=True)

    s_out = s_dir / "zone_101_sensors.csv"
    l_out = l_dir / "labels_sensors.csv"
    df.to_csv(s_out, index=False)
    labels_df.to_csv(l_out, index=False)

    manifest = {
        "version": "1.0",
        "time": {
            "start_utc": str(ts[0]),
            "days": args.days,
            "dt_minutes": args.dtmin,
            "rows": len(df)
        },
        "paths": {
            "base": str(base),
            "sensors_csv": str(s_out),
            "labels_csv": str(l_out),
            "month": month
        },
        "columns": {
            "time": "ts",
            "zone_id": "zone_id",
            "signals": [
                {"name":"temp_int_c",     "unit":"Celsius", "brick_hint":"Temperature_Sensor"},
                {"name":"rh_int_pct",     "unit":"Percent", "brick_hint":"Humidity_Sensor"},
                {"name":"co2_ppm",        "unit":"PPM",     "brick_hint":"CO2_Sensor"},
                {"name":"pir_bin",        "unit":"One",     "brick_hint":"Occupancy_Sensor"},
                {"name":"power_total_kw", "unit":"kW",      "brick_hint":"Power_Sensor"},
            ]
        },
        "rng_seed": args.seed
    }
    man_out = m_dir / "sensors_manifest.json"
    man_out.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

    print("OK")
    print("rows:", len(df), "| freq/min:", args.dtmin, "| days:", args.days)
    print("out sensors:", s_out)
    print("out labels :", l_out)
    print("manifest   :", man_out)

# Run the simulation only when this module is the entry point, not on import.
if __name__ == "__main__":
    main()


OK
rows: 2880 | freq/min: 15 | days: 30
out sensors: /home/amina/DTE/jne_project/raw/sensors/2025-03/zone_101_sensors.csv
out labels : /home/amina/DTE/jne_project/raw/labels/2025-03/labels_sensors.csv
manifest   : /home/amina/DTE/jne_project/raw/meta/2025-03/sensors_manifest.json


In [4]:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# sensors_only_fixed_minio.py — simulation CAPTEURS (30 jours @15min) — SANS PLOT
# Sorties locales (inchangées) :
#   ~/DTE/jne_project/raw/sensors/YYYY-MM/zone_101_sensors.csv
#   ~/DTE/jne_project/raw/labels/YYYY-MM/labels_sensors.csv
#   ~/DTE/jne_project/raw/meta/YYYY-MM/sensors_manifest.json
# Et envoi en parallèle vers MinIO (bucket 'raw') sous les mêmes préfixes.

from pathlib import Path
import argparse, json, os
import numpy as np
import pandas as pd
from io import BytesIO

# MinIO
from minio import Minio
from minio.error import S3Error

def minio_client():
    """Build a `Minio` client from environment variables.

    Reads MINIO_ENDPOINT, MINIO_ACCESS_KEY and MINIO_SECRET_KEY (all required)
    and MINIO_SECURE (optional; only the value "true", in any letter case,
    turns the `secure` flag on).

    Raises:
        RuntimeError: if any of the three required variables is unset or empty.
    """
    env = os.environ.get
    endpoint = env("MINIO_ENDPOINT")
    access_key = env("MINIO_ACCESS_KEY")
    secret_key = env("MINIO_SECRET_KEY")

    if not (endpoint and access_key and secret_key):
        raise RuntimeError("MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY requis")

    use_tls = env("MINIO_SECURE", "false").lower() == "true"
    return Minio(
        endpoint,
        access_key=access_key,
        secret_key=secret_key,
        secure=use_tls,
    )

def ensure_bucket(cli, bucket: str):
    """Create `bucket` through `cli` unless `cli.bucket_exists` reports it present."""
    already_there = cli.bucket_exists(bucket)
    if already_there:
        return
    cli.make_bucket(bucket)

def put_file(cli, bucket: str, key: str, path: Path, content_type: str):
    """Upload the file at `path` to `bucket` under object name `key`.

    The whole file is read into memory and handed to `cli.put_object` as an
    in-memory stream together with its byte length and `content_type`.
    """
    payload = path.read_bytes()
    stream = BytesIO(payload)
    cli.put_object(
        bucket,
        key,
        data=stream,
        length=len(payload),
        content_type=content_type,
    )

def main():
    """Simulate one zone's sensors, save the files locally, then upload them.

    Command-line options (options this parser does not declare are ignored):
      --start         first timestamp, localized to UTC (default "2025-03-01 00:00:00")
      --days          number of simulated days (default 30)
      --dtmin         sampling step in minutes (default 15)
      --seed          seed for NumPy's global random generator (default 42)
      --base          local output root (default "~/DTE/jne_project/raw")
      --minio-bucket  destination bucket (default "raw")
      --no-minio      skip the upload step

    Files written under <base>; YYYY-MM is taken from the first timestamp only:
      sensors/YYYY-MM/zone_101_sensors.csv
      labels/YYYY-MM/labels_sensors.csv   (header is written even with no label)
      meta/YYYY-MM/sensors_manifest.json
    Unless --no-minio is given, the same three files are then uploaded to the
    bucket under the same relative keys.

    Raises:
        ValueError: if --days or --dtmin is not strictly positive, or if
            together they yield no sample at all.
        RuntimeError: from minio_client() when the MinIO environment variables
            are missing; this happens after the local files have been written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--start", type=str, default="2025-03-01 00:00:00")
    parser.add_argument("--days", type=int, default=30)
    parser.add_argument("--dtmin", type=int, default=15)
    parser.add_argument("--seed", type=int, default=42)
    # Default output root is pinned under ~/DTE.
    parser.add_argument("--base", type=str, default="~/DTE/jne_project/raw")
    # MinIO
    parser.add_argument("--minio-bucket", type=str, default="raw")
    parser.add_argument("--no-minio", action="store_true", help="désactiver l'upload MinIO")
    args, _ = parser.parse_known_args()

    # Fail early with a clear message instead of a ZeroDivisionError or an
    # IndexError on an empty time axis further down.
    if args.days <= 0 or args.dtmin <= 0:
        raise ValueError("--days and --dtmin must be strictly positive")
    periods = int(args.days * 24 * 60 / args.dtmin)
    if periods < 1:
        raise ValueError("--days and --dtmin must yield at least one sample")

    np.random.seed(args.seed)

    # Time axis (UTC)
    ts = pd.date_range(pd.Timestamp(args.start, tz="UTC"),
                       periods=periods, freq=f"{args.dtmin}min")
    n = len(ts)
    h = ts.hour + ts.minute / 60.0          # hour of day as a float
    wday = ts.weekday                       # Monday == 0
    # 1-based day number counted from the first timestamp's calendar day.
    dnum = ((ts.normalize() - ts[0].normalize()) / pd.Timedelta(days=1)).astype(int) + 1

    # Simulated sensors. The order of the random draws below is part of the
    # output for a given seed: do not reorder these statements.
    in_shift = ((h >= 8) & (h <= 18) & (wday < 5)).astype(int)
    # Presence fires with probability 0.9 inside the weekday window, 0.1 outside.
    pir = (np.random.rand(n) < (0.9 * in_shift + 0.1 * (1 - in_shift))).astype(int)

    t_base = 22 + 2.0 * np.sin(2 * np.pi * (h - 14) / 24)
    temp_int_c = t_base + 0.6 * pir + np.random.normal(0, 0.15, n)
    rh_int_pct = np.clip(55 - 6 * np.sin(2 * np.pi * (h - 14) / 24) + np.random.normal(0, 1.2, n), 30, 80)
    co2_ppm = 450 + 350 * pir + 80 * np.maximum(0, np.sin(2 * np.pi * (h - 8) / 10)) * pir + np.random.normal(0, 25, n)
    co2_ppm = np.clip(co2_ppm, 400, None)
    ghi_shape = np.maximum(0, np.sin(np.pi * (h - 9) / 8))
    # Lighting is commanded on when presence is detected and ghi_shape is below 0.3.
    lighting_cmd = (pir & (ghi_shape < 0.3)).astype(int)
    power_total_kw = 1.5 + 0.6 * pir + 2.8 * lighting_cmd + 0.9 * np.maximum(0, (temp_int_c - 22)) + np.random.normal(0, 0.15, n)
    power_total_kw = np.clip(power_total_kw, 0, None)

    # DataFrame (column names kept as-is)
    df = pd.DataFrame({
        "ts": ts, "zone_id": "brick:Room_101",
        "temp_int_c": temp_int_c,
        "rh_int_pct": rh_int_pct,
        "co2_ppm": co2_ppm,
        "pir_bin": pir,
        "power_total_kw": power_total_kw,
    })

    # Anomalies + labels
    label_columns = ["start_ts", "end_ts", "sensor", "anomaly_type", "severity"]
    labels = []
    # Time of day of every row, computed once and shared by both mask helpers.
    time_of_day = df["ts"].dt.time

    def mask_window(day: int, start_hhmm: str, end_hhmm: str):
        """Rows of day number `day` whose time of day lies in [start, end]."""
        s = pd.to_datetime(start_hhmm).time()
        e = pd.to_datetime(end_hhmm).time()
        return (dnum == day) & (time_of_day >= s) & (time_of_day <= e)

    def mask_instant(day: int, hhmm: str):
        """Rows of day number `day` whose time of day equals `hhmm` exactly."""
        t = pd.to_datetime(hhmm).time()
        return (dnum == day) & (time_of_day == t)

    def add_label(rows, sensor: str, anomaly_type: str, severity: str):
        """Record one label spanning the first to the last selected timestamp."""
        stamps = df.loc[rows, "ts"]
        labels.append(dict(start_ts=stamps.iloc[0], end_ts=stamps.iloc[-1],
                           sensor=sensor, anomaly_type=anomaly_type, severity=severity))

    # A1: temperature frozen at its first value of the window, day 23, 14:00-16:00.
    m = mask_window(23, "14:00", "16:00")
    if m.any():
        df.loc[m, "temp_int_c"] = df.loc[m, "temp_int_c"].iloc[0]
        add_label(m, "temp_int_c", "A1_sensor_stuck", "med")

    # A2: linear temperature drift from 0 to +0.5 over the whole of day 19.
    m = (dnum == 19)
    if m.any():
        idx = df.index[m]
        drift = np.linspace(0, 0.5, idx.size)
        df.loc[idx, "temp_int_c"] = df.loc[idx, "temp_int_c"].to_numpy() + drift
        add_label(idx, "temp_int_c", "A2_sensor_drift", "low")

    # A3: power reading tripled at two instants of day 19.
    for hhmm in ["12:00", "15:00"]:
        m = mask_instant(19, hhmm)
        if m.any():
            df.loc[m, "power_total_kw"] = 3.0 * df.loc[m, "power_total_kw"]
            add_label(m, "power_total_kw", "A3_meter_spike", "high")

    # Explicit columns keep the CSV header present when no anomaly was injected
    # (for example when fewer than 19 days are simulated).
    labels_df = pd.DataFrame(labels, columns=label_columns)

    # Save + manifest
    base = Path(args.base).expanduser().resolve()
    month = f"{ts[0].year}-{ts[0].month:02d}"
    s_dir = base / "sensors" / month
    l_dir = base / "labels" / month
    m_dir = base / "meta" / month
    for d in (s_dir, l_dir, m_dir):
        d.mkdir(parents=True, exist_ok=True)

    s_out = s_dir / "zone_101_sensors.csv"
    l_out = l_dir / "labels_sensors.csv"
    df.to_csv(s_out, index=False)
    labels_df.to_csv(l_out, index=False)

    manifest = {
        "version": "1.0",
        "time": {
            "start_utc": str(ts[0]),
            "days": args.days,
            "dt_minutes": args.dtmin,
            "rows": len(df)
        },
        "paths": {
            "base": str(base),
            "sensors_csv": str(s_out),
            "labels_csv": str(l_out),
            "month": month
        },
        "columns": {
            "time": "ts",
            "zone_id": "zone_id",
            "signals": [
                {"name":"temp_int_c",     "unit":"Celsius", "brick_hint":"Temperature_Sensor"},
                {"name":"rh_int_pct",     "unit":"Percent", "brick_hint":"Humidity_Sensor"},
                {"name":"co2_ppm",        "unit":"PPM",     "brick_hint":"CO2_Sensor"},
                {"name":"pir_bin",        "unit":"One",     "brick_hint":"Occupancy_Sensor"},
                {"name":"power_total_kw", "unit":"kW",      "brick_hint":"Power_Sensor"},
            ]
        },
        "rng_seed": args.seed
    }
    man_out = m_dir / "sensors_manifest.json"
    man_out.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

    # MinIO upload (same prefixes inside the bucket as under the local root)
    if not args.no_minio:
        cli = minio_client()
        ensure_bucket(cli, args.minio_bucket)
        # Object keys
        key_s = f"sensors/{month}/zone_101_sensors.csv"
        key_l = f"labels/{month}/labels_sensors.csv"
        key_m = f"meta/{month}/sensors_manifest.json"
        put_file(cli, args.minio_bucket, key_s, s_out, "text/csv")
        put_file(cli, args.minio_bucket, key_l, l_out, "text/csv")
        put_file(cli, args.minio_bucket, key_m, man_out, "application/json")
        print("minio:", f"s3://{args.minio_bucket}/{key_s}")
        print("minio:", f"s3://{args.minio_bucket}/{key_l}")
        print("minio:", f"s3://{args.minio_bucket}/{key_m}")

    print("OK")
    print("rows:", len(df), "| freq/min:", args.dtmin, "| days:", args.days)
    print("out sensors:", s_out)
    print("out labels :", l_out)
    print("manifest   :", man_out)

# Run the simulation (and the optional upload) only when executed as the entry point.
if __name__ == "__main__":
    main()

RuntimeError: MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY requis

In [None]:
q