In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# occupancy_only.py — source OCCUPATION (RAW, sans graphiques)
# Sorties :
#   ~/DTE/jne_project/raw/occupancy/YYYY-MM/occupancy.csv
#   ~/DTE/jne_project/raw/meta/YYYY-MM/occupancy_manifest.json

from pathlib import Path
import argparse, json
import numpy as np
import pandas as pd

def _parse_args() -> argparse.Namespace:
    """Parse the command-line options and reject unusable values.

    Returns:
        The parsed namespace with ``start``, ``days``, ``dtmin``, ``seed``,
        ``base``, ``p_weekday`` and ``p_weekend``.

    Exits (via ``ArgumentParser.error``) when ``--days`` or ``--dtmin`` is
    not positive, when the step is longer than the whole simulated window,
    or when a probability lies outside [0, 1].
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--start", type=str, default="2025-03-01 00:00:00")
    ap.add_argument("--days",  type=int, default=30)
    ap.add_argument("--dtmin", type=int, default=15)
    ap.add_argument("--seed",  type=int, default=42)
    ap.add_argument("--base",  type=str, default="~/DTE/jne_project/raw")
    ap.add_argument("--p_weekday", type=float, default=0.90)
    ap.add_argument("--p_weekend", type=float, default=0.10)
    # parse_known_args silently drops unrecognised argv entries.
    # NOTE(review): presumably kept so the script also runs inside a notebook
    # kernel, whose argv carries extra options — confirm before tightening.
    args, _ = ap.parse_known_args()

    # Without these checks a zero step divides by zero and an empty time
    # axis fails later on ts[0] with an unhelpful IndexError.
    if args.days <= 0:
        ap.error("--days must be a positive integer")
    if args.dtmin <= 0:
        ap.error("--dtmin must be a positive integer")
    if args.dtmin > args.days * 24 * 60:
        ap.error("--dtmin must not exceed the simulated duration")
    for name in ("p_weekday", "p_weekend"):
        if not 0.0 <= getattr(args, name) <= 1.0:
            ap.error(f"--{name} must lie in [0, 1]")
    return args


def _simulate_occupancy(args: argparse.Namespace) -> pd.DataFrame:
    """Build the occupancy table (columns ``ts``, ``presence``, ``level``).

    The global NumPy RNG is seeded with ``args.seed`` and drawn from twice,
    first for presence and then for levels, so a given seed reproduces the
    same table.
    """
    np.random.seed(args.seed)

    # --- time axis (UTC) ---
    periods = (args.days * 24 * 60) // args.dtmin
    ts = pd.date_range(pd.Timestamp(args.start, tz="UTC"),
                       periods=periods, freq=f"{args.dtmin}min")
    hours = ts.hour.to_numpy() + ts.minute.to_numpy() / 60.0
    wday = ts.weekday.to_numpy()
    # 1-based day number counted from the first simulated calendar day.
    dayn = (((ts.normalize() - ts[0].normalize()) / pd.Timedelta(days=1))
            .to_numpy().astype(int) + 1)

    # --- working hours ---
    # Default shift is 08-18; days 16 to 18 use 09-19 instead. Both bounds
    # are inclusive, so the slot starting exactly at the end hour counts.
    shift_start = np.where((dayn >= 16) & (dayn <= 18), 9.0, 8.0)
    shift_mask = (hours >= shift_start) & (hours <= shift_start + 10.0)

    # --- binary presence ---
    # Per-timestamp probability: Monday-Friday vs Saturday-Sunday.
    p = np.where(wday < 5, args.p_weekday, args.p_weekend)
    present = (np.random.rand(ts.size) < p) & shift_mask

    # --- occupancy level ---
    # Slots without presence keep the "low" label.
    level = np.full(ts.size, "low", dtype=object)

    # Day 10 is the high-attendance day: every present slot is "high".
    high_day = (dayn == 10) & present
    level[high_day] = "high"

    # Other present slots: 60% normal, 30% med, 10% high.
    rest = present & ~high_day
    r = np.random.rand(rest.sum())
    level[rest] = np.where(r < 0.6, "normal", np.where(r < 0.9, "med", "high"))

    return pd.DataFrame({"ts": ts, "presence": present.astype(int), "level": level})


def _write_outputs(df: pd.DataFrame, args: argparse.Namespace) -> Path:
    """Write the CSV and its JSON manifest under ``args.base``.

    Both files go into a ``YYYY-MM`` folder named after the first timestamp,
    even when the simulated window runs into later months.

    Returns:
        The path of the written ``occupancy.csv``.
    """
    first_ts = df["ts"].iloc[0]
    base = Path(args.base).expanduser().resolve()
    month = f"{first_ts.year}-{first_ts.month:02d}"
    out_dir = base / "occupancy" / month
    meta_dir = base / "meta" / month
    out_dir.mkdir(parents=True, exist_ok=True)
    meta_dir.mkdir(parents=True, exist_ok=True)

    csv_path = out_dir / "occupancy.csv"
    df.to_csv(csv_path, index=False)

    manifest = {
        "version": "1.0",
        "time": {"start_utc": str(first_ts), "days": args.days, "dt_minutes": args.dtmin, "rows": int(len(df))},
        "paths": {"base": str(base), "occupancy_csv": str(csv_path), "month": month},
        "columns": {"time": "ts", "presence": "presence", "level": "level"},
        "probas": {"weekday": args.p_weekday, "weekend": args.p_weekend},
        "shift_rules": {"default": "08:00-18:00", "week3_days_16_18": "09:00-19:00"},
        "rng_seed": args.seed,
    }
    (meta_dir / "occupancy_manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
    return csv_path


def main():
    """Generate the raw occupancy source and save it with its manifest.

    Reads the options from the command line, simulates presence and
    occupancy level on a regular UTC time grid, writes
    ``<base>/occupancy/YYYY-MM/occupancy.csv`` and
    ``<base>/meta/YYYY-MM/occupancy_manifest.json``, then prints the CSV path.
    """
    args = _parse_args()
    df = _simulate_occupancy(args)
    csv_path = _write_outputs(df, args)
    print("OK — occupancy →", csv_path)

# Run the generator only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


OK — occupancy → /home/amina/DTE/jne_project/raw/occupancy/2025-03/occupancy.csv
