In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# occupancy_only_minio.py — OCCUPATION (RAW 15min) + upload MinIO

from pathlib import Path
import argparse, json, os, sys
import numpy as np
import pandas as pd

# ===== MinIO/S3 =====
def get_s3_client(endpoint, access, secret, secure):
    """Create a boto3 S3 client for a custom endpoint such as MinIO.

    Args:
        endpoint: URL of the S3-compatible service, passed as ``endpoint_url``.
        access: Access key id.
        secret: Secret access key.
        secure: Truthy to enable SSL and certificate verification; falsy
            disables both.

    Returns:
        The client returned by ``boto3.client("s3", ...)``, configured with
        region ``us-east-1`` and signature version ``s3v4``.
    """
    # Imported lazily so the module can be loaded without boto3 installed.
    import boto3
    from botocore.config import Config

    tls_enabled = bool(secure)
    client_options = {
        "endpoint_url": endpoint,
        "aws_access_key_id": access,
        "aws_secret_access_key": secret,
        # The same flag drives both transport encryption and cert checking.
        "use_ssl": tls_enabled,
        "verify": tls_enabled,
        "region_name": "us-east-1",
        "config": Config(signature_version="s3v4"),
    }
    return boto3.client("s3", **client_options)

def ensure_bucket(s3, bucket):
    """Make sure ``bucket`` exists, creating it only when it is missing.

    Args:
        s3: S3 client exposing ``head_bucket`` and ``create_bucket``.
        bucket: Name of the bucket to check or create.

    Raises:
        botocore.exceptions.ClientError: If ``head_bucket`` fails for any
            reason other than "bucket not found" (for example access denied),
            or if ``create_bucket`` itself fails.
    """
    # Import the exception class explicitly instead of relying on the
    # ``botocore.exceptions`` submodule having been loaded as a side effect.
    from botocore.exceptions import ClientError

    try:
        s3.head_bucket(Bucket=bucket)
    except ClientError as err:
        error_code = str(err.response.get("Error", {}).get("Code", ""))
        http_status = err.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
        bucket_missing = (
            error_code in {"404", "NoSuchBucket", "NotFound"} or http_status == 404
        )
        if not bucket_missing:
            # A 403/400 means the bucket may exist but is not accessible;
            # trying to create it would only mask the real problem.
            raise
        s3.create_bucket(Bucket=bucket)

def s3_upload_file(s3, bucket, local_path: Path, key: str):
    """Upload one local file to ``bucket`` under the object name ``key``.

    Args:
        s3: Client exposing ``upload_file(filename, bucket, key)``.
        bucket: Destination bucket name.
        local_path: Path of the file to send; converted to ``str`` first.
        key: Object key inside the bucket.
    """
    filename = str(local_path)
    s3.upload_file(filename, bucket, key)

# ===== Main =====
def main():
    """Generate a synthetic occupancy series, save it locally, upload it to MinIO.

    Steps:
      1. Parse command-line options (unrecognised options are ignored).
      2. Build a UTC time axis of ``--days`` days sampled every ``--dtmin``
         minutes, starting at ``--start``.
      3. Draw a 0/1 presence flag per sample: a random draw against the
         weekday or weekend probability, gated by the working-hours window
         (08:00-18:00, shifted to 09:00-19:00 on days 16 to 18; both bounds
         are inclusive).
      4. Assign a level label: "low" when absent, "high" for every present
         sample of day 10, otherwise "normal" / "med" / "high" drawn with
         probabilities 0.6 / 0.3 / 0.1.
      5. Write ``occupancy.csv`` and ``occupancy_manifest.json`` under
         ``<base>/occupancy/<YYYY-MM>/`` and ``<base>/meta/<YYYY-MM>/``, where
         the month is taken from the first timestamp.
      6. Unless ``--no-upload`` is given, upload both files to the bucket.

    Exits with status 2 when ``--days`` / ``--dtmin`` cannot produce at least
    one sample, and with status 3 when the upload fails.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--start", type=str, default="2025-03-01 00:00:00")
    ap.add_argument("--days",  type=int, default=30)
    ap.add_argument("--dtmin", type=int, default=15)
    ap.add_argument("--seed",  type=int, default=42)
    ap.add_argument("--base",  type=str, default="~/DTE/jne_project/raw")
    ap.add_argument("--p_weekday", type=float, default=0.90)
    ap.add_argument("--p_weekend", type=float, default=0.10)
    # MinIO
    ap.add_argument("--endpoint", type=str, default=os.environ.get("MINIO_ENDPOINT", "http://192.168.0.173:9000"))
    ap.add_argument("--access",   type=str, default=os.environ.get("MINIO_ROOT_USER", "minioadmin"))
    ap.add_argument("--secret",   type=str, default=os.environ.get("MINIO_ROOT_PASSWORD", "minioadmin"))
    ap.add_argument("--bucket",   type=str, default="raw")
    ap.add_argument("--prefix",   type=str, default="jne_project/raw")
    ap.add_argument("--secure",   action="store_true")
    ap.add_argument("--no-upload", action="store_true")
    # parse_known_args: extra argv entries are ignored instead of rejected.
    args, _ = ap.parse_known_args()

    # Reject inputs that would otherwise crash further down with a
    # ZeroDivisionError (dtmin == 0) or an IndexError on an empty time axis.
    if args.days <= 0:
        ap.error("--days must be a positive integer")
    if args.dtmin <= 0:
        ap.error("--dtmin must be a positive integer")
    periods = int(args.days * 24 * 60 / args.dtmin)
    if periods < 1:
        ap.error("--dtmin exceeds the simulated span: no sample to generate")

    np.random.seed(args.seed)

    # time axis
    ts = pd.date_range(pd.Timestamp(args.start, tz="UTC"),
                       periods=periods, freq=f"{args.dtmin}min")
    # Hour of day as a decimal number (e.g. 8.25 for 08:15).
    hours = (ts.hour.to_numpy() + ts.minute.to_numpy()/60.0).astype(float)
    wday  = ts.weekday.to_numpy()
    # 1-based day counter relative to the first calendar day of the series.
    dayn  = (((ts.normalize()-ts[0].normalize())/pd.Timedelta(days=1))
             .to_numpy().astype(int) + 1)

    # working hours (both bounds inclusive)
    def in_shift(h, start=8.0, end=18.0): return (h >= start) & (h <= end)
    in_shift_default = in_shift(hours, 8, 18)
    in_shift_s3      = in_shift(hours, 9, 19)           # days 16-18
    shift_mask = in_shift_default.copy()
    mask_s3 = (dayn>=16) & (dayn<=18)
    shift_mask[mask_s3] = in_shift_s3[mask_s3]

    # presence: random draw against the day-type probability, within the shift
    p = np.where(wday < 5, args.p_weekday, args.p_weekend)
    presence = ((np.random.rand(ts.size) < p) & shift_mask).astype(int)

    # level
    level = np.full(ts.size, "low", dtype=object)
    mask_pres = presence.astype(bool)
    high_day = (dayn == 10) & mask_pres
    level[high_day] = "high"
    rest = mask_pres & (~high_day)
    r = np.random.rand(rest.sum())
    level[rest] = np.where(r < 0.6, "normal", np.where(r < 0.9, "med", "high"))

    df = pd.DataFrame({"ts": ts, "presence": presence, "level": level})

    # local outputs
    base = Path(args.base).expanduser().resolve()
    month = f"{ts[0].year}-{ts[0].month:02d}"
    out_dir  = base / "occupancy" / month
    meta_dir = base / "meta" / month
    out_dir.mkdir(parents=True, exist_ok=True)
    meta_dir.mkdir(parents=True, exist_ok=True)

    csv_path = out_dir / "occupancy.csv"
    df.to_csv(csv_path, index=False)

    manifest = {
        "version": "1.0",
        "time": {"start_utc": str(ts[0]), "days": args.days, "dt_minutes": args.dtmin, "rows": int(len(df))},
        "paths": {"base": str(base), "occupancy_csv": str(csv_path), "month": month},
        "columns": {"time": "ts", "presence": "presence", "level": "level"},
        "probas": {"weekday": args.p_weekday, "weekend": args.p_weekend},
        "shift_rules": {"default": "08:00-18:00", "week3_days_16_18": "09:00-19:00"},
        "rng_seed": args.seed
    }
    man_path = meta_dir / "occupancy_manifest.json"
    man_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

    # Object keys: drop empty segments so an empty or slash-only prefix does
    # not produce keys starting with "/".
    root = args.prefix.strip("/")

    def object_key(*parts):
        return "/".join(part for part in (root, *parts) if part)

    # MinIO upload
    if not args.no_upload:
        try:
            s3 = get_s3_client(args.endpoint, args.access, args.secret, args.secure)
            ensure_bucket(s3, args.bucket)
            key_o = object_key("occupancy", month, "occupancy.csv")
            key_m = object_key("meta", month, "occupancy_manifest.json")
            s3_upload_file(s3, args.bucket, csv_path, key_o)
            s3_upload_file(s3, args.bucket, man_path, key_m)
        except Exception as e:
            # Top-level boundary: report any upload failure and exit non-zero.
            print("ERREUR: upload MinIO:", e, file=sys.stderr)
            sys.exit(3)

    print("OK — occupancy")
    print("local:", csv_path, "|", man_path)
    if not args.no_upload:
        # Built from the same normalised prefix as the uploaded keys.
        location = object_key("{occupancy,meta}", month)
        print("minio:", f"s3://{args.bucket}/{location}/...")

# Entry point: call main() only when this module is executed as __main__,
# not when it is imported.
if __name__ == "__main__":
    main()


OK — occupancy
local: /home/amina/DTE/jne_project/raw/occupancy/2025-03/occupancy.csv | /home/amina/DTE/jne_project/raw/meta/2025-03/occupancy_manifest.json
minio: s3://raw/jne_project/raw/{occupancy,meta}/2025-03/...
