In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# refine_bms.py — RAW/bms → REFINED + MinIO

from pathlib import Path
import argparse, os, sys, json
import pandas as pd
import numpy as np

# ==== MinIO ====
def get_s3_client(endpoint, access, secret, secure):
    """Build a boto3 S3 client for an S3-compatible endpoint such as MinIO.

    Args:
        endpoint: Full endpoint URL passed to boto3 as ``endpoint_url``.
        access: Access key id.
        secret: Secret access key.
        secure: Truthy to enable TLS; the same flag drives both ``use_ssl``
            and ``verify``.

    Returns:
        The client object returned by ``boto3.client("s3", ...)``, configured
        with SigV4 signing and the fixed region ``us-east-1``.
    """
    # Imported lazily so the module can be loaded without boto3 installed
    # when no upload is performed.
    import boto3
    from botocore.config import Config

    tls_enabled = bool(secure)
    options = {
        "endpoint_url": endpoint,
        "aws_access_key_id": access,
        "aws_secret_access_key": secret,
        "use_ssl": tls_enabled,
        "verify": tls_enabled,
        "region_name": "us-east-1",
        "config": Config(signature_version="s3v4"),
    }
    return boto3.client("s3", **options)

def ensure_bucket(s3, bucket):
    """Create *bucket* on the S3 endpoint if it does not exist yet.

    Args:
        s3: A boto3 S3 client (anything exposing ``head_bucket`` and
            ``create_bucket`` with a ``Bucket`` keyword).
        bucket: Name of the bucket to check / create.

    Raises:
        botocore.exceptions.ClientError: If ``head_bucket`` fails for any
            reason other than "bucket not found" (for example 403 / invalid
            credentials), or if ``create_bucket`` itself fails.
    """
    # Import the exception class directly: a bare ``import botocore`` does not
    # guarantee that the ``botocore.exceptions`` submodule is loaded.
    from botocore.exceptions import ClientError

    try:
        s3.head_bucket(Bucket=bucket)
    except ClientError as err:
        response = getattr(err, "response", None) or {}
        status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
        code = str(response.get("Error", {}).get("Code", ""))
        bucket_missing = status == 404 or code in {"404", "NoSuchBucket", "NotFound"}
        if not bucket_missing:
            # Permission or credential problems must surface as-is instead of
            # being masked by a doomed create_bucket call.
            raise
        s3.create_bucket(Bucket=bucket)

def s3_upload(s3, bucket, p: Path, key: str):
    """Upload the local file *p* to *bucket* under the object name *key*.

    Args:
        s3: Client object exposing ``upload_file(filename, bucket, key)``.
        bucket: Destination bucket name.
        p: Local path of the file to send; converted to ``str`` for the client.
        key: Destination object key.
    """
    local_path = str(p)
    s3.upload_file(local_path, bucket, key)

# ==== helpers ====
def std_index(df, tcol="ts", freq="15min"):
    """Re-index *df* on a regular UTC time grid.

    The timestamp column is parsed as UTC, duplicate timestamps are dropped
    (first occurrence kept), and the frame is sorted and re-indexed on a
    ``freq``-spaced range running from the earliest to the latest timestamp.
    Grid points with no matching row come back as all-NaN rows; rows whose
    timestamp is not on the grid are not carried over by the re-index.

    Args:
        df: Input frame containing column *tcol*. It is not modified.
        tcol: Name of the timestamp column.
        freq: Pandas frequency string for the target grid.

    Returns:
        A new DataFrame indexed by the regular UTC ``DatetimeIndex``.
    """
    frame = df.copy()
    frame[tcol] = pd.to_datetime(frame[tcol], utc=True)

    deduped = frame.drop_duplicates(subset=[tcol])
    ordered = deduped.set_index(tcol).sort_index()

    first, last = ordered.index.min(), ordered.index.max()
    grid = pd.date_range(start=first, end=last, freq=freq, tz="UTC")
    return ordered.reindex(grid)

def qa_fill(s, limit=4):
    """Fill short gaps in a time-indexed series.

    The series is cast to ``float64`` and then passed through three
    successive fills, each capped at *limit* consecutive samples:
    time-weighted interpolation, forward fill, then backward fill.

    NOTE(review): because the passes are chained, a gap longer than *limit*
    can receive up to *limit* values from each pass — confirm this is the
    intended meaning of the limit.

    Args:
        s: Series with a ``DatetimeIndex`` (required by ``method="time"``).
        limit: Maximum number of consecutive NaNs each pass may fill.

    Returns:
        A new ``float64`` Series; NaNs beyond the reach of the passes remain.
    """
    as_float = s.astype("float64")
    interpolated = as_float.interpolate(method="time", limit=limit)
    carried_forward = interpolated.ffill(limit=limit)
    return carried_forward.bfill(limit=limit)

def _refine_frame(idx, bounds, state_cols, limit):
    """Clip and gap-fill the regularised BMS frame; return ``(out, qa)``.

    Args:
        idx: Frame already re-indexed on the regular time grid.
        bounds: Mapping ``column -> (low, high)`` for continuous signals.
        state_cols: Names of binary state columns.
        limit: Per-pass fill limit forwarded to ``qa_fill``.

    Returns:
        ``out`` — frame sharing ``idx``'s index with the refined columns, and
        ``qa`` — per-column dict of null / clipped counters.
    """
    out = pd.DataFrame(index=idx.index)
    qa = {}

    # Continuous signals: clip to the physical bounds, then fill short gaps.
    for col, (lo, hi) in bounds.items():
        if col not in idx.columns:
            continue
        raw_col = idx[col]
        filled = qa_fill(raw_col.clip(lo, hi), limit=limit)
        out[col] = filled
        qa[col] = {
            "n_raw_null": int(raw_col.isna().sum()),
            "n_clipped": int(((raw_col < lo) | (raw_col > hi)).sum()),
            "n_final_null": int(filled.isna().sum()),
        }

    # Binary states: fill, then snap back to {0, 1}; nullable Int64 keeps
    # any remaining gaps as <NA> instead of failing the integer cast.
    for col in state_cols:
        if col not in idx.columns:
            continue
        state = qa_fill(idx[col], limit=limit).round().clip(0, 1).astype("Int64")
        out[col] = state
        qa[col] = {"n_final_null": int(state.isna().sum())}

    # zone_id, when present, is propagated in both directions without limit.
    if "zone_id" in idx.columns:
        out["zone_id"] = idx["zone_id"].ffill().bfill()

    return out, qa


def main():
    """Refine one month of raw BMS data and optionally publish it to MinIO.

    Reads ``<raw_base>/bms/<month>/bms.csv``, regularises it on a 15-minute
    UTC grid, clips and gap-fills the known columns, then writes
    ``bms_refined.csv`` and a JSON manifest under ``<ref_base>``. Unless
    ``--no-upload`` is given, both files are uploaded to the configured
    bucket.

    Exits with status 3 if the upload fails. Errors while reading or
    processing the input propagate as exceptions.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--month", type=str, default="2025-03")
    ap.add_argument("--raw_base", type=str, default="~/DTE/jne_project/raw")
    ap.add_argument("--ref_base", type=str, default="~/DTE/jne_project/refined")
    # MinIO
    ap.add_argument("--endpoint", type=str, default=os.environ.get("MINIO_ENDPOINT","http://192.168.0.173:9000"))
    ap.add_argument("--access",   type=str, default=os.environ.get("MINIO_ROOT_USER","minioadmin"))
    ap.add_argument("--secret",   type=str, default=os.environ.get("MINIO_ROOT_PASSWORD","minioadmin"))
    ap.add_argument("--bucket",   type=str, default="refined")
    ap.add_argument("--prefix",   type=str, default="jne_project/refined")
    ap.add_argument("--secure",   action="store_true")
    ap.add_argument("--no-upload", action="store_true")
    # Unknown arguments are ignored — presumably so the script also runs
    # inside a notebook kernel that injects its own argv; confirm.
    args, _ = ap.parse_known_args()

    # Single source of truth for the transform parameters: the same values
    # drive the processing and are recorded in the manifest.
    freq = "15min"
    fill_limit = 4
    state_cols = ["hvac_state", "lighting_state"]
    bounds = {
        "T_int": (0, 50),
        "T_int_true": (0, 50),
        "T_set": (5, 35),
        "P_hvac": (0, 100),
        "P_lighting": (0, 50),
        "P_plug": (0, 50),
        "P_total": (0, 150),
    }

    raw = Path(args.raw_base).expanduser().resolve() / "bms" / args.month / "bms.csv"
    ref_base = Path(args.ref_base).expanduser().resolve()
    ref_dir = ref_base / "bms" / args.month
    meta_dir = ref_base / "meta" / args.month

    # Read the input first so a missing or unreadable CSV does not leave
    # empty output directories behind.
    df = pd.read_csv(raw)
    ref_dir.mkdir(parents=True, exist_ok=True)
    meta_dir.mkdir(parents=True, exist_ok=True)

    idx = std_index(df, "ts", freq)
    out, qa = _refine_frame(idx, bounds, state_cols, fill_limit)

    # The regular grid index is unnamed, so reset_index() yields "index".
    out = out.reset_index().rename(columns={"index": "ts"})

    p_out = ref_dir / "bms_refined.csv"
    out.to_csv(p_out, index=False)

    manifest = {
        "version": "1.0",
        "month": args.month,
        "input": str(raw),
        "output": str(p_out),
        "qa": qa,
        "transform": {
            "tz": "UTC",
            "freq": freq,
            "interpolation_limit": fill_limit,
            "clips": bounds,
            "states": state_cols,
        },
    }
    p_man = meta_dir / "refined_manifest_bms.json"
    p_man.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

    # Normalise the prefix once so the uploaded keys and the message printed
    # below always agree, and an empty prefix does not yield keys that start
    # with "/".
    root = args.prefix.strip("/")
    key_base = f"{root}/" if root else ""

    if not args.no_upload:
        try:
            s3 = get_s3_client(args.endpoint, args.access, args.secret, args.secure)
            ensure_bucket(s3, args.bucket)
            s3_upload(s3, args.bucket, p_out, f"{key_base}bms/{args.month}/bms_refined.csv")
            s3_upload(s3, args.bucket, p_man, f"{key_base}meta/{args.month}/refined_manifest_bms.json")
        except Exception as e:
            # Top-level boundary: report any upload failure and exit non-zero;
            # the local files have already been written at this point.
            print("ERREUR upload MinIO:", e, file=sys.stderr)
            sys.exit(3)

    print("OK — bms refined")
    print("local:", p_out, "|", p_man)
    if not args.no_upload:
        print("minio:", f"s3://{args.bucket}/{key_base}{{bms,meta}}/{args.month}/...")

# Run the refinement only when this file is executed as a script (or as a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


OK — bms refined
local: /home/amina/DTE/jne_project/refined/bms/2025-03/bms_refined.csv | /home/amina/DTE/jne_project/refined/meta/2025-03/refined_manifest_bms.json
minio: s3://refined/jne_project/refined/{bms,meta}/2025-03/...
