# In [1]:  (Jupyter notebook cell marker left over from the export)

# weather_openmeteo_month_minio.py — Open-Meteo/Meteostat → RAW + MinIO
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# weather_openmeteo_month_minio.py — Open-Meteo/Meteostat → RAW + MinIO

import argparse, math, os, sys
from pathlib import Path
import pandas as pd, json, requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# ===== MinIO/S3 =====
def get_s3_client(endpoint, access, secret, secure):
    """Build a boto3 S3 client for a MinIO/S3-compatible endpoint.

    Args:
        endpoint: Base URL of the object store, passed as ``endpoint_url``.
        access: Access key id.
        secret: Secret access key.
        secure: Truthy to enable both TLS and certificate verification;
            falsy disables both.

    Returns:
        The client returned by ``boto3.client("s3", ...)``, configured for
        SigV4 signing and the ``us-east-1`` region.
    """
    import boto3
    from botocore.config import Config

    # A single flag drives both TLS usage and certificate verification.
    tls_enabled = bool(secure)
    client_options = {
        "endpoint_url": endpoint,
        "aws_access_key_id": access,
        "aws_secret_access_key": secret,
        "use_ssl": tls_enabled,
        "verify": tls_enabled,
        "region_name": "us-east-1",
        "config": Config(signature_version="s3v4"),
    }
    return boto3.client("s3", **client_options)

def ensure_bucket(s3, bucket):
    """Create ``bucket`` when it does not exist yet.

    Args:
        s3: boto3 S3 client (anything exposing ``head_bucket`` and
            ``create_bucket``).
        bucket: Name of the bucket to check or create.

    Raises:
        botocore.exceptions.ClientError: If ``head_bucket`` fails for a reason
            other than "bucket not found" (e.g. 403 Forbidden), or if
            ``create_bucket`` itself fails.
    """
    # Import the class directly: ``import botocore`` alone does not guarantee
    # that the ``botocore.exceptions`` submodule has been loaded.
    from botocore.exceptions import ClientError

    try:
        s3.head_bucket(Bucket=bucket)
        return
    except ClientError as err:
        code = str(err.response.get("Error", {}).get("Code", ""))
        # Only a missing bucket justifies creating one. A permission or
        # request error must surface instead of being masked by a second,
        # unrelated failure from create_bucket.
        if code not in {"404", "NoSuchBucket", "NotFound"}:
            raise
    s3.create_bucket(Bucket=bucket)

def s3_upload_file(s3, bucket, local_path:Path, key:str):
    """Upload one local file to ``bucket`` under the object key ``key``.

    Args:
        s3: Client object exposing ``upload_file(filename, bucket, key)``.
        bucket: Destination bucket name.
        local_path: Path of the file to send; converted to ``str`` first.
        key: Destination object key.
    """
    source_file = str(local_path)
    s3.upload_file(source_file, bucket, key)

# ===== Open-Meteo / Meteostat =====
# Endpoint of the Open-Meteo archive API queried by fetch_openmeteo_archive().
ARCHIVE_URL = "https://archive-api.open-meteo.com/v1/archive"

def _session():
    """Return a ``requests`` session with a custom User-Agent and HTTPS retries.

    GET requests sent over ``https://`` are retried up to 3 times with a
    backoff factor of 0.5 when the server answers 429, 500, 502, 503 or 504.

    Returns:
        The configured ``requests.Session``.
    """
    retry_policy = Retry(
        total=3,
        backoff_factor=0.5,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET"],
    )
    http = requests.Session()
    http.headers.update({"User-Agent": "jne-meteo/1.0"})
    # Only the https:// prefix gets the retrying adapter.
    http.mount("https://", HTTPAdapter(max_retries=retry_policy))
    return http

def fetch_openmeteo_archive(lat, lon, start_date, days):
    """Fetch hourly weather from the Open-Meteo archive API.

    Args:
        lat: Latitude sent as ``latitude``.
        lon: Longitude sent as ``longitude``.
        start_date: First day of the period (anything ``pd.to_datetime``
            accepts).
        days: Number of calendar days requested; the last day sent to the API
            is ``start_date + days - 1``.

    Returns:
        DataFrame with columns ``ts`` (UTC timestamps), ``weather_temp_c``,
        ``weather_rh_pct``, ``weather_wind_ms`` and ``weather_ghi_wm2``.
        Series missing from the response are filled with ``None``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        RuntimeError: If the response carries no hourly time axis.
    """
    first_day = pd.to_datetime(start_date)
    last_day = first_day + pd.Timedelta(days=days - 1)
    query = {
        "latitude": lat, "longitude": lon,
        "start_date": first_day.date().isoformat(),
        "end_date": last_day.date().isoformat(),
        "hourly": "temperature_2m,relative_humidity_2m,windspeed_10m,shortwave_radiation",
        # Open-Meteo reports wind speed in km/h unless asked otherwise, but
        # the output column is weather_wind_ms: request m/s explicitly.
        "wind_speed_unit": "ms",
        "timezone": "UTC",
    }
    # The session is used for a single request; close it (and its pooled
    # connections) as soon as the payload has been decoded.
    with _session() as http:
        response = http.get(ARCHIVE_URL, params=query, timeout=30)
        response.raise_for_status()
        hourly = response.json().get("hourly", {})
    if not hourly or "time" not in hourly:
        raise RuntimeError("Open-Meteo: pas de séries hourly")
    n = len(hourly["time"])

    def take(name, default=None):
        """Return series ``name`` padded or truncated to the time-axis length."""
        values = hourly.get(name)
        if values is None:
            return [default] * n
        return (list(values) + [default] * max(0, n - len(values)))[:n]

    return pd.DataFrame({
        "ts": pd.to_datetime(hourly["time"], utc=True),
        "weather_temp_c": take("temperature_2m"),
        "weather_rh_pct": take("relative_humidity_2m"),
        "weather_wind_ms": take("windspeed_10m"),
        "weather_ghi_wm2": take("shortwave_radiation"),
    })

def _daylight_fraction(hour):
    """Return the crude daylight factor (0..1) for a UTC hour of day.

    The factor is a half-sine peaking at 13:00 and non-zero only between
    09:00 and 17:00. The explicit window matters: sin(pi * (h - 9) / 8) is
    also positive for h < 1, which would otherwise produce irradiance at
    midnight.
    """
    if not 9.0 <= hour <= 17.0:
        return 0.0
    return max(0.0, math.sin(math.pi * (hour - 9.0) / 8.0))


def fallback_meteostat(lat, lon, start_date, days):
    """Fetch hourly weather from Meteostat when Open-Meteo is unavailable.

    Meteostat provides no irradiance here, so ``weather_ghi_wm2`` is a rough
    estimate: 800 W/m2 scaled by a daylight half-sine and by a cloudiness
    proxy derived from the ``coco`` column (0.25 when ``coco`` is absent).

    Args:
        lat: Latitude of the point of interest.
        lon: Longitude of the point of interest.
        start_date: First day of the period (anything ``pd.to_datetime``
            accepts); interpreted as UTC.
        days: Number of days to cover.

    Returns:
        DataFrame with columns ``ts`` (UTC timestamps), ``weather_temp_c``,
        ``weather_rh_pct``, ``weather_wind_ms`` and ``weather_ghi_wm2``.

    Raises:
        RuntimeError: If Meteostat returns no rows for the period.
    """
    from meteostat import Point, Hourly

    # Meteostat's Hourly takes naive datetimes (UTC by default) and has no
    # ``tz`` keyword, so query with naive UTC bounds and attach the time zone
    # to the result afterwards.
    start = pd.to_datetime(start_date, utc=True).tz_localize(None)
    # The end bound is inclusive: stop at 23:00 of the last day so the period
    # matches what fetch_openmeteo_archive returns for the same arguments.
    end = start + pd.Timedelta(days=days) - pd.Timedelta(hours=1)
    data = Hourly(Point(lat, lon), start.to_pydatetime(), end.to_pydatetime()).fetch()
    if data.empty:
        raise RuntimeError("Meteostat: pas de données")

    # Meteostat reports wspd in km/h; the output column is in m/s.
    wind_kmh = data.get("wspd")
    df = pd.DataFrame({
        "ts": pd.to_datetime(data.index, utc=True),
        "weather_temp_c": data.get("temp"),
        "weather_rh_pct": data.get("rhum"),
        "weather_wind_ms": None if wind_kmh is None else wind_kmh / 3.6,
    }).reset_index(drop=True)

    hour = df["ts"].dt.hour + df["ts"].dt.minute / 60.0
    daylight = hour.map(_daylight_fraction)
    coco = data.get("coco")
    # Heuristic cloudiness in [0, 0.75] from the condition code; missing codes
    # (NaN) propagate as NaN irradiance for those rows.
    clouds = (coco.clip(0, 9) / 12.0).to_numpy() if coco is not None else 0.25
    df["weather_ghi_wm2"] = 800.0 * daylight * (1 - clouds)
    return df

def resample_15min(df_hourly, dt_min=15):
    """Upsample an hourly weather frame to a regular ``dt_min``-minute grid.

    Duplicate timestamps are dropped (first occurrence kept), the frame is put
    on a gap-free hourly grid, then upsampled and filled by time-weighted
    linear interpolation in both directions.

    Args:
        df_hourly: DataFrame with a datetime ``ts`` column and numeric columns.
        dt_min: Target step in minutes (default 15).

    Returns:
        DataFrame with ``ts`` restored as the first column, one row per
        ``dt_min`` minutes between the first and last input timestamps.
    """
    # Use the "h" / "min" aliases: "H" and "T" are deprecated in recent pandas
    # and emit FutureWarning.
    hourly = (
        df_hourly.drop_duplicates(subset=["ts"])
        .set_index("ts")
        .sort_index()
        .asfreq("1h")
    )
    upsampled = hourly.resample(f"{dt_min}min").interpolate("time", limit_direction="both")
    return upsampled.reset_index()

def main():
    """CLI entry point: fetch weather, write CSV + manifest, upload to MinIO.

    Steps:
        1. Parse arguments (unknown ones are ignored so the script also runs
           inside a notebook kernel).
        2. Fetch hourly data from Open-Meteo, falling back to Meteostat on any
           failure.
        3. Resample to 15 minutes and write ``weather/<YYYY-MM>/weather.csv``
           and ``meta/<YYYY-MM>/weather_manifest.json`` under ``--out``.
        4. Unless ``--no-upload`` is given, upload both files to the bucket.

    Exits with status 3 when the upload fails, and with the argparse usage
    error status when ``--days`` is smaller than 1.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--lat", type=float, default=31.6295)
    ap.add_argument("--lon", type=float, default=-7.9811)
    ap.add_argument("--start", type=str, default="2025-03-01")
    ap.add_argument("--days", type=int, default=30)
    ap.add_argument("--out", type=str, default="~/DTE/jne_project/raw")
    # MinIO
    # NOTE(review): the fallback endpoint and credentials are hard-coded
    # defaults; prefer setting MINIO_* in the environment.
    ap.add_argument("--endpoint", type=str, default=os.environ.get("MINIO_ENDPOINT", "http://192.168.0.173:9000"))
    ap.add_argument("--access",   type=str, default=os.environ.get("MINIO_ROOT_USER", "minioadmin"))
    ap.add_argument("--secret",   type=str, default=os.environ.get("MINIO_ROOT_PASSWORD", "minioadmin"))
    ap.add_argument("--bucket",   type=str, default="raw")
    ap.add_argument("--prefix",   type=str, default="jne_project/raw")
    ap.add_argument("--secure",   action="store_true")
    ap.add_argument("--no-upload", action="store_true")
    args, _ = ap.parse_known_args()
    # A non-positive day count would yield an end date before the start date.
    if args.days < 1:
        ap.error("--days doit être >= 1")

    columns = ["ts", "weather_temp_c", "weather_rh_pct", "weather_wind_ms", "weather_ghi_wm2"]
    # Parse the start date once, before any network call, so a malformed
    # --start fails immediately.
    start_ts = pd.to_datetime(args.start)
    month = f"{start_ts.year}-{start_ts.month:02d}"
    base_dir = Path(args.out).expanduser().resolve()
    # Same normalised prefix for the upload keys and for the final report.
    root = args.prefix.strip("/")

    try:
        df_hour = fetch_openmeteo_archive(args.lat, args.lon, args.start, args.days)
        src = "open-meteo"
    except Exception as e:
        # Deliberately broad: any Open-Meteo failure triggers the fallback.
        print(f"[Open-Meteo] échec: {e} → Meteostat")
        df_hour = fallback_meteostat(args.lat, args.lon, args.start, args.days)
        src = "meteostat"

    df_15 = resample_15min(df_hour)[columns]

    out_dir = base_dir / "weather" / month
    meta_dir = base_dir / "meta" / month
    for d in (out_dir, meta_dir):
        d.mkdir(parents=True, exist_ok=True)

    out_path = out_dir / "weather.csv"
    df_15.to_csv(out_path, index=False, encoding="utf-8")

    manifest = {
        "version": "1.0",
        "source": src,
        "geo": {"lat": args.lat, "lon": args.lon},
        "time": {"start_date": args.start, "days": args.days, "dt_minutes": 15, "rows": len(df_15)},
        "paths": {"base": str(base_dir), "weather_csv": str(out_path), "month": month},
        "columns": columns,
    }
    man_path = meta_dir / "weather_manifest.json"
    man_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

    if not args.no_upload:
        try:
            s3 = get_s3_client(args.endpoint, args.access, args.secret, args.secure)
            ensure_bucket(s3, args.bucket)
            key_w = f"{root}/weather/{month}/weather.csv"
            key_m = f"{root}/meta/{month}/weather_manifest.json"
            s3_upload_file(s3, args.bucket, out_path, key_w)
            s3_upload_file(s3, args.bucket, man_path, key_m)
        except Exception as e:
            print("ERREUR: upload MinIO:", e, file=sys.stderr)
            sys.exit(3)

    print("OK — weather")
    print("local:", out_path, "|", man_path)
    if not args.no_upload:
        # Report the normalised prefix so the printed location matches the
        # keys that were actually uploaded.
        print("minio:", f"s3://{args.bucket}/{root}/{{weather,meta}}/{month}/...")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


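# Notebook stderr residue: presumably the source lines echoed by pandas
# FutureWarnings about the deprecated "H" and "T" frequency aliases
# (the warning header lines were not preserved in this export):
#   df = df_hourly.drop_duplicates(subset=["ts"]).set_index("ts").sort_index().asfreq("1H")
#   return df.resample(f"{dt_min}T").interpolate("time", limit_direction="both").reset_index()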


# Captured cell output (stdout):
# OK — weather
# local: /home/amina/DTE/jne_project/raw/weather/2025-03/weather.csv | /home/amina/DTE/jne_project/raw/meta/2025-03/weather_manifest.json
# minio: s3://raw/jne_project/raw/{weather,meta}/2025-03/...
