In [None]:
# src/mazda_pipeline/config.py
from dataclasses import dataclass
import os
from pathlib import Path

@dataclass(frozen=True)
class Settings:
    """Immutable pipeline configuration with environment-variable defaults.

    The defaults below are evaluated once, when the class body runs (i.e. at
    import time); changing the environment afterwards does not affect them.
    Values passed explicitly to the constructor are normalised the same way
    as the environment defaults (see ``__post_init__``).
    """

    # Input claims file; default comes from MAZDA_DATA_IN.
    data_in: Path = Path(os.getenv("MAZDA_DATA_IN", "data/sample/mazda_claims_sample.csv"))
    # Directory that receives exported results; default comes from MAZDA_OUT_DIR.
    out_dir: Path = Path(os.getenv("MAZDA_OUT_DIR", "out"))
    # "public" | "internal"; default comes from MAZDA_PRIVACY_MODE.
    privacy_mode: str = os.getenv("MAZDA_PRIVACY_MODE", "public")

    def __post_init__(self) -> None:
        """Coerce path fields to ``Path`` and normalise ``privacy_mode``.

        Without normalisation a value such as ``"Public"`` or ``" public "``
        would not compare equal to ``"public"`` and the export step would
        skip sanitization.
        """
        # frozen=True blocks ordinary attribute assignment, so go through
        # object.__setattr__ (the documented pattern for frozen dataclasses).
        object.__setattr__(self, "data_in", Path(self.data_in))
        object.__setattr__(self, "out_dir", Path(self.out_dir))
        object.__setattr__(
            self, "privacy_mode", str(self.privacy_mode).strip().lower()
        )


SETTINGS = Settings()


In [None]:
# src/mazda_pipeline/io.py
import pandas as pd
from .config import SETTINGS

def load_claims() -> pd.DataFrame:
    """Read the claims table from ``SETTINGS.data_in``.

    The reader is selected by file extension, compared case-insensitively:
    ``.csv`` is read with ``pd.read_csv`` and ``.xlsx`` with ``pd.read_excel``.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.
    """
    source = SETTINGS.data_in
    readers = {".csv": pd.read_csv, ".xlsx": pd.read_excel}
    reader = readers.get(source.suffix.lower())
    if reader is None:
        raise ValueError(f"Unsupported input: {source}")
    return reader(source)

def ensure_out_dir():
    """Create ``SETTINGS.out_dir`` and any missing parents; no-op if it exists."""
    target = SETTINGS.out_dir
    target.mkdir(exist_ok=True, parents=True)


In [None]:
# src/mazda_pipeline/sanitize.py
import hashlib
import pandas as pd

# Exact column names treated as identifying. Matching is case-sensitive set
# membership on the column label.
SENSITIVE_COLS = {
    "ClaimNo",
    "クレームNO",  # Japanese label for the claim number
    "VIN",
    "VehicleID",
    "DealerCode",
    "CustomerName",
    "Phone",
    "Email",
    "Address",
    "CaseID",
    "TicketID",
}

def _hash_series(s: pd.Series, salt: str) -> pd.Series:
    """Return a new Series with every non-missing value replaced by a short hash.

    Each value is converted with ``str()``, prefixed with *salt*, UTF-8
    encoded and hashed with SHA-256; only the first 12 hex characters of the
    digest are kept. Values for which ``pd.isna`` is true pass through
    unchanged.
    """
    def _digest(value):
        if not pd.isna(value):
            payload = (salt + str(value)).encode("utf-8")
            full_hex = hashlib.sha256(payload).hexdigest()
            return full_hex[:12]
        return value

    return s.map(_digest)

def sanitize_public(df: pd.DataFrame, salt: str = "PUBLIC_DEMO_SALT") -> pd.DataFrame:
    """Return a sanitized copy of *df*; the input frame is not modified.

    Two transformations are applied:

    1. Every column whose label is in ``SENSITIVE_COLS`` is replaced by the
       output of ``_hash_series(column, salt)``.
    2. Every column whose (string) label contains ``"date"``
       (case-insensitive) or the character ``"日"`` is parsed with
       ``pd.to_datetime(errors="coerce")`` and replaced by its ``YYYY-MM``
       period rendered as a string. Values that fail to parse become the
       string ``"NaT"``.

    Args:
        df: Frame to sanitize.
        salt: String prepended to each value before hashing.
            NOTE(review): the default is a fixed literal in source, so hashes
            produced with it are reproducible by anyone who has this code;
            pass a private salt for anything beyond demo data.

    Returns:
        A new ``DataFrame`` with the same columns as *df*.
    """
    import warnings  # local import so this block needs no new file-level import

    out = df.copy()
    for c in out.columns:
        if c in SENSITIVE_COLS:
            out[c] = _hash_series(out[c], salt)

    # Coarse date generalization without dropping the column. Only string
    # labels are inspected: non-string labels (e.g. integer column names) have
    # no .lower() and used to crash this filter with AttributeError.
    # NOTE(review): the substring test also matches labels such as "Update" or
    # "Candidate"; confirm that is acceptable for the real schema.
    date_cols = [
        c for c in out.columns
        if isinstance(c, str) and ("date" in c.lower() or "日" in c)
    ]
    for dc in date_cols:
        try:
            d = pd.to_datetime(out[dc], errors="coerce")
            out[dc] = d.dt.to_period("M").astype(str)  # YYYY-MM
        except Exception as exc:
            # Still best-effort (the column is left as-is), but no longer
            # silent: an un-generalized date column in a "public" export is
            # something the operator needs to know about.
            warnings.warn(
                f"sanitize_public: could not generalize column {dc!r}; "
                f"leaving it unchanged ({exc})",
                RuntimeWarning,
                stacklevel=2,
            )
    return out


In [None]:
# src/mazda_pipeline/steps/50_export.py
import pandas as pd
from ..config import SETTINGS
from ..sanitize import sanitize_public
from ..io import ensure_out_dir

def export_results(df: pd.DataFrame) -> None:
    """Write *df* to ``mazda_results.csv`` inside ``SETTINGS.out_dir``.

    The output directory is created first via ``ensure_out_dir()``. Unless
    ``SETTINGS.privacy_mode`` is explicitly ``"internal"`` (compared after
    stripping whitespace and lower-casing), the frame is passed through
    ``sanitize_public`` before being written. The CSV is written without the
    index.

    Args:
        df: Results to export.
    """
    ensure_out_dir()
    mode = str(SETTINGS.privacy_mode).strip().lower()
    # Fail closed: previously only the exact string "public" triggered
    # sanitization, so a typo or different casing in MAZDA_PRIVACY_MODE
    # silently exported raw data. Now only an explicit "internal" opts out.
    out = df if mode == "internal" else sanitize_public(df)
    out.to_csv(SETTINGS.out_dir / "mazda_results.csv", index=False)


In [None]:
# src/mazda_pipeline/run.py
from .io import load_claims
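# Import your existing step functions here. Note that a module whose file name
# starts with a digit (e.g. steps/20_clean.py) cannot be named in an `import`
# statement -- `from .steps.20_clean import clean` is a SyntaxError. Either
# rename the file (e.g. steps/s20_clean.py) or load it dynamically:
#   clean = importlib.import_module(".steps.20_clean", __package__).clean
# ...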

def main():
    """Pipeline entry point.

    At present this only loads the claims table; the downstream steps are
    left as commented placeholders until their functions are imported.
    """
    df = load_claims()
    # Placeholder steps -- uncomment as each one is wired in:
    # df = clean(df)
    # df = features(df)
    # df = rules(df)
    # export_results(df)


# This module uses a relative import, so it has to be executed as part of its
# package (python -m <package>.run) rather than as a stand-alone script path.
if __name__ == "__main__":
    main()
