In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random, os

# Seed both random sources so repeated runs produce the same synthetic data.
rng = np.random.default_rng(42)
random.seed(42)

# --- Config
# (model name, available trims) pairs.
models = list(
    {
        "SONATA": ["SE", "SEL", "LIMITED"],
        "ELANTRA": ["SE", "SEL", "N_LINE"],
        "TUCSON": ["SE", "SEL", "LIMITED"],
        "SANTA_FE": ["SE", "SEL", "LIMITED"],
    }.items()
)
assembly_lines = ["Line_1", "Line_2", "Line_3", "Line_4"]
plants = ["HMMA"]  # Hyundai Motor Manufacturing Alabama (example plant code)
shifts = ["Day", "Night"]

# (part family, candidate part codes) pairs; the first code of each family
# is the one used as the baseline below.
parts = list(
    {
        "BATTERY": ["BAT_A", "BAT_B", "BAT_C"],
        "AIRBAG": ["ABG_X", "ABG_Y"],
        "INFOTAINMENT": ["INF_V1", "INF_V2", "INF_V3"],
        "BRAKE": ["BRK_STD", "BRK_ENH"],
        "SEAT": ["ST_FAB", "ST_LEATHER"],
        "ECU": ["ECU_1", "ECU_2"],
        "HEADLAMP": ["HL_HAL", "HL_LED"],
        "WHEEL": ["WH_17", "WH_18", "WH_19"],
        "CAMERA": ["CAM_0", "CAM_1"],
        "SENSOR": ["SNS_A", "SNS_B"],
    }.items()
)

# Baseline BOM by model: every model gets its own dict mapping each part
# family to that family's first code.
baseline_by_model = {
    model_name: {part_name: variants[0] for part_name, variants in parts}
    for model_name, _trims in models
}

# --- Create EO changes
# Each engineering order (EO) replaces one part code with another; its
# effective date is given as a day offset from start_date.
start_date = datetime(2024, 5, 20)
eo_specs = [
    # (part_id, old, new, effective_date_offset_days, change_type, reason)
    ("BATTERY", "BAT_A", "BAT_B", 12, "Part Change", "Cost Reduction"),
    ("AIRBAG", "ABG_X", "ABG_Y", 18, "Part Change", "Safety Improvement"),
    ("INFOTAINMENT", "INF_V1", "INF_V2", 22, "Software Update", "Feature Upgrade"),
    ("BRAKE", "BRK_STD", "BRK_ENH", 26, "Part Change", "Performance"),
    ("SEAT", "ST_FAB", "ST_LEATHER", 30, "Option Change", "Market Demand"),
    ("WHEEL", "WH_17", "WH_18", 35, "Part Change", "Package Standardization"),
    ("CAMERA", "CAM_0", "CAM_1", 40, "Part Change", "ADAS Upgrade"),
    ("SENSOR", "SNS_A", "SNS_B", 45, "Part Change", "Supplier Update"),
    ("INFOTAINMENT", "INF_V2", "INF_V3", 60, "Software Update", "Bug Fix / Stability"),
    ("BATTERY", "BAT_B", "BAT_C", 75, "Part Change", "Range Improvement"),
]
# One record per spec. The two rng draws happen in key order (released_by,
# then priority) for each record, which fixes the random stream.
eo_rows = [
    {
        "eo_id": f"EO_{seq:03d}",
        "part_id": part_name,
        "old_part_code": code_before,
        "new_part_code": code_after,
        "effective_date": (start_date + timedelta(days=offset_days)).date().isoformat(),
        "change_type": change_kind,
        "reason": why,
        "scope": "ALL_MODELS",
        "released_by": rng.choice(["Eng_Change_Board", "Quality", "Product_Planning"]),
        "priority": rng.choice(["Low", "Medium", "High"], p=[0.35, 0.5, 0.15]),
    }
    for seq, (part_name, code_before, code_after, offset_days, change_kind, why)
    in enumerate(eo_specs, start=1)
]
eo_df = pd.DataFrame(eo_rows)
eo_df = eo_df.sort_values("effective_date").reset_index(drop=True)

# Helper: part family -> its EO rows ordered by effective date.
# Families without any EO map to an empty frame.
eo_df["effective_date_dt"] = pd.to_datetime(eo_df["effective_date"])
changes_by_part = {
    part_name: eo_df.loc[eo_df["part_id"] == part_name].sort_values("effective_date_dt")
    for part_name, _variants in parts
}

# --- Generate vehicle production plan
n_vehicles = 800
prod_start = datetime(2024, 5, 25)
prod_end = datetime(2024, 8, 31)
total_days = (prod_end - prod_start).days + 1  # inclusive of both end dates


def _draw_vehicle(seq):
    """Draw one random vehicle record for sequence number ``seq``.

    Random draws happen in a fixed order: model, trim, production-day
    offset, assembly line, shift.
    """
    model_name, trim_options = random.choice(models)
    trim_name = random.choice(trim_options)
    day_offset = int(rng.integers(0, total_days))
    built_on = prod_start + timedelta(days=day_offset)
    return {
        "vehicle_id": f"V{seq:05d}",
        "plant_code": "HMMA",
        "assembly_line": rng.choice(assembly_lines),
        "shift": rng.choice(shifts, p=[0.65, 0.35]),
        "model": model_name,
        "trim": trim_name,
        "production_date": built_on.date().isoformat(),
    }


vehicle_rows = [_draw_vehicle(seq) for seq in range(1, n_vehicles + 1)]
vehicle_df = pd.DataFrame(vehicle_rows)
# Parsed copy of the ISO date string, used for date comparisons.
vehicle_df["production_date_dt"] = pd.to_datetime(vehicle_df["production_date"])

# --- Line adoption lags per part (simulate delayed implementation of EO on some lines)
# Base lag per part family: a random whole number of days from 0 to 4.
base_lag = {}
for part_name, _ in parts:
    base_lag[part_name] = int(rng.integers(0, 5))
# These families get an additional 2 to 5 days on top of their base lag.
for slow_part in ("INFOTAINMENT", "CAMERA", "SENSOR"):
    base_lag[slow_part] = base_lag[slow_part] + int(rng.integers(2, 6))

# Each line gets its own lag per family: base lag plus a random offset
# from -1 to 5 days, never going below zero.
line_lag = {
    line_name: {
        part_name: max(0, base_lag[part_name] + int(rng.integers(-1, 6)))
        for part_name, _ in parts
    }
    for line_name in assembly_lines
}

# --- Build BOM expected per vehicle and part
def expected_code(part_id, prod_date, model="SONATA"):
    """Return the part code a vehicle built on ``prod_date`` is expected to carry.

    The result starts from the model's baseline code for ``part_id``. If one
    or more EO changes for that part are effective on or before
    ``prod_date``, the new code of the most recent such change is returned
    instead.

    Args:
        part_id: Part family key used in ``baseline_by_model`` and
            ``changes_by_part`` (e.g. ``"BATTERY"``).
        prod_date: Production date, compared against the
            ``effective_date_dt`` column of the EO frame.
        model: Model whose baseline BOM is consulted. Defaults to
            ``"SONATA"``, which reproduces the earlier hard-coded lookup.

    Returns:
        A ``(code, effective_date)`` tuple. ``effective_date`` is the
        ``effective_date`` value of the EO row that determined ``code``, or
        ``None`` when the baseline code still applies.

    Raises:
        KeyError: If ``model`` or ``part_id`` is missing from
            ``baseline_by_model``.
    """
    baseline = baseline_by_model[model][part_id]

    changes = changes_by_part.get(part_id)
    if changes is None or changes.empty:
        return baseline, None

    applicable = changes[changes["effective_date_dt"] <= prod_date]
    if applicable.empty:
        return baseline, None

    # The frames in changes_by_part are sorted by effective date, so the last
    # applicable row is the newest change in the chain.
    latest = applicable.iloc[-1]
    return latest["new_part_code"], latest["effective_date"]
    
def _bom_row(vehicle, part_name):
    """Build the expected-BOM record for one vehicle row and one part family."""
    code, since = expected_code(part_name, vehicle["production_date_dt"])
    if since is None:
        # No EO applies yet: store an empty string rather than None.
        since = ""
    return {
        "vehicle_id": vehicle["vehicle_id"],
        "plant_code": vehicle["plant_code"],
        "model": vehicle["model"],
        "trim": vehicle["trim"],
        "part_id": part_name,
        "expected_part_code": code,
        "valid_from": since,
        "expected_source": "EO_RULESET",
    }


# One record per (vehicle, part family), vehicles outermost.
bom_rows = [
    _bom_row(vehicle, part_name)
    for _, vehicle in vehicle_df.iterrows()
    for part_name, _variants in parts
]
bom_df = pd.DataFrame(bom_rows)

# --- Build production actual per vehicle and part
# Actual code equals expected unless the line has not adopted the change yet (effective_date + lag) OR random misbuild.
def actual_code(part_id, prod_dt, line):
    """Simulate the part code actually installed on a vehicle.

    The installed code equals the expected code unless the assembly line has
    not yet adopted the latest applicable EO (effective date plus that line's
    lag for the part), or a random misbuild occurs.

    Args:
        part_id: Part family key (e.g. ``"BATTERY"``).
        prod_dt: Production date of the vehicle.
        line: Assembly line key used to look up ``line_lag``.

    Returns:
        A ``(actual, expected, mismatch_reason)`` tuple. ``mismatch_reason``
        is ``""`` when no deviation was simulated, ``"EO_NOT_YET_ADOPTED"``
        when the line still builds the previous code, and
        ``"ASSEMBLY_ERROR"`` when a random misbuild produced a code different
        from the expected one.
    """
    exp_code, valid_from = expected_code(part_id, prod_dt)
    # Every code defined for this part family: the pool a misbuild draws from.
    codes = [c for pid, cs in parts if pid == part_id for c in cs]

    if not valid_from:
        # No EO applies yet: the baseline code is installed, with a 0.5%
        # chance of a random misbuild.
        act = baseline_by_model["SONATA"][part_id]
        mismatch_reason = ""
        if rng.random() < 0.005 and len(codes) > 1:
            act = rng.choice(codes)
            # The draw may land on the baseline code itself; only label a
            # real deviation.
            if act != exp_code:
                mismatch_reason = "ASSEMBLY_ERROR"
        return act, exp_code, mismatch_reason

    eff_dt = pd.to_datetime(valid_from)
    lag_days = line_lag[line][part_id]
    adopted_dt = eff_dt + pd.Timedelta(days=lag_days)
    if prod_dt < adopted_dt:
        # Not adopted yet => the line still installs the code that the
        # latest applicable change was meant to replace.
        df = changes_by_part[part_id]
        latest = df[df["effective_date_dt"] <= prod_dt].iloc[-1]
        act = latest["old_part_code"]
        mismatch_reason = "EO_NOT_YET_ADOPTED"
    else:
        act = exp_code
        mismatch_reason = ""

    # 1% chance of a random misbuild that overrides whatever was chosen above.
    if rng.random() < 0.01:
        act = rng.choice(codes)
        # Re-derive the reason from the final code so that a draw which
        # happens to match the expected code does not keep a stale
        # "EO_NOT_YET_ADOPTED" label.
        mismatch_reason = "ASSEMBLY_ERROR" if act != exp_code else ""
    return act, exp_code, mismatch_reason

def _production_row(vehicle, part_name):
    """Build the as-built record for one vehicle row and one part family."""
    installed, _expected, why = actual_code(
        part_name, vehicle["production_date_dt"], vehicle["assembly_line"]
    )
    return {
        "vehicle_id": vehicle["vehicle_id"],
        "plant_code": vehicle["plant_code"],
        "assembly_line": vehicle["assembly_line"],
        "shift": vehicle["shift"],
        "model": vehicle["model"],
        "trim": vehicle["trim"],
        "part_id": part_name,
        "actual_part_code": installed,
        "production_date": vehicle["production_date"],
        "mismatch_reason": why,
    }


# Vehicles outermost, part families innermost: this fixes the order in which
# actual_code consumes random draws.
prod_rows = []
for _, vehicle in vehicle_df.iterrows():
    prod_rows.extend(
        _production_row(vehicle, part_name) for part_name, _variants in parts
    )
prod_df = pd.DataFrame(prod_rows)

# Keep only the three required CSVs. The EO table drops its parsed-date
# helper column; the other two frames are written as they are.
eo_df_out = eo_df.drop(columns=["effective_date_dt"])
bom_df_out, prod_df_out = bom_df, prod_df

# Save under a "data" folder inside the current working directory.
base_path = os.path.join(os.getcwd(), "data")
os.makedirs(base_path, exist_ok=True)
eo_path, bom_path, prod_path = (
    os.path.join(base_path, file_name)
    for file_name in ("eo_changes.csv", "bom_expected.csv", "production_actual.csv")
)

for frame, target in (
    (eo_df_out, eo_path),
    (bom_df_out, bom_path),
    (prod_df_out, prod_path),
):
    frame.to_csv(target, index=False)

# Show quick heads and file info (displayed as the notebook cell's result).
(
    eo_df_out.head(),
    bom_df_out.head(),
    prod_df_out.head(),
    (len(eo_df_out), len(bom_df_out), len(prod_df_out)),
    (eo_path, bom_path, prod_path),
)


(    eo_id       part_id old_part_code new_part_code effective_date  \
 0  EO_001       BATTERY         BAT_A         BAT_B     2024-06-01   
 1  EO_002        AIRBAG         ABG_X         ABG_Y     2024-06-07   
 2  EO_003  INFOTAINMENT        INF_V1        INF_V2     2024-06-11   
 3  EO_004         BRAKE       BRK_STD       BRK_ENH     2024-06-15   
 4  EO_005          SEAT        ST_FAB    ST_LEATHER     2024-06-19   
 
        change_type              reason       scope       released_by priority  
 0      Part Change      Cost Reduction  ALL_MODELS  Eng_Change_Board   Medium  
 1      Part Change  Safety Improvement  ALL_MODELS  Product_Planning     High  
 2  Software Update     Feature Upgrade  ALL_MODELS  Eng_Change_Board      Low  
 3      Part Change         Performance  ALL_MODELS  Product_Planning     High  
 4    Option Change       Market Demand  ALL_MODELS  Product_Planning   Medium  ,
   vehicle_id plant_code   model trim       part_id expected_part_code  \
 0     V000