In [1]:
import os
import re
import numpy as np
import pandas as pd
from collections import defaultdict

# =========================
# Paths (adjust as needed)
# =========================
folder_path = 'All_Electric_Cascades'   # cascades directory
mpg_path    = "filtered_mpg_noGTPD.csv"                                  # cleaned MPG file with 'Model','Year','MPG (avg)'

# =========================
# Assumptions / Params
# =========================
CURRENT_YEAR   = 2025
HORIZON_YEARS  = 10
price_of_gas   = [3.05, 3.00, 2.96, 2.93, 2.93, 2.93, 2.91, 2.89, 2.90, 2.87]  # len >= HORIZON_YEARS

# Per-mile maintenance ($/mile) by age 1..15; linear tail beyond 15
ages = np.arange(1, 16)
costs = np.array([0.07, 0.08, 0.11, 0.14, 0.17, 0.20, 0.23, 0.26, 0.30, 0.31,
                  0.32, 0.33, 0.34, 0.35, 0.36])
m_lin, b_lin = np.polyfit(ages, costs, 1)

def cost_per_mile(age_years: int) -> float:
    if age_years < 1:
        return float(costs[0])
    if age_years <= 15:
        return float(costs[int(age_years) - 1])
    return float(m_lin * age_years + b_lin)

# =========================
# Load MPG master (uses MPG (avg))
# =========================
mpg_data = pd.read_csv(mpg_path) if mpg_path.lower().endswith('.csv') else pd.read_excel(mpg_path)
for c in ['Model', 'Year', 'MPG (avg)']:
    if c not in mpg_data.columns:
        raise ValueError(f"MPG file missing required column: {c}")

mpg_data['Model']     = mpg_data['Model'].astype(str).str.strip()
mpg_data['Year']      = pd.to_numeric(mpg_data['Year'], errors='coerce')
mpg_data['MPG (avg)'] = pd.to_numeric(mpg_data['MPG (avg)'], errors='coerce')

# Build lookup dict for speed
mpg_lookup = {(str(r['Model']).strip(), int(r['Year'])): float(r['MPG (avg)'])
              for _, r in mpg_data.dropna(subset=['Model', 'Year', 'MPG (avg)']).iterrows()}

# =========================
# Helpers aligned to your original flow
# =========================
def parse_dept_model_from_filename(filename: str):
    base = os.path.splitext(filename)[0]
    m = re.match(r'(.+)_([A-Za-z0-9\s\-]+)$', base)
    if not m:
        return None, None
    return m.group(1).strip(), m.group(2).strip()

def detect_step_indices(df: pd.DataFrame):
    idxs, i = [], 1
    while f"Step {i} Year" in df.columns:
        idxs.append(i)
        i += 1
    return idxs

# =========================
# Accumulators & Logs
# =========================
dept_totals        = defaultdict(lambda: {'Fuel_10y': 0.0, 'Maint_10y': 0.0})
dept_model_totals  = defaultdict(lambda: {'Fuel_10y': 0.0, 'Maint_10y': 0.0})
detail_rows        = []
join_missing_pairs = defaultdict(int)   # (Model, Year) -> count of misses
skipped_pairs_log  = []                 # why a pair was skipped

# =========================
# Main loop
# =========================
if len(price_of_gas) < HORIZON_YEARS:
    raise ValueError("price_of_gas must have >= HORIZON_YEARS entries.")

for filename in os.listdir(folder_path):
    if not filename.endswith('.csv'):
        continue
    if filename.endswith('__log.csv'):
        continue
    if filename in {"Department_Savings_Summary.csv",
                    "Department_Model_Savings_Summary.csv",
                    "Savings_Detail.csv",
                    "MPG_Join_Diagnostics.csv",
                    "Skipped_Pairs_Log.csv"}:
        continue

    cascades = pd.read_csv(os.path.join(folder_path, filename))
    cascades.columns = cascades.columns.str.strip()

    # Department/Model from file (fallback to filename)
    dept_in = cascades.get("Department", pd.Series([None])).iloc[0]
    model_in = cascades.get("Model", pd.Series([None])).iloc[0]
    dept_fn, model_fn = parse_dept_model_from_filename(filename)
    department   = dept_in if pd.notna(dept_in) else dept_fn
    vehicle_model = model_in if pd.notna(model_in) else model_fn
    if department is None or vehicle_model is None:
        continue

    step_indices = detect_step_indices(cascades)
    if len(step_indices) < 2:
        continue

    # Evaluate all adjacent pairs (Step k -> Step k+1)
    for _, row in cascades.iterrows():
        for k in range(1, len(step_indices)):
            i, j = k, k+1
            y_new = row.get(f"Step {i} Year", np.nan)
            y_old = row.get(f"Step {j} Year", np.nan)
            eg_new = row.get(f"Step {i} Age*Gallons", np.nan)  # not strictly needed for fuel savings
            eg_old = row.get(f"Step {j} Age*Gallons", np.nan)

            if pd.isna(y_new) or pd.isna(y_old):
                continue
            y_new = int(y_new); y_old = int(y_old)

            mpg_new = mpg_lookup.get((vehicle_model, y_new))
            mpg_old = mpg_lookup.get((vehicle_model, y_old))
            if (mpg_new is None) or (mpg_old is None) or not np.isfinite(mpg_new) or not np.isfinite(mpg_old):
                if (vehicle_model, y_new) not in mpg_lookup:
                    join_missing_pairs[(vehicle_model, y_new)] += 1
                if (vehicle_model, y_old) not in mpg_lookup:
                    join_missing_pairs[(vehicle_model, y_old)] += 1
                skipped_pairs_log.append({
                    "Department": department, "Model": vehicle_model,
                    "Pair": f"Step {i}->{j}", "Year_New": y_new, "Year_Old": y_old,
                    "Reason": "missing_mpg"
                })
                continue

            # 1) Annual gallons for the OLD vehicle from Age×Gallons / age
            age_old = CURRENT_YEAR - y_old
            if age_old <= 0:
                skipped_pairs_log.append({
                    "Department": department, "Model": vehicle_model,
                    "Pair": f"Step {i}->{j}", "Year_New": y_new, "Year_Old": y_old,
                    "Reason": "age_old_le_0"
                })
                continue
            try:
                gals_old_per_year = float(eg_old) / float(age_old)
            except Exception:
                skipped_pairs_log.append({
                    "Department": department, "Model": vehicle_model,
                    "Pair": f"Step {i}->{j}", "Year_New": y_new, "Year_Old": y_old,
                    "Reason": "bad_agegals_old"
                })
                continue

            # 2) Annual miles (old & new), 3) Efficiency gain & gallons saved
            miles_old = gals_old_per_year * mpg_old
            miles_new = gals_old_per_year * mpg_new
            if miles_old <= 0:
                skipped_pairs_log.append({
                    "Department": department, "Model": vehicle_model,
                    "Pair": f"Step {i}->{j}", "Year_New": y_new, "Year_Old": y_old,
                    "Reason": "nonpositive_miles_old"
                })
                continue
            efficiency_gain = miles_new / miles_old               # = mpg_new / mpg_old
            gallons_saved_per_year = gals_old_per_year * (1 - (1/efficiency_gain))  # = gals_old * (1 - mpg_old/mpg_new)

            # 4) Fuel savings stream ($)
            fuel_10y = sum(price_of_gas[t] * gallons_saved_per_year for t in range(HORIZON_YEARS))

            # 5–6) Avoided M&R at constant miles (hold miles at old baseline)
            miles_baseline = miles_old
            maint_10y = 0.0
            new_age_0 = CURRENT_YEAR - y_new
            for t in range(HORIZON_YEARS):
                old_age_t = age_old + t
                new_age_t = new_age_0 + t
                cpm_old = cost_per_mile(old_age_t)
                cpm_new = cost_per_mile(new_age_t)
                maint_10y += (cpm_old - cpm_new) * miles_baseline

            # Aggregate
            dept_totals[department]['Fuel_10y'] += fuel_10y
            dept_totals[department]['Maint_10y'] += maint_10y
            dept_model_totals[(department, vehicle_model)]['Fuel_10y'] += fuel_10y
            dept_model_totals[(department, vehicle_model)]['Maint_10y'] += maint_10y

            # Detail row
            detail_rows.append({
                "Department": department,
                "Model": vehicle_model,
                "Pair": f"Step {i}->{j}",
                "Year_New": y_new,
                "Year_Old": y_old,
                "AnnualGallons_Old": round(gals_old_per_year, 4),
                "MPG_New": round(mpg_new, 3),
                "MPG_Old": round(mpg_old, 3),
                "FuelSavings_10y": round(fuel_10y, 2),
                "MaintSavings_10y": round(maint_10y, 2),
                "TotalSavings_10y": round(fuel_10y + maint_10y, 2)
            })

# =========================
# Write outputs
# =========================
# Department summary
dept_rows = []
for dept, vals in dept_totals.items():
    total = vals['Fuel_10y'] + vals['Maint_10y']
    dept_rows.append({
        "Department": dept,
        "FuelSavings_10y": round(vals['Fuel_10y'], 2),
        "MaintSavings_10y": round(vals['Maint_10y'], 2),
        "Total 10-Year Savings ($)": round(total, 2)
    })
dept_df = pd.DataFrame(dept_rows).sort_values("Total 10-Year Savings ($)", ascending=False)
dept_df.to_csv(os.path.join("Dept Savings Results", "Department_Savings_Summary.csv"), index=False)

# Per Department–Model breakdown
dm_rows = []
for (dept, model), vals in dept_model_totals.items():
    total = vals['Fuel_10y'] + vals['Maint_10y']
    dm_rows.append({
        "Department": dept,
        "Model": model,
        "FuelSavings_10y": round(vals['Fuel_10y'], 2),
        "MaintSavings_10y": round(vals['Maint_10y'], 2),
        "Total 10-Year Savings ($)": round(total, 2)
    })
dm_df = pd.DataFrame(dm_rows).sort_values(["Department", "Total 10-Year Savings ($)"], ascending=[True, False])
dm_df.to_csv(os.path.join("Dept Savings Results", "Department_Model_Savings_Summary.csv"), index=False)

# Detail & diagnostics
if detail_rows:
    pd.DataFrame(detail_rows).to_csv(os.path.join("Dept Savings Results", "Savings_Detail.csv"), index=False)

if join_missing_pairs:
    jd = [{"Model": m, "Year": y, "MissingCount": c} for (m, y), c in join_missing_pairs.items()]
    pd.DataFrame(jd).sort_values(["MissingCount", "Model", "Year"], ascending=[False, True, True]) \
        .to_csv(os.path.join("Dept Savings Results", "MPG_Join_Diagnostics.csv"), index=False)

if skipped_pairs_log:
    pd.DataFrame(skipped_pairs_log).to_csv(os.path.join(folder_path, "Skipped_Pairs_Log.csv"), index=False)

print("Summary saved successfully.")


OSError: Cannot save file into a non-existent directory: 'Dept Savings Results'