In [1]:
# Imports and Setup
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Folder paths (all relative to the notebook's working directory)
energy_dir  = Path("energyData_clean")          # input: per-region energy CSVs
temp_dir    = Path("temperatureData_filled")    # input: gap-free daily temps
output_dir  = Path("merged_energy_temp")        # output: one merged CSV per region
# parents=True keeps this working if output_dir is ever pointed at a nested path;
# exist_ok=True makes the cell safe to re-run.
output_dir.mkdir(parents=True, exist_ok=True)

In [2]:
# Plotting Helper Function
def plot_temperature_trend(df: pd.DataFrame, region: str) -> None:
    """Draw the ``Estimated_Temp_C`` column of ``df`` as a line over its index.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing an ``Estimated_Temp_C`` column; the frame's index is
        used for the x-axis.
    region : str
        Label inserted into the figure title.

    Returns
    -------
    None
        The figure is rendered through ``plt.show()``; nothing is returned.

    Raises
    ------
    KeyError
        If ``df`` has no ``Estimated_Temp_C`` column.
    """
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(df.index, df["Estimated_Temp_C"], lw=0.6, label="Estimated Temp (°C)")
    ax.set_title(f"{region} – Temperature mapped onto energy index")
    ax.set_xlabel("Date-Time")
    ax.set_ylabel("Temp (°C)")
    ax.grid(True)
    fig.tight_layout()
    ax.legend()
    plt.show()

In [3]:
# Process Each Energy File
def _attach_daily_temperature(energy_df: pd.DataFrame, temp_df: pd.DataFrame) -> pd.DataFrame:
    """Return the energy rows covered by ``temp_df`` with an ``Estimated_Temp_C`` column.

    Parameters
    ----------
    energy_df : pd.DataFrame
        Energy readings indexed by a ``DatetimeIndex`` (any time of day).
    temp_df : pd.DataFrame
        Temperatures indexed by a ``DatetimeIndex`` with a ``Temp_C`` column;
        expected to hold one row per calendar day.

    Returns
    -------
    pd.DataFrame
        A new frame (the inputs are not modified) restricted to the calendar
        days between the first and last temperature day, inclusive, with the
        matching day's temperature in ``Estimated_Temp_C``. Days inside that
        range that have no temperature row yield NaN.
    """
    # Key temperatures by calendar day. Dropping repeated days keeps Index.map
    # from failing on a non-unique lookup index (first value per day wins).
    daily_temp = temp_df["Temp_C"].copy()
    daily_temp.index = daily_temp.index.normalize()
    daily_temp = daily_temp[~daily_temp.index.duplicated(keep="first")]

    # Compare by calendar day, not by raw timestamp: the last temperature stamp
    # is midnight, so a raw `<=` would discard the 01:00-23:00 rows of the last
    # covered day even though a temperature exists for them.
    energy_days = energy_df.index.normalize()
    covered = (energy_days >= daily_temp.index.min()) & (energy_days <= daily_temp.index.max())

    # Explicit copy so the column assignment never writes into a slice of the
    # caller's frame (avoids SettingWithCopyWarning).
    merged = energy_df.loc[covered].copy()
    merged["Estimated_Temp_C"] = energy_days[covered].map(daily_temp).to_numpy()
    return merged


# sorted() makes the processing (and log) order deterministic across platforms.
for efile in sorted(energy_dir.glob("*_hourly_daily.csv")):
    region = efile.stem.split("_")[0]          # e.g. "AEP"

    tfile = temp_dir / f"{region}_filled.csv"
    if not tfile.exists():
        print(f"⚠️  No filled temp file for {region}")
        continue

    # ── Load data
    energy_df = pd.read_csv(efile, index_col=0, parse_dates=True).sort_index()
    temp_df   = pd.read_csv(tfile, index_col=0, parse_dates=True).sort_index()

    # ── Clip energy rows to temperature coverage and map temperature by calendar day
    energy_df = _attach_daily_temperature(energy_df, temp_df)
    if energy_df.empty:
        # Nothing to merge: don't write an empty CSV that looks like a valid result.
        print(f"⚠️  {region}: no energy rows inside the temperature date range – skipped")
        continue

    # ── Validation
    nan_count = energy_df["Estimated_Temp_C"].isna().sum()
    print(f"🔍 {region}: {nan_count} missing temperature values after merge")
    if nan_count:
        print("⚠️  Unexpected NaNs – check temp file completeness or date range.")

    # plot_temperature_trend(energy_df, region)   # ← uncomment for a quick visual

    # ── Save merged file
    out_path = output_dir / f"{region}_merged.csv"
    energy_df.to_csv(out_path)
    print(f"✅  Saved merged file → {out_path}")

🔍 AEP: 0 missing temperature values after merge
✅  Saved merged file → merged_energy_temp\AEP_merged.csv
🔍 COMED: 0 missing temperature values after merge
✅  Saved merged file → merged_energy_temp\COMED_merged.csv
🔍 DAYTON: 0 missing temperature values after merge
✅  Saved merged file → merged_energy_temp\DAYTON_merged.csv
🔍 DEOK: 0 missing temperature values after merge
✅  Saved merged file → merged_energy_temp\DEOK_merged.csv
🔍 DOM: 0 missing temperature values after merge
✅  Saved merged file → merged_energy_temp\DOM_merged.csv
🔍 DUQ: 0 missing temperature values after merge
✅  Saved merged file → merged_energy_temp\DUQ_merged.csv
🔍 EKPC: 0 missing temperature values after merge
✅  Saved merged file → merged_energy_temp\EKPC_merged.csv
🔍 FE: 0 missing temperature values after merge
✅  Saved merged file → merged_energy_temp\FE_merged.csv
🔍 NI: 0 missing temperature values after merge
✅  Saved merged file → merged_energy_temp\NI_merged.csv
🔍 PJME: 0 missing temperature values after me