In [1]:
# Imports and Setup
import matplotlib.pyplot as plt
import pandas as pd
import os

# Folder paths. All three are relative paths, so they resolve against the
# notebook's current working directory.
# Input folder that is scanned for energy CSV files.
energy_dir = "energyData_clean"
# Input folder that is scanned for the matching temperature CSV files.
temp_dir = "temperatureData_clean"
# Destination folder for the merged output CSV files.
output_dir = "merged_energy_temp"
# Create the output folder up front; exist_ok=True makes re-running this cell safe.
os.makedirs(output_dir, exist_ok=True)

In [2]:
# Interpolation Helper Function
def interpolate_temp(temp_df, target_date):
    """Estimate the temperature at ``target_date`` by linear interpolation.

    Parameters
    ----------
    temp_df : pandas.DataFrame
        Temperature readings with a ``Temp_C`` column and a datetime-like
        index. The index is expected to be sorted ascending, because the
        neighbouring readings are picked positionally.
    target_date : datetime-like
        The moment for which a temperature is wanted.

    Returns
    -------
    float or None
        The reading itself when ``target_date`` matches an index entry
        (including the first and the last one), the linearly interpolated
        value when it falls between two readings, or ``None`` when it lies
        outside the covered range or ``temp_df`` is empty.
    """
    SECONDS_PER_DAY = 86400.0

    if temp_df.empty:
        return None
    if target_date < temp_df.index.min() or target_date > temp_df.index.max():
        return None

    before_df = temp_df[temp_df.index <= target_date]
    if before_df.empty:
        return None
    before = before_df.iloc[-1]

    # An exact hit needs no interpolation. Checking it first also covers the
    # last reading, which has no later neighbour to interpolate towards.
    if before.name == target_date:
        return before['Temp_C']

    after_df = temp_df[temp_df.index > target_date]
    if after_df.empty:
        return None
    after = after_df.iloc[0]

    # Measure time in fractional days rather than with `.days`, which floors
    # to whole days and would distort results for sub-day timestamps.
    # `after` is strictly later than `before`, so total_days is never zero.
    total_days = (after.name - before.name).total_seconds() / SECONDS_PER_DAY
    offset_days = (target_date - before.name).total_seconds() / SECONDS_PER_DAY

    delta_temp = after['Temp_C'] - before['Temp_C']
    return before['Temp_C'] + (delta_temp / total_days) * offset_days

In [3]:
# Plotting Helper Function
def plot_temperature_trend(df, region):
    """Show a line chart of the interpolated temperature for one region.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index supplies the x-values and whose
        ``Estimated_Temp_C`` column supplies the y-values.
    region : str
        Region label used in the chart title.
    """
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(
        df.index,
        df['Estimated_Temp_C'],
        color='tab:blue',
        label='Estimated Temp (°C)',
    )
    ax.set_title(f"{region} - Interpolated Temperature")
    ax.set_xlabel("Date")
    ax.set_ylabel("Temp (°C)")
    ax.grid(True)
    fig.tight_layout()
    ax.legend()
    plt.show()

In [4]:
# Process Each Energy File
# The temperature folder is not modified by this loop, so list it once.
# Sorting makes the file picked per region reproducible, because os.listdir
# returns entries in arbitrary order.
temp_dir_files = sorted(os.listdir(temp_dir))

# Sorted so regions are processed and logged in a stable order across runs.
for filename in sorted(os.listdir(energy_dir)):
    if not filename.endswith("_hourly_daily.csv"):
        continue

    # The region code is the part of the filename before the first underscore.
    region = filename.split("_")[0]
    energy_path = os.path.join(energy_dir, filename)
    temp_files = [f for f in temp_dir_files if f.startswith(region + "_") and f.endswith(".csv")]
    if not temp_files:
        print(f"⚠️ No temp file found for {region}")
        continue

    # If several files match, the alphabetically first one is used.
    temp_path = os.path.join(temp_dir, temp_files[0])

    # Load Data
    energy_df = pd.read_csv(energy_path, index_col=0, parse_dates=True)
    temp_df = pd.read_csv(temp_path, parse_dates=["Date"])
    # Average duplicate readings per date; groupby also leaves the index sorted,
    # which the positional neighbour lookup in interpolate_temp relies on.
    temp_df = temp_df.groupby("Date")["Temp_C"].mean().to_frame()
    temp_df.index = pd.to_datetime(temp_df.index)

    # Clip energy data to the dates strictly inside the temperature date range.
    # .copy() gives an independent frame, so adding a column below cannot
    # trigger SettingWithCopyWarning.
    in_temp_range = (energy_df.index > temp_df.index.min()) & (energy_df.index < temp_df.index.max())
    energy_df = energy_df[in_temp_range].copy()

    # Interpolate Temperature
    energy_df["Estimated_Temp_C"] = energy_df.index.map(lambda d: interpolate_temp(temp_df, d))

    # Validation - Check for NaNs
    nan_count = energy_df['Estimated_Temp_C'].isna().sum()
    print(f"🔍 {region}: {nan_count} missing temperature values")

    if nan_count > 0:
        print("⚠️ Some temperature values were not interpolated. Check date coverage.")

    # Uncomment to inspect the interpolated series visually:
    # plot_temperature_trend(energy_df, region)

    # Save Output
    output_path = os.path.join(output_dir, f"{region}_merged.csv")
    energy_df.to_csv(output_path)
    print(f"✅ Saved merged file: {output_path}")

🔍 AEP: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\AEP_merged.csv
🔍 COMED: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\COMED_merged.csv
🔍 DAYTON: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\DAYTON_merged.csv
🔍 DEOK: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\DEOK_merged.csv
🔍 DOM: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\DOM_merged.csv
🔍 DUQ: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\DUQ_merged.csv
🔍 EKPC: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\EKPC_merged.csv
🔍 FE: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\FE_merged.csv
🔍 NI: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\NI_merged.csv
🔍 PJME: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\PJME_merged.csv
🔍 PJMW: 0 missing temperature values
✅ Saved merged file: merged_energy_temp\P