In [1]:
# Imports & load full city-level dataset
import pandas as pd
import os

# Load the city-level temperature table, then reduce it to U.S. observations
# that have a recorded average temperature.
city_path = "temperatureData/GlobalLandTemperaturesByCity.csv"
df_city = pd.read_csv(city_path, parse_dates=["dt"])

# Source column -> name used by the rest of the notebook (also fixes the
# column order of the resulting frame).
column_names = {
    "dt": "Date",
    "City": "Location",
    "AverageTemperature": "Temp_C",
}

is_us = df_city["Country"].eq("United States")
df_city = df_city.loc[is_us, list(column_names)].rename(columns=column_names)

# Discard rows without a temperature reading.
df_city = df_city[df_city["Temp_C"].notna()]

In [2]:
# Mapping from PJM region code to the list of city names associated with it.
# A city may appear under several regions.
# NOTE(review): confirm which territory "NI" denotes in the load data; the
# cities listed for it presumably assume northern Indiana — TODO verify.
region_to_cities = dict(
    AEP=[
        "Akron", "Columbus", "Dayton", "Charleston",
        "Indianapolis", "Lexington Fayette", "Huntington",
    ],
    COMED=["Chicago", "Rockford", "Aurora", "Naperville", "Joliet"],
    DAYTON=["Dayton"],
    DEOK=["Cincinnati", "Lexington Fayette"],
    DOM=["Charlotte", "Raleigh", "Richmond", "Virginia Beach", "Washington"],
    DUQ=["Pittsburgh"],
    EKPC=["Lexington Fayette", "Louisville"],
    FE=["Akron", "Allentown", "Cleveland", "Newark", "Baltimore"],
    NI=["South Bend", "Fort Wayne"],
    PJM_Load=[
        "Columbus", "Detroit", "Charlotte", "Chicago",
        "Baltimore", "Philadelphia", "Cleveland", "Indianapolis",
        "Louisville", "Nashville", "Jersey City", "Akron",
    ],
    PJME=["Baltimore", "Newark", "Jersey City", "Washington"],
    PJMW=["Charleston", "Lexington Fayette", "Columbus", "Dayton"],
)



In [3]:
# Write one temperature CSV per PJM region.
#
# For every region in `region_to_cities`, the rows of `df_city` whose Location
# is one of the region's cities are saved to
#   temperatureData_clean/<region>_<first date>_to_<last date>.csv
# Matching is by city name only, so any other city in `df_city` that shares a
# listed name ends up in the same file.

# Create output folder
output_dir = "temperatureData_clean"
os.makedirs(output_dir, exist_ok=True)

# City names actually present in the temperature data; computed once and
# reused for every region.
available_cities = set(df_city["Location"].unique())

# Loop through every region and its associated cities
for region, cities in region_to_cities.items():

    # Report listed cities that have no data rather than dropping them
    # silently: `isin` below simply ignores names it cannot find.
    missing_cities = [city for city in cities if city not in available_cities]
    if missing_cities:
        print(f"⚠️  {region}: no temperature rows for {', '.join(missing_cities)}")

    region_temp = df_city[df_city["Location"].isin(cities)].copy()

    if region_temp.empty:
        print(f"⚠️  No temperature rows found for {region}")
        continue

    # The covered date span becomes part of the file name.
    date_min, date_max = region_temp["Date"].min(), region_temp["Date"].max()
    date_str_min = date_min.strftime("%Y-%m-%d")
    date_str_max = date_max.strftime("%Y-%m-%d")

    # use "PJM" instead of "PJM_Load" in filename
    file_region = "PJM" if region == "PJM_Load" else region
    outfile = f"{output_dir}/{file_region}_{date_str_min}_to_{date_str_max}.csv"

    region_temp.to_csv(outfile, index=False)
    print(f"✅  Saved {file_region} temperature data → {outfile}")

✅  Saved AEP temperature data → temperatureData_clean/AEP_1743-11-01_to_2013-09-01.csv
✅  Saved COMED temperature data → temperatureData_clean/COMED_1743-11-01_to_2013-09-01.csv
✅  Saved DAYTON temperature data → temperatureData_clean/DAYTON_1743-11-01_to_2013-09-01.csv
✅  Saved DEOK temperature data → temperatureData_clean/DEOK_1743-11-01_to_2013-09-01.csv
✅  Saved DOM temperature data → temperatureData_clean/DOM_1743-11-01_to_2013-09-01.csv
✅  Saved DUQ temperature data → temperatureData_clean/DUQ_1743-11-01_to_2013-09-01.csv
✅  Saved EKPC temperature data → temperatureData_clean/EKPC_1743-11-01_to_2013-09-01.csv
✅  Saved FE temperature data → temperatureData_clean/FE_1743-11-01_to_2013-09-01.csv
✅  Saved NI temperature data → temperatureData_clean/NI_1743-11-01_to_2013-09-01.csv
✅  Saved PJM temperature data → temperatureData_clean/PJM_1743-11-01_to_2013-09-01.csv
✅  Saved PJME temperature data → temperatureData_clean/PJME_1743-11-01_to_2013-09-01.csv
✅  Saved PJMW temperature data 