# Process Time Series of Population-Weighted Temperature for Each NERC TPL-08 Region

The raw population-weighted temperature data was provided by Heng Wan on 12 November 2025. If we eventually need the hourly temperature data by region, we can go back to him for it.

In [1]:
# Start by importing the packages we need:
import os
import datetime

import pandas as pd


## Set the Directory Structure

In [2]:
# Path to the directory that holds the temperature data files:
temperature_data_dir = (
    '/Users/burl878/Documents/Code/code_repos/'
    'burleyson-etal_2026_tbd/data/temperature_data/'
)


## Process the Time Series Data


In [3]:
# Define a function to process the temperature time series for all NERC TPL-08 regions at once:
def process_temperature_time_series(temperature_data_dir: str) -> pd.DataFrame:
    """Clean the raw population-weighted temperature file and write it back out.

    Reads 'NERC_average_pop_TPL08.csv' from `temperature_data_dir`, maps the
    region names in the 'NERC' column to their short names, converts the
    'T_min', 'T_mean', and 'T_max' columns from Kelvin to Fahrenheit (rounded
    to two decimals), sorts by region and date, and writes the result to
    'NERC_Region_Daily_Temperature_1980_to_2024.csv' in the same directory.

    Args:
        temperature_data_dir: Directory containing the raw input file. The
            processed file is written here too. A trailing path separator
            is optional.

    Returns:
        The processed dataframe with the columns 'Region', 'Date', 'T_Min',
        'T_Mean', and 'T_Max'. Rows whose 'NERC' value is not in the lookup
        table get a missing (NaN) 'Region'.
    """

    # Lookup from the raw TPL-08 region names to the "short_name" used in the
    # nerc_tpl08_region_names.csv file:
    region_short_names = {
        'California': 'CA',
        'ERCOT': 'ERCOT',
        'Florida': 'FL',
        'Great Basin': 'GB',
        'ISONE': 'ISONE',
        'Maritimes': 'MT',
        'MISO-N': 'MISO-N',
        'MISO-S': 'MISO-S',
        'NYISO': 'NYISO',
        'Pacific Northwest': 'PNW',
        'PJM': 'PJM',
        'Rocky Mtn': 'RM',
        'SERC': 'SERC',
        'Southwest': 'SW',
        'SPP-N': 'SPP-N',
        'SPP-S': 'SPP-S',
    }

    # Read in the raw time series data for all NERC regions. Build the path with
    # os.path.join so the directory works with or without a trailing slash:
    temp_df = pd.read_csv(os.path.join(temperature_data_dir, 'NERC_average_pop_TPL08.csv'))

    # Rename the TPL-08 regions to their short names (unknown names become NaN):
    temp_df['Region'] = temp_df['NERC'].map(region_short_names)

    # Set 'date' to a datetime variable:
    temp_df['Date'] = pd.to_datetime(temp_df['date'])

    # Convert from Kelvin to Fahrenheit and round off the values:
    for raw_column, output_column in (('T_min', 'T_Min'), ('T_mean', 'T_Mean'), ('T_max', 'T_Max')):
        temp_df[output_column] = (((temp_df[raw_column] - 273.15)*1.8)+32).round(2)

    # Only keep the columns we need:
    output_df = temp_df[['Region', 'Date', 'T_Min', 'T_Mean', 'T_Max']].copy()

    # Sort by region and then date, and reset the index value:
    output_df = output_df.sort_values(['Region', 'Date']).reset_index(drop=True)

    # Set the output filename:
    output_filename = 'NERC_Region_Daily_Temperature_1980_to_2024.csv'

    # Write out the dataframe to a .csv file:
    output_df.to_csv(os.path.join(temperature_data_dir, output_filename), sep=',', index=False)

    return output_df


In [4]:
# Run the processing function on the configured data directory and display the result:
output_df = process_temperature_time_series(
    temperature_data_dir=temperature_data_dir,
)

output_df


Unnamed: 0,Region,Date,T_Min,T_Mean,T_Max
0,CA,1980-01-01,47.43,51.47,56.80
1,CA,1980-01-02,41.88,47.89,57.35
2,CA,1980-01-03,43.34,48.38,56.84
3,CA,1980-01-04,42.60,47.63,55.79
4,CA,1980-01-05,43.85,48.36,55.37
...,...,...,...,...,...
262987,SW,2024-12-27,37.19,46.15,58.79
262988,SW,2024-12-28,38.72,48.08,60.67
262989,SW,2024-12-29,41.93,51.39,66.56
262990,SW,2024-12-30,43.06,52.66,64.09
