# Expand the Heat Wave and Cold Snap Libraries to Include Unique IDs and the Number of Regions Impacted


The heat wave and cold snap libraries are based on data provided by Alfred Wan on 10-Nov 2025. He used a definition that aligns with the draft NERC TPL-08 standard. Heat waves are events in which the 3-day rolling average of daily maximum temperature exceeds the 97.5 percentile. Cold snaps which are based on 3-day rolling average of daily minimum temperatures below the 2.5 percentile. County-level temperatures were population-weighted in each TPL-08 region to create hourly time series by region.

In [63]:
# Start by importing the packages we need:
import os
import warnings

import pandas as pd
import numpy as np

from datetime import timedelta


## Suppress Future Warnings


In [64]:
# Suppress future warnings:
warnings.simplefilter(action='ignore', category=FutureWarning)


## Set the Directory Structure

In [65]:
# Identify the top-level directory and the subdirectory where the data will be stored:
hw_cs_data_dir =  '/Users/burl878/Documents/Code/code_repos/gdo_climate_toolsuite_visualizations/data/'
data_output_dir = '/Users/burl878/Documents/Code/code_repos/gdo_climate_toolsuite_visualizations/data/'


## Process the Heat Wave and Cold Snap Library


In [66]:
# Define a function to add new information to the existing HW and CS libraries:
def expand_hw_cs_libraries(hw_cs: str, time_window: int, hw_cs_data_dir: str, data_output_dir: str):

    # Read in NERC region name file and rename some columns:
    nerc = pd.read_csv((hw_cs_data_dir + 'nerc_tpl08_region_names.csv'))
    nerc.rename(columns={'number': 'NERC', 'short_name': 'Region'}, inplace=True)
        
    if hw_cs == 'HW':
       # Read in the raw data from the heat wave library:
       temp_df = pd.read_csv((hw_cs_data_dir + 'Tmax_based_heat_wave_library.csv'))
    elif hw_cs == 'CS':
       # Read in the raw data from the cold snap library:
       temp_df = pd.read_csv((hw_cs_data_dir + 'Tmin_based_cold_snap_library.csv'))
 
    # Rename the TPL-08 regions to match the "short_name" in the nerc_tpl08_region_names.csv file:
    temp_df.loc[(temp_df['NERC_ID'] == 'California'),'Region'] = 'CA'
    temp_df.loc[(temp_df['NERC_ID'] == 'ERCOT'),'Region'] = 'ERCOT'
    temp_df.loc[(temp_df['NERC_ID'] == 'Florida'),'Region'] = 'FL'
    temp_df.loc[(temp_df['NERC_ID'] == 'Great Basin'),'Region'] = 'GB'
    temp_df.loc[(temp_df['NERC_ID'] == 'ISONE'),'Region'] = 'ISONE'
    temp_df.loc[(temp_df['NERC_ID'] == 'Maritimes'),'Region'] = 'MT'
    temp_df.loc[(temp_df['NERC_ID'] == 'MISO-N'),'Region'] = 'MISO-N'
    temp_df.loc[(temp_df['NERC_ID'] == 'MISO-S'),'Region'] = 'MISO-S'
    temp_df.loc[(temp_df['NERC_ID'] == 'NYISO'),'Region'] = 'NYISO'
    temp_df.loc[(temp_df['NERC_ID'] == 'Pacific Northwest'),'Region'] = 'PNW'
    temp_df.loc[(temp_df['NERC_ID'] == 'PJM'),'Region'] = 'PJM'
    temp_df.loc[(temp_df['NERC_ID'] == 'Rocky Mtn'),'Region'] = 'RM'
    temp_df.loc[(temp_df['NERC_ID'] == 'SERC'),'Region'] = 'SERC'
    temp_df.loc[(temp_df['NERC_ID'] == 'Southwest'),'Region'] = 'SW'
    temp_df.loc[(temp_df['NERC_ID'] == 'SPP-N'),'Region'] = 'SPP-N'
    temp_df.loc[(temp_df['NERC_ID'] == 'SPP-S'),'Region'] = 'SPP-S'

    # Merge in the NERC region name:
    temp_df = temp_df.merge(nerc, on='Region', how='left')

    # Rename the temperature variable to something more generic:
    if hw_cs == 'HW':
       temp_df.rename(columns={'highest_temperature': 'T_Max_Min'}, inplace=True)
    elif hw_cs == 'CS':
       temp_df.rename(columns={'lowest_temperature': 'T_Max_Min'}, inplace=True) 

    # Convert the temperatures from Kelvin to Fahrenheit:
    temp_df['T_Max_Min'] = ((1.8 * (temp_df['T_Max_Min'] - 273)) + 32).round(2)
    
    # Only keep the columns we need:
    temp_df = temp_df[['NERC','Region','start_date','end_date','centroid_date','T_Max_Min','duration']].copy()
      
    # Rename the columns for consistency:
    temp_df.rename(columns={'start_date': 'Start', 'end_date': 'End', 'centroid_date': 'Center', 'duration': 'Duration'}, inplace=True)
    
    # Set all dates to datetimes variable and extract the day of year for the centroid date:
    temp_df['Start'] = pd.to_datetime(temp_df['Start'])
    temp_df['End'] = pd.to_datetime(temp_df['End'])
    temp_df['Center'] = pd.to_datetime(temp_df['Center'])
    temp_df['Center_DOY'] = temp_df['Center'].dt.day_of_year

    # Add a new column with a unique row number and an empty column to the dataframe to store the unique ID:
    temp_df['Row_Number'] = np.arange(len(temp_df))
    temp_df['UID_Temp'] = np.nan

    # Make a list of all of the unique NERC regions and definitions in "temp_df":
    nerc_regions = temp_df['NERC'].unique()
    
    # Loop over the unique NERC region and definition combinations:
    for i in range(len(nerc_regions)):
                  
        # Subset to just the data for that unique combination:
        subset_df = temp_df[temp_df['NERC'].isin([nerc_regions[i]])].copy()

        # If the subset isn't empty then assign a unique ID for each event with that combination:
        if subset_df.empty == False:
          
            # Add a new column with a unique row number:
            subset_df['Event_Number'] = np.arange(len(subset_df)) + 1

            # Add a new column with a unique identifier:
            if hw_cs == 'HW': 
               subset_df['UID'] = ('HW_NERC' + subset_df['NERC'].astype(str) + '_Event' + subset_df['Event_Number'].astype(str))
            else:
               subset_df['UID'] = ('CS_NERC' + subset_df['NERC'].astype(str) + '_Event' + subset_df['Event_Number'].astype(str)) 

            # Loop over the subset and assign the UID back into the main dataframe:
            for row in range(len(subset_df)):
                temp_df.loc[temp_df['Row_Number'] == subset_df['Row_Number'].iloc[row], 'UID_Temp'] = subset_df['UID'].iloc[row]

            # Clean up and move to the next combination:
            del subset_df, row

    # Rename the UID variable:
    temp_df.rename(columns={'UID_Temp': 'UID'}, inplace=True)

    # Loop over the rows of the dataframe and find number of corresponding events within X days of the event in a given row:
    for row in range(len(temp_df)):
        # Subset the data to just events in the library whose centroid date is within X days of centroid date for the event in a given row:
        time_subset_df = temp_df[(pd.to_datetime(temp_df['Center']) > (pd.to_datetime(temp_df['Center'].iloc[row]) - pd.Timedelta(time_window, 'd'))) & 
                                 (pd.to_datetime(temp_df['Center']) < (pd.to_datetime(temp_df['Center'].iloc[row]) + pd.Timedelta(time_window, 'd')))].copy()

        # Store the size of that subset:
        if time_subset_df.empty == False:
           temp_df.at[row, 'Regions_Impacted'] = time_subset_df.shape[0]
        else:
           temp_df.at[row, 'Regions_Impacted'] = 1
            
    # Make a copy of the necessary variables to output:
    output_df = temp_df[['UID', 'NERC', 'Region', 'Start', 'End', 'Center', 'Center_DOY', 'T_Max_Min', 'Duration', 'Regions_Impacted']].copy(deep=False)

    # Sort the date by region number and date then reset the index:
    output_df.sort_values(by=['NERC', 'Start'], inplace=True)
    output_df.reset_index(inplace=True, drop=True)
    
    # Write out the dataframe to a .csv file:
    if hw_cs == 'HW':
       output_df.to_csv((os.path.join(data_output_dir, 'hw_library_expanded.csv')), sep=',', index=False)
    if hw_cs == 'CS':
       output_df.to_csv((os.path.join(data_output_dir, 'cs_library_expanded.csv')), sep=',', index=False)
        
    return output_df


In [68]:
# Test the function:
output_df = expand_hw_cs_libraries(hw_cs = 'HW',
                                   time_window = 3, # Time (in +/- days) over which to identify coincident events across NERC regions
                                   hw_cs_data_dir = hw_cs_data_dir, 
                                   data_output_dir = data_output_dir)

output_df


Unnamed: 0,UID,NERC,Region,Start,End,Center,Center_DOY,T_Max_Min,Duration,Regions_Impacted
0,HW_NERC1_Event1,1,CA,1980-07-23,1980-08-02,1980-07-28,210,94.16,11,5.0
1,HW_NERC1_Event2,1,CA,1980-08-10,1980-08-11,1980-08-10,223,90.64,2,3.0
2,HW_NERC1_Event3,1,CA,1980-09-30,1980-10-03,1980-10-01,275,93.17,4,1.0
3,HW_NERC1_Event4,1,CA,1981-08-06,1981-08-09,1981-08-07,219,92.93,4,2.0
4,HW_NERC1_Event5,1,CA,1981-08-27,1981-08-28,1981-08-27,239,92.01,2,1.0
...,...,...,...,...,...,...,...,...,...,...
1723,HW_NERC16_Event96,16,SW,2024-06-06,2024-06-07,2024-06-06,158,99.58,2,2.0
1724,HW_NERC16_Event97,16,SW,2024-06-13,2024-06-13,2024-06-13,165,99.01,1,2.0
1725,HW_NERC16_Event98,16,SW,2024-07-03,2024-07-14,2024-07-08,190,101.40,12,8.0
1726,HW_NERC16_Event99,16,SW,2024-08-05,2024-08-05,2024-08-05,218,98.97,1,5.0
