# Expand the Heat Wave and Cold Snap Libraries to Include Peak Loads and Regions Impacted


In [1]:
# Start by importing the packages we need:
import os
import warnings

import pandas as pd
import numpy as np

from datetime import timedelta


## Suppress Future Warnings


In [2]:
# Silence FutureWarning messages so they do not clutter the notebook output:
warnings.simplefilter('ignore', FutureWarning)


## Set the Directory Structure

In [3]:
# The input libraries and the expanded output files share the repository's data directory,
# so both paths are built from a single root (trailing slash kept on each):
_repo_dir = '/Users/burl878/Documents/Code/code_repos/gdo_climate_toolsuite_visualizations'
hw_cs_data_dir = _repo_dir + '/data/'
data_output_dir = _repo_dir + '/data/'


## Process the Heat Wave and Cold Snap Library


In [7]:
# Define a function to add new information to the existing HW and CS libraries:
def expand_hw_cs_libraries(hw_cs: str, hw_cs_data_dir: str, data_output_dir: str) -> pd.DataFrame:
    """Expand the heat wave or cold snap library with region names and unique event IDs.

    Reads 'nerc_region_names.csv' and either 'hw_library.csv' (hw_cs == 'HW') or
    'cs_library.csv' (hw_cs == 'CS') from ``hw_cs_data_dir``, merges the region
    short name onto each event, renames the columns, converts the date columns to
    datetimes, adds the day of year of the center date, and assigns every event a
    UID of the form '<HW|CS>_NERC<n>_Def<d>_Event<k>'. ``k`` is the 1-based
    position of the event, in file order, within its NERC/definition combination.
    The result is written to 'hw_library_expanded.csv' or
    'cs_library_expanded.csv' in ``data_output_dir``.

    Args:
        hw_cs: Which library to process, either 'HW' or 'CS'.
        hw_cs_data_dir: Directory holding the input .csv files (with or without
            a trailing path separator).
        data_output_dir: Directory the expanded .csv file is written to.

    Returns:
        The expanded library with columns UID, NERC, Region, Def, Start, End,
        Center, Center_DOY, T_Max_Min, Duration, and Coverage.

    Raises:
        ValueError: If ``hw_cs`` is neither 'HW' nor 'CS'.
    """
    # Per-library settings: (input file, raw temperature column, output file).
    library_settings = {
        'HW': ('hw_library.csv', 'highest_temperature', 'hw_library_expanded.csv'),
        'CS': ('cs_library.csv', 'lowest_temperature', 'cs_library_expanded.csv'),
    }

    # Fail early with a clear message instead of an UnboundLocalError further down:
    if hw_cs not in library_settings:
        raise ValueError(f"hw_cs must be 'HW' or 'CS', got {hw_cs!r}")
    input_file, temperature_column, output_file = library_settings[hw_cs]

    # Read in NERC region name file and rename some columns:
    nerc = pd.read_csv(os.path.join(hw_cs_data_dir, 'nerc_region_names.csv'))
    nerc = nerc.rename(columns={'number': 'NERC', 'short_name': 'Region'})
    nerc['NERC'] = pd.to_numeric(nerc['NERC'])

    # Read in the raw data from the selected library:
    temp_df = pd.read_csv(os.path.join(hw_cs_data_dir, input_file))

    # Rename the NERC region variable and strip the "NERC" and "def" string prefixes
    # (plain substring replacement, no regular expression needed):
    temp_df = temp_df.rename(columns={'NERC_ID': 'NERC'})
    temp_df['NERC'] = pd.to_numeric(temp_df['NERC'].str.replace('NERC', '', regex=False))
    temp_df['definition'] = temp_df['definition'].str.replace('def', '', regex=False)

    # Merge in the NERC region name (left join keeps every event in its original order):
    temp_df = temp_df.merge(nerc, on='NERC', how='left')

    # Rename the temperature variable to something more generic:
    temp_df = temp_df.rename(columns={temperature_column: 'T_Max_Min'})

    # Only keep the columns we need:
    temp_df = temp_df[['NERC', 'Region', 'definition', 'start_date', 'end_date', 'centroid_date',
                       'T_Max_Min', 'duration', 'spatial_coverage']].copy()

    # Rename the columns for consistency:
    temp_df = temp_df.rename(columns={'definition': 'Def', 'start_date': 'Start', 'end_date': 'End',
                                      'centroid_date': 'Center', 'duration': 'Duration',
                                      'spatial_coverage': 'Coverage'})

    # Set all dates to datetime variables and extract the day of year:
    for date_column in ('Start', 'End', 'Center'):
        temp_df[date_column] = pd.to_datetime(temp_df[date_column])
    temp_df['Center_DOY'] = temp_df['Center'].dt.day_of_year

    # Number the events within each NERC region / definition combination in row order.
    # cumcount() does this in one pass and, unlike an explicit loop over every possible
    # combination, is unaffected by combinations that have no events:
    event_number = temp_df.groupby(['NERC', 'Def'], sort=False, dropna=False).cumcount() + 1

    # Build the unique identifier for every event:
    temp_df['UID'] = (hw_cs + '_NERC' + temp_df['NERC'].astype(str)
                      + '_Def' + temp_df['Def'].astype(str)
                      + '_Event' + event_number.astype(str))

    # Make a copy of the necessary variables to output:
    output_df = temp_df[['UID', 'NERC', 'Region', 'Def', 'Start', 'End', 'Center', 'Center_DOY',
                         'T_Max_Min', 'Duration', 'Coverage']].copy()

    # Write out the dataframe to a .csv file:
    output_df.to_csv(os.path.join(data_output_dir, output_file), sep=',', index=False)

    return output_df


In [9]:
# Run the expansion on the cold snap ('CS') library and keep the result for inspection:
output_df = expand_hw_cs_libraries('CS', hw_cs_data_dir, data_output_dir)

# Show the expanded library as the notebook cell output:
output_df


Unnamed: 0,UID,NERC,Region,Def,Start,End,Center,Center_DOY,T_Max_Min,Duration,Coverage
0,CS_NERC1_Def1_Event1,1,WECC: AZ-NM-SNV,1,1986-12-21,1986-12-22,1986-12-22,356,38.4,2,86.4
1,CS_NERC1_Def1_Event2,1,WECC: AZ-NM-SNV,1,2017-01-16,2017-01-17,2017-01-17,17,37.9,2,86.4
2,CS_NERC1_Def1_Event3,1,WECC: AZ-NM-SNV,1,1993-01-09,1993-01-10,1993-01-09,9,37.8,2,81.8
3,CS_NERC1_Def1_Event4,1,WECC: AZ-NM-SNV,1,2004-11-23,2004-11-24,2004-11-24,329,37.2,2,81.8
4,CS_NERC1_Def1_Event5,1,WECC: AZ-NM-SNV,1,1995-01-27,1995-01-28,1995-01-27,27,37.2,2,86.4
...,...,...,...,...,...,...,...,...,...,...,...
32605,CS_NERC9_Def12_Event217,9,SERC: Delta,12,1985-01-20,1985-01-22,1985-01-21,21,4.2,3,93.8
32606,CS_NERC9_Def12_Event218,9,SERC: Delta,12,1982-01-10,1982-01-12,1982-01-11,11,3.4,3,79.6
32607,CS_NERC9_Def12_Event219,9,SERC: Delta,12,1983-12-23,1983-12-27,1983-12-25,359,1.3,5,96.9
32608,CS_NERC9_Def12_Event220,9,SERC: Delta,12,2021-02-14,2021-02-21,2021-02-16,47,0.3,8,95.1
