# Evaluate the Impact of Population-Weighting on Heat Wave Events


In [78]:
# Start by importing the packages we need:
import os
import glob
import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from glob import glob



## Set the Directory Structure


In [9]:
# Input locations (TELL production-run data) and output locations (nerc_analysis repo).
# Adjacent string literals are joined by Python at compile time, so each name below
# holds a single path string that ends with a trailing slash:
service_territory_data_dir = (
    '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/'
    'ba_service_territory_data/'
)
population_data_dir = (
    '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/'
    'population_data/'
)
weather_data_dir = (
    '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/'
    'wrf_tell_counties_output/historic/'
)
load_data_dir = (
    '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/'
    'outputs/mlp_output/historic/'
)
data_output_dir = (
    '/Users/burl878/Documents/Code/code_repos/nerc_analysis/'
    'data/'
)
image_output_dir = (
    '/Users/burl878/Documents/Code/code_repos/nerc_analysis/'
    'plots/'
)


## Process the Weather and Load Time Series by BA


In [96]:
# Define a function to process the load time series for a given BA and date range:
def process_ba_load_time_series(ba_to_process: str, start_year: int, end_year: int, load_data_dir: str):
    """
    Concatenate the yearly MLP load output files for one BA into a single dataframe.

    For each year the file ``<load_data_dir>/<year>/<ba_to_process>_<year>_mlp_output.csv``
    is read. Values of -9999 are treated as missing, ``Time_UTC`` is parsed to datetimes,
    the ``Load`` column is renamed to ``Load_MWh``, and loads that are not strictly
    positive are set to NaN.

    :param ba_to_process:   Code of the BA to process (used to build the file names)
    :param start_year:      First year to process
    :param end_year:        Year at which to stop; NOT processed itself because the loop
                            is ``range(start_year, end_year)``
    :param load_data_dir:   Directory holding one sub-directory of MLP output per year

    :return:                DataFrame with the columns ``Time_UTC`` and ``Load_MWh``. The
                            per-file row index is kept as read, so index labels repeat
                            across years. An empty dataframe with those two columns is
                            returned when the year range is empty.
    """

    # Collect one dataframe per year and concatenate once at the end:
    yearly_frames = []

    # Loop over the years of load data (end_year is exclusive):
    for year in range(start_year, end_year):

        # Build the path to the .csv file for this BA and year:
        csv_path = os.path.join(load_data_dir, str(year), ba_to_process + '_' + str(year) + '_mlp_output.csv')

        # Read in the .csv file and replace missing values with nan:
        mlp_data = pd.read_csv(csv_path).replace(-9999, np.nan)

        # Set the time variable as a datetime variable:
        mlp_data['Time_UTC'] = pd.to_datetime(mlp_data['Time_UTC'])

        # Rename the "Load" variable:
        mlp_data = mlp_data.rename(columns={'Load': 'Load_MWh'})

        # Keep only strictly positive loads; missing, zero, and negative loads become NaN:
        mlp_data['Load_MWh'] = mlp_data['Load_MWh'].where(mlp_data['Load_MWh'] > 0)

        # Subset to just the variables we need:
        yearly_frames.append(mlp_data[['Time_UTC', 'Load_MWh']])

    # An empty year range yields an empty, correctly-typed dataframe instead of an UnboundLocalError:
    if not yearly_frames:
        return pd.DataFrame({'Time_UTC': pd.Series(dtype='datetime64[ns]'),
                             'Load_MWh': pd.Series(dtype='float64')})

    return pd.concat(yearly_frames)


In [99]:
# Define a function to process the time series for a given BA and date range:
def process_ba_time_series(ba_to_process: str, start_year: int, end_year: int, weather_data_dir: str, service_territory_data_dir: str,
                           population_data_dir: str, load_data_dir: str, data_output_dir: str):
    """
    Build, save, and return the temperature and load time series for a single BA.

    Every county meteorology file matching ``<weather_data_dir>/<year>/*.csv`` is reduced to
    one row holding the unweighted mean (``T2_UW``), population-weighted mean (``T2_PW``),
    minimum (``T2_Min``), and maximum (``T2_Max``) of ``T2`` across the counties mapped to
    the BA, after converting ``T2`` from Kelvin to degrees Fahrenheit. The time stamp of each
    row is parsed from the file name. The rows are then inner-merged on ``Time_UTC`` with the
    output of ``process_ba_load_time_series`` and written to
    ``<data_output_dir>/<ba_to_process>_Time_Series_<start_year>_to_<end_year>.csv``.

    NOTE: the year loop is ``range(start_year, end_year)``, so ``end_year`` itself is not
    processed even though it appears in the output file name.

    :param ba_to_process:               Code of the BA to process
    :param start_year:                  First year to process
    :param end_year:                    Year at which to stop (exclusive)
    :param weather_data_dir:            Directory with one sub-directory of county meteorology files per year
    :param service_territory_data_dir:  Directory containing 'ba_service_territory_2019.csv'
    :param population_data_dir:         Directory containing 'county_populations_2000_to_2020.csv'
    :param load_data_dir:               Directory passed through to process_ba_load_time_series
    :param data_output_dir:             Directory the output .csv file is written to

    :return:                            DataFrame with the columns Time_UTC, T2_UW, T2_PW, T2_Min,
                                        T2_Max, and Load_MWh

    :raises FileNotFoundError:          If no county meteorology files are found for the year range
    """

    # Read in the county-level population data, keep the 2019 population, and simplify the names:
    pop_df = pd.read_csv(os.path.join(population_data_dir, 'county_populations_2000_to_2020.csv'))
    pop_df = pop_df[['county_FIPS', 'pop_2019']].rename(columns={'county_FIPS': 'FIPS', 'pop_2019': 'Population'})

    # Read in the BA-to-county mapping file and subset to just the BA you want to process.
    # rename() returns a new dataframe, which avoids modifying a slice of the full mapping in place:
    mapping_df = pd.read_csv(os.path.join(service_territory_data_dir, 'ba_service_territory_2019.csv'))
    mapping_df = mapping_df.loc[mapping_df['BA_Code'] == ba_to_process].rename(columns={'County_FIPS': 'FIPS'})
    mapping_df = mapping_df[['BA_Code', 'FIPS']]

    # The county populations for the BA do not change from file to file, so merge them once up front:
    ba_pop_df = mapping_df.merge(pop_df, on=['FIPS'])

    # Collect one record per meteorology file and build the dataframe once at the end:
    records = []

    # Loop over the years of weather data (end_year is exclusive):
    for year in range(start_year, end_year):

        # Create a list of all county meteorology files in the input directory:
        list_of_files = glob(os.path.join(weather_data_dir, str(year), '*.csv'))

        # Loop over that list and process each file:
        for met_file in list_of_files:

            # Extract the time string from the name of the file:
            filetime = os.path.basename(met_file).replace("_UTC_County_Mean_Meteorology.csv", "")

            # Read in the .csv file:
            met_df = pd.read_csv(met_file)

            # Convert the temperature from Kelvin to Fahrenheit (0 degC is 273.15 K, not 273 K):
            met_df['T2'] = (1.8 * (met_df['T2'] - 273.15)) + 32

            # Merge the meteorology data into the BA's county populations:
            ba_df = ba_pop_df.merge(met_df, on=['FIPS'])

            # Compute the fraction of the total population in the BA that lives in a given county:
            population_fraction = ba_df['Population'] / ba_df['Population'].sum()

            # Population-weight T2. min_count=1 gives NaN rather than 0 when no county matched:
            t2_weighted_sum = ba_df['T2'].mul(population_fraction).sum(min_count=1)

            # Store the statistics for this time step:
            records.append({'Time_UTC': pd.to_datetime(filetime, exact=False, format='%Y_%m_%d_%H'),
                            'T2_UW': np.round(ba_df['T2'].mean(), 2),
                            'T2_PW': np.round(t2_weighted_sum, 2),
                            'T2_Min': np.round(ba_df['T2'].min(), 2),
                            'T2_Max': np.round(ba_df['T2'].max(), 2)})

    # Fail with a clear message instead of an opaque KeyError further down:
    if not records:
        raise FileNotFoundError('No county meteorology .csv files found under ' + str(weather_data_dir) +
                                ' for the years ' + str(start_year) + ' (inclusive) to ' + str(end_year) + ' (exclusive)')

    # Build the output dataframe and sort by time:
    output_df = pd.DataFrame(records, columns=['Time_UTC', 'T2_UW', 'T2_PW', 'T2_Min', 'T2_Max'])
    output_df = output_df.sort_values(['Time_UTC'])

    # Aggregate the TELL MLP output for the BA and date range:
    load_df = process_ba_load_time_series(ba_to_process=ba_to_process,
                                          start_year=start_year,
                                          end_year=end_year,
                                          load_data_dir=load_data_dir)

    # Merge the meteorology and load data (inner join, so only time steps present in both are kept):
    output_df = output_df.merge(load_df, on=['Time_UTC'])

    # Create the output filename:
    csv_output_filename = os.path.join(data_output_dir, (ba_to_process + '_Time_Series_' + str(start_year) + '_to_' + str(end_year) + '.csv'))

    # Write out the dataframe to a .csv file:
    output_df.to_csv(csv_output_filename, sep=',', index=False)

    return output_df


In [None]:
# Build the ERCO time series. The year loops use range(start_year, end_year),
# so the years 1980 through 1989 are processed and 1990 itself is not:
output_df = process_ba_time_series(
    ba_to_process='ERCO',
    start_year=1980,
    end_year=1990,
    weather_data_dir=weather_data_dir,
    service_territory_data_dir=service_territory_data_dir,
    population_data_dir=population_data_dir,
    load_data_dir=load_data_dir,
    data_output_dir=data_output_dir,
)

# Display the resulting dataframe as the cell output:
output_df


## Make the Time Series and Error Distribution Plots
