# Create Time Series of Multiple Variables for a Given NERC Region


In [1]:
# Start by importing the packages we need:
import os
import datetime

import pandas as pd
import matplotlib.pyplot as plt


## Set the Directory Structure

In [2]:
# Locations of the input data sets and of the directory that receives the plots.
# The temperature and renewables files are read from the same directory, so the
# two names are bound to one path:
temp_data_dir = renew_data_dir = '/Users/burl878/Documents/Code/code_repos/gdo_climate_toolsuite_visualizations/data/'
# Directory passed to the load-processing function:
load_data_dir = '/Users/burl878/Documents/Code/code_repos/gdo_climate_toolsuite_visualizations/data/nerc_region_load_data/'
# Directory passed to the plotting function for saved images:
image_output_dir = '/Users/burl878/Documents/Code/code_repos/gdo_climate_toolsuite_visualizations/plots/'


## Write a Function to Process the Temperature Time Series Data


In [3]:
# Define a function to process the time series of temperature for a given NERC region:
def process_temperature_time_series(temp_data_dir: str, temp_region: int):
    """Build the daily min/max temperature time series for one NERC region.

    Reads ``nerc_temperature_time_series.csv`` from ``temp_data_dir``, keeps the
    rows whose ``NERC`` value equals ``temp_region``, converts ``T_min`` and
    ``T_max`` from Kelvin to Fahrenheit, and attaches the mean of each variable
    taken over every row that shares the same day of year.

    Args:
        temp_data_dir: Directory holding the temperature CSV. A trailing path
            separator is optional.
        temp_region: Value of the ``NERC`` column identifying the region.

    Returns:
        A DataFrame sorted by time with the columns ``Time_UTC``, ``T_Min``,
        ``T_Min_Mean``, ``T_Max`` and ``T_Max_Mean``; all temperature columns
        are rounded to two decimals.
    """
    # os.path.join accepts the directory with or without a trailing slash:
    df = pd.read_csv(os.path.join(temp_data_dir, 'nerc_temperature_time_series.csv'))

    # Subset to just the data for the requested NERC region:
    subset_df = df[df['NERC'] == temp_region].copy()

    # Parse the 'date' strings into datetimes and sort chronologically:
    subset_df['Time_UTC'] = pd.to_datetime(subset_df['date'])
    subset_df = subset_df.sort_values(['Time_UTC'])

    # Convert from Kelvin to Fahrenheit and round off the values:
    for raw_name, out_name in (('T_min', 'T_Min'), ('T_max', 'T_Max')):
        subset_df[out_name] = (((subset_df[raw_name] - 273.15) * 1.8) + 32).round(2)

    # Day of year is the averaging key.
    # NOTE: dayofyear is shifted by one after 29 February in leap years, so from
    # March onward a leap-year date is averaged with the following calendar day
    # of non-leap years.
    subset_df['DoY'] = subset_df['Time_UTC'].dt.dayofyear

    # Calculate the mean T_Min and T_Max by day of year:
    for out_name in ('T_Min', 'T_Max'):
        subset_df[out_name + '_Mean'] = subset_df.groupby('DoY')[out_name].transform('mean').round(2)

    # Only keep the columns we need:
    return subset_df[['Time_UTC', 'T_Min', 'T_Min_Mean', 'T_Max', 'T_Max_Mean']].copy()


In [4]:
# Smoke-test the temperature processing for region number 6. The trailing bare
# name shows the resulting frame when this runs as a notebook cell:
temp_df = process_temperature_time_series(temp_data_dir=temp_data_dir, temp_region=6)
temp_df


Unnamed: 0,Time_UTC,T_Min,T_Min_Mean,T_Max,T_Max_Mean
1,1980-01-01,33.88,24.25,39.52,34.30
17,1980-01-02,30.12,24.36,37.78,34.65
33,1980-01-03,27.33,24.78,36.70,35.26
49,1980-01-04,26.13,25.48,35.56,35.43
65,1980-01-05,27.67,25.54,35.77,35.30
...,...,...,...,...,...
262913,2024-12-27,33.24,25.06,39.78,34.94
262929,2024-12-28,35.41,24.35,41.78,34.41
262945,2024-12-29,35.16,24.41,41.91,34.69
262961,2024-12-30,29.77,24.61,38.59,34.39


## Write a Function to Process the Load Time Series Data


In [5]:
def process_load_time_series(load_data_dir: str, load_region: str):
    """Build the hourly load time series for WECC and one of its columns.

    Reads ``WECC_Hourly_Loads_1980_to_2025.csv`` from ``load_data_dir`` and
    returns the ``WECC`` column together with the ``load_region`` column, each
    accompanied by its mean over every row sharing the same hour of year.

    Args:
        load_data_dir: Directory holding the load CSV. A trailing path
            separator is optional.
        load_region: Name of the CSV column to report as the regional load.
            Passing ``'WECC'`` is allowed and simply duplicates the WECC values.

    Returns:
        A DataFrame with the columns ``Time_UTC``, ``WECC_Load_MWh``,
        ``WECC_Load_Mean_MWh``, ``Region_Load_MWh`` and
        ``Region_Load_Mean_MWh``; the mean columns are rounded to two decimals.

    Raises:
        KeyError: If ``Time_UTC``, ``WECC`` or ``load_region`` is not a column
            of the CSV.
    """
    # Read in the full load data set (os.path.join accepts the directory with
    # or without a trailing slash):
    raw_df = pd.read_csv(os.path.join(load_data_dir, 'WECC_Hourly_Loads_1980_to_2025.csv'))

    # Keep only the columns we need, under their output names. Building the
    # frame column by column (instead of select-then-rename) stays correct
    # even when load_region is 'WECC' itself:
    load_df = pd.DataFrame({'Time_UTC': pd.to_datetime(raw_df['Time_UTC']),
                            'WECC_Load_MWh': raw_df['WECC'],
                            'Region_Load_MWh': raw_df[load_region]})

    # Add the hour of year to be used as an averaging parameter.
    # NOTE: this is derived from dayofyear, so after 29 February a leap-year
    # hour is averaged with the hour 24 h later in non-leap years.
    load_df['HoY'] = ((load_df['Time_UTC'].dt.dayofyear - 1) * 24) + load_df['Time_UTC'].dt.hour

    # Calculate the mean load by hour of year:
    for value_name, mean_name in (('WECC_Load_MWh', 'WECC_Load_Mean_MWh'),
                                  ('Region_Load_MWh', 'Region_Load_Mean_MWh')):
        load_df[mean_name] = load_df.groupby('HoY')[value_name].transform('mean').round(2)

    # Only keep the columns we need, in the documented order:
    return load_df[['Time_UTC', 'WECC_Load_MWh', 'WECC_Load_Mean_MWh',
                    'Region_Load_MWh', 'Region_Load_Mean_MWh']].copy()
    

In [6]:
# Smoke-test the load processing using the 'WECC: NWPP' column. The trailing
# bare name shows the resulting frame when this runs as a notebook cell:
load_df = process_load_time_series(load_data_dir=load_data_dir, load_region='WECC: NWPP')
load_df


Unnamed: 0,Time_UTC,WECC_Load_MWh,WECC_Load_Mean_MWh,Region_Load_MWh,Region_Load_Mean_MWh
0,1980-01-01 00:00:00,95949.05,96181.19,37148.77,38789.98
1,1980-01-01 01:00:00,100408.36,101770.50,38451.96,40448.07
2,1980-01-01 02:00:00,102709.84,104813.27,39070.34,41414.27
3,1980-01-01 03:00:00,104593.05,106864.34,39582.15,42020.21
4,1980-01-01 04:00:00,104588.55,106979.44,39390.51,41634.89
...,...,...,...,...,...
395275,2024-12-31 19:00:00,97933.90,98306.31,41007.93,41124.19
395276,2024-12-31 20:00:00,97279.37,97801.60,40589.88,40788.11
395277,2024-12-31 21:00:00,97107.29,97550.03,40220.56,40471.19
395278,2024-12-31 22:00:00,,97643.19,,40257.53


## Write a Function to Process the Wind and Solar Time Series Data


In [19]:
def process_renewables_time_series(renew_data_dir: str, renew_region: str):
    """Build the hourly solar and wind time series for one NERC region.

    Reads ``nerc_region_solar_wind_nolosses_historical_1980_2022.csv`` from
    ``renew_data_dir``, keeps the rows whose ``nerc_region`` equals
    ``renew_region``, and attaches the mean solar and wind generation taken
    over every row sharing the same hour of year.

    Args:
        renew_data_dir: Directory holding the wind/solar CSV. A trailing path
            separator is optional.
        renew_region: Value of the ``nerc_region`` column to keep.

    Returns:
        A DataFrame with the columns ``Time_UTC``, ``Solar_MWh``,
        ``Solar_Mean_MWh``, ``Solar_CF``, ``Wind_MWh``, ``Wind_Mean_MWh`` and
        ``Wind_CF``. The mean columns are rounded to two decimals and the
        capacity factors to three.
    """
    # Read in the wind and solar data (os.path.join accepts the directory with
    # or without a trailing slash):
    renew_df = pd.read_csv(os.path.join(renew_data_dir, 'nerc_region_solar_wind_nolosses_historical_1980_2022.csv'))

    # Subset to just the region you want to plot:
    subset_renew_df = renew_df[renew_df['nerc_region'] == renew_region].copy()

    # Turn 'YYYY-MM-DDTHH:MM:SSZ' style strings into 'YYYY-MM-DD HH:MM:SS'.
    # Both substitutions are literal, so no regex is involved; dropping the 'Z'
    # suffix keeps the parsed timestamps timezone-naive.
    time_strings = (subset_renew_df['datetime_utc']
                    .str.replace('T', ' ', regex=False)
                    .str.replace('Z', '', regex=False))
    subset_renew_df['Time_UTC'] = pd.to_datetime(time_strings)

    # Add the hour of year to be used as an averaging parameter.
    # NOTE: this is derived from dayofyear, so after 29 February a leap-year
    # hour is averaged with the hour 24 h later in non-leap years.
    subset_renew_df['HoY'] = (((subset_renew_df['Time_UTC'].dt.dayofyear - 1) * 24)
                              + subset_renew_df['Time_UTC'].dt.hour)

    # Calculate the mean wind and solar by hour of year:
    by_hour_of_year = subset_renew_df.groupby('HoY')
    subset_renew_df['Solar_Mean_MWh'] = by_hour_of_year['solar_gen_mwh'].transform('mean').round(2)
    subset_renew_df['Wind_Mean_MWh'] = by_hour_of_year['wind_gen_mwh'].transform('mean').round(2)

    # Only keep the columns we need and give them their output names:
    output_df = subset_renew_df[['Time_UTC', 'solar_gen_mwh', 'Solar_Mean_MWh', 'solar_cf',
                                 'wind_gen_mwh', 'Wind_Mean_MWh', 'wind_cf']].copy()
    output_df = output_df.rename(columns={'solar_gen_mwh': 'Solar_MWh', 'solar_cf': 'Solar_CF',
                                          'wind_gen_mwh': 'Wind_MWh', 'wind_cf': 'Wind_CF'})

    # Round the capacity factor values:
    output_df['Solar_CF'] = output_df['Solar_CF'].round(3)
    output_df['Wind_CF'] = output_df['Wind_CF'].round(3)

    return output_df
    

In [20]:
# Smoke-test the wind/solar processing for the 'NWPP' region. The trailing bare
# name shows the resulting frame when this runs as a notebook cell:
renew_df = process_renewables_time_series(renew_data_dir=renew_data_dir, renew_region='NWPP')
renew_df


Unnamed: 0,Time_UTC,Solar_MWh,Solar_Mean_MWh,Solar_CF,Wind_MWh,Wind_Mean_MWh,Wind_CF
87840,1980-01-01 00:00:00,0.00,0.00,0.000,684.82,691.85,0.398
87841,1980-01-01 01:00:00,0.00,0.00,0.000,785.32,718.97,0.457
87842,1980-01-01 02:00:00,0.00,0.00,0.000,739.44,697.89,0.430
87843,1980-01-01 03:00:00,0.00,0.00,0.000,722.52,693.11,0.420
87844,1980-01-01 04:00:00,0.00,0.00,0.000,734.59,707.05,0.427
...,...,...,...,...,...,...,...
6355483,2022-12-31 19:00:00,248.63,351.52,0.529,1051.67,652.93,0.611
6355484,2022-12-31 20:00:00,248.02,343.34,0.528,1014.32,653.36,0.590
6355485,2022-12-31 21:00:00,211.79,324.67,0.451,977.35,673.27,0.568
6355486,2022-12-31 22:00:00,136.02,275.74,0.289,951.36,734.85,0.553


## Make the Plot


In [25]:
def plot_integrated_time_series(start_day: str,
                                end_day: str,
                                temp_data_dir: str, temp_region: int,
                                load_data_dir: str, load_region: str,
                                renew_data_dir: str, renew_region: str,
                                image_output_dir: str, image_resolution: int, save_images=False):
    """Plot temperature, demand, solar and wind for one time window.

    Draws a four-panel figure: daily maximum temperature, hourly regional
    load, hourly solar generation and hourly wind generation. The load, solar
    and wind panels also show the hour-of-year mean as a dashed black line.

    Args:
        start_day: First timestamp of the window, in any form accepted by
            ``pd.to_datetime`` (e.g. ``'2021-06-22'``).
        end_day: Last timestamp of the window (inclusive). Rows later than
            this exact timestamp are excluded.
        temp_data_dir: Directory holding ``nerc_region_names.csv`` and the
            temperature CSV.
        temp_region: Region number, matched against the ``number`` column of
            ``nerc_region_names.csv`` and passed to
            ``process_temperature_time_series``.
        load_data_dir: Directory passed to ``process_load_time_series``.
        load_region: Load column name passed to ``process_load_time_series``.
        renew_data_dir: Directory passed to ``process_renewables_time_series``.
        renew_region: Region name passed to ``process_renewables_time_series``;
            also used in the output file name.
        image_output_dir: Directory that receives the saved image.
        image_resolution: Resolution (dpi) of the saved image.
        save_images: When truthy, the figure is written to
            ``<image_output_dir>/<renew_region>_<start_day>.png`` and closed;
            otherwise the figure is left open.

    Raises:
        ValueError: If ``temp_region`` does not match exactly one row of
            ``nerc_region_names.csv``.
    """
    # Parse the window bounds once; they are reused for filtering and x-limits:
    window_start = pd.to_datetime(start_day)
    window_end = pd.to_datetime(end_day)

    def _subset_to_window(df):
        # Keep only the rows whose 'Time_UTC' lies inside the window (inclusive).
        in_window = (df['Time_UTC'] >= window_start) & (df['Time_UTC'] <= window_end)
        return df[in_window].copy()

    # Read in NERC region name file and extract the name:
    nerc = pd.read_csv(os.path.join(temp_data_dir, 'nerc_region_names.csv'))
    nerc_name = nerc.loc[nerc['number'] == temp_region, 'short_name'].item()

    # Process each time series and subset it to the time window:
    temp_subset_df = _subset_to_window(
        process_temperature_time_series(temp_data_dir=temp_data_dir, temp_region=temp_region))
    load_subset_df = _subset_to_window(
        process_load_time_series(load_data_dir=load_data_dir, load_region=load_region))
    renew_subset_df = _subset_to_window(
        process_renewables_time_series(renew_data_dir=renew_data_dir, renew_region=renew_region))

    # Make the plot:
    plt.figure(figsize=(25, 30))
    plt.rcParams['font.size'] = 18
    plt.rcParams['axes.axisbelow'] = True

    # Panel 1: daily maximum temperature. The day-of-year mean ('T_Max_Mean')
    # is available in temp_subset_df but is not drawn on this panel.
    plt.subplot(411)
    plt.plot(temp_subset_df['Time_UTC'], temp_subset_df['T_Max'], color='r', linestyle='-', linewidth=3)
    plt.xlim([window_start, window_end])
    # Raw string: the backslash belongs to the mathtext command, not to Python.
    plt.ylabel(r'Max Temp. [$^\circ$F]', fontsize=18)
    plt.title('Daily Maximum Temperature in ' + nerc_name)

    # Panel 2: hourly regional load and its hour-of-year mean:
    plt.subplot(412)
    plt.plot(load_subset_df['Time_UTC'], load_subset_df['Region_Load_MWh'], color='m', linestyle='-', linewidth=3)
    plt.plot(load_subset_df['Time_UTC'], load_subset_df['Region_Load_Mean_MWh'], color='k', linestyle='--', linewidth=2)
    plt.xlim([window_start, window_end])
    # Pad the y-range by 1000 on either side of the observed load:
    plt.ylim([load_subset_df['Region_Load_MWh'].min() - 1000,
              load_subset_df['Region_Load_MWh'].max() + 1000])
    plt.ylabel('Demand [MWh]', fontsize=18)
    plt.title('Hourly Electricity Demand')

    # Panel 3: hourly solar generation and its hour-of-year mean:
    plt.subplot(413)
    plt.plot(renew_subset_df['Time_UTC'], renew_subset_df['Solar_MWh'], color='b', linestyle='-', linewidth=3)
    plt.plot(renew_subset_df['Time_UTC'], renew_subset_df['Solar_Mean_MWh'], color='k', linestyle='--', linewidth=2)
    plt.xlim([window_start, window_end])
    plt.ylim([0, renew_subset_df['Solar_MWh'].max() + 25])
    plt.ylabel('Solar Gen. [MWh]', fontsize=18)
    plt.title('Hourly Solar Generation')

    # Panel 4: hourly wind generation and its hour-of-year mean:
    plt.subplot(414)
    plt.plot(renew_subset_df['Time_UTC'], renew_subset_df['Wind_MWh'], color='g', linestyle='-', linewidth=3)
    plt.plot(renew_subset_df['Time_UTC'], renew_subset_df['Wind_Mean_MWh'], color='k', linestyle='--', linewidth=2)
    plt.xlim([window_start, window_end])
    plt.ylim([0, renew_subset_df['Wind_MWh'].max() + 25])
    plt.ylabel('Wind Gen. [MWh]', fontsize=18)
    plt.title('Hourly Wind Generation')

    # If the "save_images" flag is set then save the plot to a .png file:
    if save_images:
        image_path = os.path.join(image_output_dir, str(renew_region) + '_' + start_day + '.png')
        plt.savefig(image_path, dpi=image_resolution, bbox_inches='tight')
        plt.close()


In [27]:
# Draw the four-panel figure for the window 2021-06-22 to 2021-07-02 and save it:
plot_integrated_time_series(
    start_day='2021-06-22',
    end_day='2021-07-02',
    temp_data_dir=temp_data_dir,
    temp_region=6,
    load_data_dir=load_data_dir,
    load_region='WECC: NWPP',
    renew_data_dir=renew_data_dir,
    renew_region='NWPP',
    image_output_dir=image_output_dir,
    image_resolution=150,
    save_images=True,
)
