# Process Wind and Solar Data


In [1]:
# Start by importing the packages we need:
import os
import datetime

import pandas as pd


## Set the Directory Structure

In [2]:
# Identify the data input and output directories. The raw wind and solar file is read from,
# and the processed file is written to, the same folder, so the path is defined once:
wind_solar_data_directory = '/Users/burl878/Documents/Code/code_repos/burleyson-etal_2026_tbd/data/wind_solar_data/'

wind_solar_input_directory = wind_solar_data_directory
wind_solar_output_directory = wind_solar_data_directory


## Write a Function to Process the Wind and Solar Time Series Data


In [3]:
def process_renewables_time_series(wind_solar_input_directory: str, wind_solar_output_directory: str) -> pd.DataFrame:
    """
    Reshape the regional wind and solar time series into one wide table for the WECC and save it.

    The function reads 'nerc_region_solar_wind_nolosses_historical_1980_2022.csv' from the input
    directory, pulls out the 'solar_gen_mwh' and 'wind_gen_mwh' columns for each of the five
    regions, places the regions side by side by merging on their timestamps, adds WECC-wide
    totals, and writes the result to 'WECC_Hourly_Wind_Solar_1980_to_2022.csv' in the output
    directory.

    :param wind_solar_input_directory:  Directory holding the input .csv file (a trailing
                                        path separator is optional).
    :param wind_solar_output_directory: Directory the output .csv file is written to.

    :return: DataFrame with a 'Time_UTC' column, 'WECC_Solar_MWh' and 'WECC_Wind_MWh' totals
             (rounded to two decimals), and '<abbreviation>_Solar_MWh' / '<abbreviation>_Wind_MWh'
             columns for CA, GB, PNW, RM, and SW. Because the regions are combined with an inner
             merge, only timestamps present in every region are kept.

    :raises ValueError: If the input file has no rows for one of the expected regions.
    """

    # Region names as they appear in the 'nerc_region' column, mapped to the abbreviation used in
    # the output column names. The dictionary order is the order in which regions are merged:
    region_abbreviations = {
        'California': 'CA',
        'Great Basin': 'GB',
        'Pacific Northwest': 'PNW',
        'Southwest': 'SW',
        'Rocky Mtn': 'RM',
    }

    # Read in the wind and solar data for all regions. Using os.path.join (rather than string
    # concatenation) makes this work whether or not the directory ends with a path separator:
    input_filename = 'nerc_region_solar_wind_nolosses_historical_1980_2022.csv'
    renew_df = pd.read_csv(os.path.join(wind_solar_input_directory, input_filename))

    # Loop over the TPL-008-1 weather zones in the WECC:
    output_df = None
    for region, region_abbreviation in region_abbreviations.items():

        # Subset to just the region you are currently processing:
        subset_renew_df = renew_df.loc[renew_df['nerc_region'] == region, ['datetime_utc', 'solar_gen_mwh', 'wind_gen_mwh']]

        # An empty region would silently empty the whole output through the inner merge below,
        # so stop with a clear message instead:
        if subset_renew_df.empty:
            raise ValueError("No rows found for region '" + region + "' in " + input_filename)

        # Fix the time string to conform: drop the ISO 'T' separator and the trailing 'Z' so that
        # the values parse to timezone-naive timestamps. These are literal replacements, not regex:
        time_strings = subset_renew_df['datetime_utc'].str.replace('T', ' ', regex=False).str.replace('Z', '', regex=False)

        # Only keep the columns we need, with the region abbreviation added to the column names:
        regional_df = pd.DataFrame({'Time_UTC': pd.to_datetime(time_strings),
                                    (region_abbreviation + '_Solar_MWh'): subset_renew_df['solar_gen_mwh'],
                                    (region_abbreviation + '_Wind_MWh'): subset_renew_df['wind_gen_mwh']})

        # Merge the data into an output dataframe. The first region processed seeds the output,
        # whichever region that happens to be:
        if output_df is None:
            output_df = regional_df
        else:
            output_df = output_df.merge(regional_df, on='Time_UTC', how='inner')

    # The output columns and the totals follow alphabetical abbreviation order (CA, GB, PNW, RM, SW):
    ordered_abbreviations = sorted(region_abbreviations.values())

    # Calculate the total wind and solar across the WECC. The columns are added one at a time so
    # that a missing value in any region propagates to the total:
    for resource in ['Solar', 'Wind']:
        regional_columns = [(abbreviation + '_' + resource + '_MWh') for abbreviation in ordered_abbreviations]
        wecc_total = output_df[regional_columns[0]]
        for column in regional_columns[1:]:
            wecc_total = wecc_total + output_df[column]
        output_df['WECC_' + resource + '_MWh'] = wecc_total.round(2)

    # Rearrange the variables so that the time and WECC totals come first, followed by each region:
    ordered_columns = ['Time_UTC', 'WECC_Solar_MWh', 'WECC_Wind_MWh']
    for abbreviation in ordered_abbreviations:
        ordered_columns += [(abbreviation + '_Solar_MWh'), (abbreviation + '_Wind_MWh')]
    output_df = output_df[ordered_columns].copy()

    # Set the output filename:
    output_filename = 'WECC_Hourly_Wind_Solar_1980_to_2022.csv'

    # Write out the dataframe to a .csv file:
    output_df.to_csv(os.path.join(wind_solar_output_directory, output_filename), sep=',', index=False)

    return output_df


In [4]:
# Run the processing step: this writes the merged .csv file to the output directory and
# returns the same table so that it can be inspected below:
renew_df = process_renewables_time_series(
    wind_solar_input_directory=wind_solar_input_directory,
    wind_solar_output_directory=wind_solar_output_directory,
)

renew_df


Unnamed: 0,Time_UTC,WECC_Solar_MWh,WECC_Wind_MWh,CA_Solar_MWh,CA_Wind_MWh,GB_Solar_MWh,GB_Wind_MWh,PNW_Solar_MWh,PNW_Wind_MWh,RM_Solar_MWh,RM_Wind_MWh,SW_Solar_MWh,SW_Wind_MWh
0,1980-01-01 00:00:00,0.00,1249.23,0.00,107.11,0.00,161.00,0.00,176.12,0.00,513.78,0.00,291.22
1,1980-01-01 01:00:00,0.00,1385.19,0.00,127.62,0.00,195.21,0.00,233.65,0.00,516.63,0.00,312.08
2,1980-01-01 02:00:00,0.00,1296.80,0.00,94.96,0.00,191.48,0.00,193.14,0.00,512.20,0.00,305.02
3,1980-01-01 03:00:00,0.00,1270.69,0.00,79.42,0.00,195.34,0.00,174.92,0.00,516.98,0.00,304.03
4,1980-01-01 04:00:00,0.00,1287.97,0.00,70.93,0.00,208.00,0.00,172.27,0.00,527.02,0.00,309.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...
376939,2022-12-31 19:00:00,3422.21,2723.52,1135.48,1079.79,651.13,321.99,155.48,435.55,246.40,463.08,1233.72,423.11
376940,2022-12-31 20:00:00,3209.55,2825.19,947.81,1228.63,671.01,304.62,162.17,419.16,250.08,444.50,1178.48,428.28
376941,2022-12-31 21:00:00,2786.93,2896.00,765.41,1327.84,494.69,291.76,156.71,398.49,234.44,441.51,1135.68,436.40
376942,2022-12-31 22:00:00,2156.51,2965.08,673.38,1395.38,262.39,282.02,131.09,388.32,168.86,454.76,920.79,444.60
