# Case: Grouping hours by interconnect
### Details: Group hours first by interconnect load into the same 6 load groups as used before, then split into regions and split by 3 seasons and 4 times of day
#### Methodology: Use code similar to that used to organize groups by season, but applied to interconnects, and then split into and sort by region, season, and time of day.
### 1: Group by interconnect and 6 load groups

In [1]:
#importing packages needed for analysis
import os
import numpy as np
import pandas as pd
import math
from pandas import DataFrame

#Setup cell: load the long-format profiles, pick the profile/column to analyse,
#make sure the output folders exist, and add an hour-of-year counter to the data.
path = os.getcwd()
#print(path)

load_dur = pd.read_csv('../outputs/load_long_format.csv')
solar_dur = pd.read_csv('../outputs/solar_long_format.csv')
wind_dur = pd.read_csv('../outputs/wind_long_format.csv')

## UNCOMMENT WHICH PROFILE TO BE USED
#x = load_dur
#x_name = 'load'
#x_name2 = 'Load'
#x_column = 'Load'

x = solar_dur
x_name = 'solar'
x_name2 = 'Solar_Gen'
#choose TRG 
x_column = 'TRG6'

#x = wind_dur
#x_name = 'wind'
#x_name2 = 'Wind_Gen'
#x_column = 'TRG4'

#this code creates an output directory in the parent directory, if one does not exist yet
#Note: this is where all of the output files will be written, since outputs are large this saves space in git
#os.path.join is used instead of a hand-written '\outputs' suffix: '\o' is not a valid
#string escape and a literal backslash only works as a path separator on Windows
parent = os.path.dirname(path)
outputs_dir = os.path.join(parent, 'outputs')
os.makedirs(outputs_dir, exist_ok=True)
print('output files are written out in parent directory: '+outputs_dir)

#one sub-folder per profile type (named after x_name)
outputs_x = os.path.join(outputs_dir, x_name)
os.makedirs(outputs_x, exist_ok=True)
print('output files are written out in parent directory: '+outputs_x)

#keep only the identifying columns plus the selected profile column
x = x[['Region','R_Group','R_Subgroup','Season','Month','Day','Hour',x_column]].copy()
years = pd.read_csv('inputs/years.csv').dropna()

#print(x)

#add an hour counter: Hour plus 24 for every full day before Day
#NOTE(review): assumes Day is a 1-based day of year and Hour runs 1-24 - confirm against the input files
x['Hour_Counter'] = (x['Hour']) + (x['Day'] - 1) * 24
x = x.sort_values(by=['Region','Hour_Counter'])
unique_hc = pd.Series(x['Hour_Counter'].unique()).dropna()
#print(unique_hc.tail(2))

output files are written out in parent directory: C:\Users\tgoforth\Documents\IPM temporal resolution project\outputs
output files are written out in parent directory: C:\Users\tgoforth\Documents\IPM temporal resolution project\outputs/solar


In [2]:
#first, split by interconnect and 6 load groups
#Each row of the profile is tagged with an interconnect, the rows are ranked within
#each interconnect from highest to lowest x_column value, and the ranking is cut into
#load groups whose sizes follow the shares in inputs/group_shares.csv.
inter_x = x.copy()
tod = pd.read_csv('inputs/time_of_day.csv')

#attach the time-of-day columns by Hour; the left join keeps every row of inter_x
inter_x = pd.merge(inter_x,tod,on='Hour',how='left')

#identifying different interconnects: WEC and ERC come from R_Group, everything else is 'Other'
inter_x['Interconnect'] = 'Other'
inter_x.loc[inter_x['R_Group'] == 'WEC', 'Interconnect'] = 'WEC'
inter_x.loc[inter_x['R_Group'] == 'ERC', 'Interconnect'] = 'ERC'

#Create a list of the interconnect of every row (one entry per row of inter_x)
interconnects = inter_x[['Interconnect','Hour_Counter']].copy()
#print(interconnects.head())
#print(interconnects.shape)

#get the number of hours (rows) in each interconnect
inter_count = interconnects.groupby('Interconnect',as_index=False).count().rename(columns={'Hour_Counter':'Interconnect_Tot'})
inter_count = inter_count.sort_values('Interconnect')
#print(inter_count)

#read in the group shares data
group2 = pd.read_csv('inputs/group_shares.csv')

#combine the group shares data with the interconnect/hours data:
#one column per interconnect holding the number of hours that belongs to each group.
#Looping over inter_count handles any number of interconnects, in the same sorted order
#that is used for the ranking below.
for inter_name, inter_hours in inter_count.itertuples(index=False, name=None):
    group2[inter_name] = group2['Share']*inter_hours

#long format: one row per (interconnect, group); the running total of hours is the
#upper rank boundary of that group across the whole interconnect-sorted table
group_inter2 = pd.melt(group2,id_vars=['Group','Share'],var_name='Interconnect',value_name='Interconnect_Ct')
group_inter2['Interconnect_Counter'] = group_inter2['Interconnect_Ct'].cumsum()
group_inter2['Interconnect_Counter'] = round(group_inter2['Interconnect_Counter'])
#print(group2.dtypes)
#print(group2)
#print(group_inter2)

#sort by interconnect (ascending), and then by the profile value in DESCENDING order,
#so rank 1 within each interconnect is its highest value
inter_x2 = inter_x.sort_values(by=['Interconnect',x_column], ascending=[True, False]).reset_index(drop=True)
#1-based rank as float so that its dtype matches the rounded cumulative boundaries
inter_x2['Interconnect_Counter'] = inter_x2.index + 1.0 

#use interconnect_counter to apply groups to each interconnect 
#create list of group_inter with just the group and its upper rank boundary listed
group_inter_index = group_inter2[['Group','Interconnect_Counter']].copy()

#merge to apply groups to each interconnect value based on the counter:
#direction='forward' matches each row to the first boundary that is >= its rank
inter_x2 = pd.merge_asof(inter_x2, group_inter_index, on='Interconnect_Counter', direction='forward')
inter_x2 = inter_x2.drop(columns=['Interconnect_Counter']).reset_index(drop=True)
print(inter_x2)

          Region R_Group R_Subgroup    Season  Month  Day  Hour   TRG6  \
0       ERC_PHDL     ERC       PHDL  shoulder      4   95    11  829.0   
1       ERC_PHDL     ERC       PHDL  shoulder      4   92    11  819.0   
2       ERC_PHDL     ERC       PHDL  shoulder      4   95    12  816.0   
3       ERC_PHDL     ERC       PHDL  shoulder      4   96    11  809.0   
4       ERC_PHDL     ERC       PHDL  shoulder      4   95    16  807.0   
...          ...     ...        ...       ...    ...  ...   ...    ...   
683275   WECC_WY     WEC         WY    winter     12  365    23    NaN   
683276   WECC_WY     WEC         WY    winter     12  365    23    NaN   
683277   WECC_WY     WEC         WY    winter     12  365    24    NaN   
683278   WECC_WY     WEC         WY    winter     12  365    24    NaN   
683279   WECC_WY     WEC         WY    winter     12  365    24    NaN   

        Hour_Counter      TOD Interconnect  Group  
0               2267  middday          ERC      1  
1      

### 2: Sort by region, season, and time of day
### 3: Average load based on groups

In [4]:
#Build the interconnect "segments": one row per
#(Interconnect, Group, Region, Season, TOD) with the hour count, total and average of
#x_column, then attach those segment statistics back onto every hourly row.

#sort by region, season, and time of day 
inter_x3 = inter_x2.sort_values(['Interconnect','Group','Region','Season','TOD'])

#average load based on order of groups
#the string 'sum' is used instead of the builtin sum: passing the builtin to agg is
#deprecated in recent pandas, while the string selects the same groupby sum
aggregations = {x_column:['count','sum','mean']}
case = inter_x3.groupby(['Interconnect','Group','Region','Season','TOD'],as_index=False).agg(aggregations)
#flatten the two-level column index produced by agg into plain names
#(5 grouping keys followed by count, sum and mean in that order)
case.columns = ['Interconnect','Group','Region','Season','TOD','Hour_Tot','Tot','Avg']
#print(case.head())
#print('number of rows in dataset =',case.shape[0])
case.to_csv('../outputs/'+x_name+'/'+x_name+'_segments_interconnect.csv')
#print()

#left join keeps every hourly row and adds Hour_Tot, Tot and Avg of its segment
inter_x4 = pd.merge(inter_x3,case,on=['Interconnect','Group','Region','Season','TOD'],how='left')
inter_x4 = inter_x4.sort_values(['Region',x_column]).reset_index(drop=True)
print(inter_x4.head(3))
print('number of rows in dataset =',inter_x4.shape[0])
inter_x4.to_csv('../outputs/'+x_name+'/'+x_name+'_8760_interconnect.csv')

     Region R_Group R_Subgroup    Season  Month  Day  Hour  TRG6  \
0  ERC_PHDL     ERC       PHDL  shoulder      4  101    19   0.0   
1  ERC_PHDL     ERC       PHDL  shoulder      4  107    19   0.0   
2  ERC_PHDL     ERC       PHDL  shoulder     10  279    19   0.0   

   Hour_Counter      TOD Interconnect  Group  Hour_Tot    Tot    Avg  
0          2419  evening          ERC      5        80  482.0  6.025  
1          2563  evening          ERC      5        80  482.0  6.025  
2          6691  evening          ERC      5        80  482.0  6.025  
number of rows in dataset = 683280


# Case: NERC Region Grouping
### Details: First group the regions by their market group and separate into load groups there. Then, split into regions and group by season and time of day. 
#### Methodology: Similar, if not identical, to the interconnect approach. Hours are grouped by market group instead of by interconnect, but everything else remains the same.

In [6]:
#Tag every hourly profile row with a NERC region and a time-of-day label.

#lookup table keyed by Region (presumably the source of the NERC_Region column)
nerc_id = pd.read_csv('inputs/nerc_regions.csv')

#left join on Region: every row of x is kept
nerc_x = x.merge(nerc_id, how='left', on='Region')

#left join on Hour to bring in the time-of-day information
tod = pd.read_csv('inputs/time_of_day.csv')
nerc_x = nerc_x.merge(tod, how='left', on='Hour')

#keep only the columns used further down, with the profile column last
nerc_x = nerc_x.loc[:, ['Region','R_Group','R_Subgroup','NERC_Region','Season',
                        'Month','Day','TOD','Hour','Hour_Counter',x_column]]
print(nerc_x)

          Region R_Group R_Subgroup NERC_Region  Season  Month  Day    TOD  \
0       ERC_PHDL     ERC       PHDL       ERCOT  winter      1    1  night   
1       ERC_PHDL     ERC       PHDL       ERCOT  winter      1    1  night   
2       ERC_PHDL     ERC       PHDL       ERCOT  winter      1    1  night   
3       ERC_PHDL     ERC       PHDL       ERCOT  winter      1    1  night   
4       ERC_PHDL     ERC       PHDL       ERCOT  winter      1    1  night   
...          ...     ...        ...         ...     ...    ...  ...    ...   
683275  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   
683276  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   
683277  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   
683278  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   
683279  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   

        Hour  Hour_Counter  TRG6  
0          1             1  

In [7]:
#Split each NERC region into load groups: rows are ranked within each NERC region from
#highest to lowest x_column value and the ranking is cut into groups whose sizes follow
#the shares in inputs/group_shares.csv.

#Create a list of the NERC region of every row (one entry per row of nerc_x)
nerc_regions = nerc_x[['NERC_Region','Hour_Counter']].copy()
#print(nerc_regions.head())
#print(nerc_regions.shape)

#get the number of hours (rows) in each NERC region
nerc_count = nerc_regions.groupby('NERC_Region',as_index=False).count().rename(columns={'Hour_Counter':'NERC_Tot'})
nerc_count = nerc_count.sort_values('NERC_Region')
#print(nerc_count)

#read in the group shares data
group4 = pd.read_csv('inputs/group_shares.csv')

#combine the group shares data with the NERC region/hours data:
#one column per NERC region holding the number of hours that belongs to each group.
#Looping over nerc_count handles however many NERC regions are present, in the same
#sorted order that is used for the ranking below.
for nerc_name, nerc_hours in nerc_count.itertuples(index=False, name=None):
    group4[nerc_name] = group4['Share']*nerc_hours

#long format: one row per (NERC region, group); the running total of hours is the
#upper rank boundary of that group across the whole NERC-sorted table
group_nerc = pd.melt(group4,id_vars=['Group','Share'],var_name='NERC_Region',value_name='NERC_Ct')
group_nerc['NERC_Counter'] = group_nerc['NERC_Ct'].cumsum()
group_nerc['NERC_Counter'] = round(group_nerc['NERC_Counter'])
#print(group4.dtypes)
#print(group4)
#print(group_nerc)

#sort by NERC region (ascending), and then by the profile value in DESCENDING order,
#so rank 1 within each NERC region is its highest value
nerc_x2 = nerc_x.sort_values(by=['NERC_Region',x_column], ascending=[True, False]).reset_index(drop=True)
#1-based rank as float so that its dtype matches the rounded cumulative boundaries
nerc_x2['NERC_Counter'] = nerc_x2.index + 1.0 
#print(nerc_x2)

#use nerc_counter to apply groups to each nerc region 
#create list of group_nerc with just the group and its upper rank boundary listed
group_nerc_index = group_nerc[['Group','NERC_Counter']].copy()

#merge to apply groups to each NERC region value based on the counter:
#direction='forward' matches each row to the first boundary that is >= its rank
nerc_x2 = pd.merge_asof(nerc_x2, group_nerc_index, on='NERC_Counter', direction='forward')
nerc_x2 = nerc_x2.drop(columns=['NERC_Counter'])
print(nerc_x2)
#NOTE(review): this file goes directly into ../outputs/, not the ../outputs/<x_name>/
#sub-folder used by the other exports - confirm that this is intentional
nerc_x2.to_csv('../outputs/'+x_name+'_duration_8760_NERC_regions.csv')

          Region R_Group R_Subgroup NERC_Region    Season  Month  Day  \
0       WEC_LADW     WEC       LADW        CAMX  shoulder      3   77   
1       WEC_LADW     WEC       LADW        CAMX  shoulder      4   97   
2       WEC_LADW     WEC       LADW        CAMX  shoulder      4   99   
3       WEC_LADW     WEC       LADW        CAMX  shoulder      4   99   
4       WEC_LADW     WEC       LADW        CAMX  shoulder      4  103   
...          ...     ...        ...         ...       ...    ...  ...   
683275  WECC_IID     WEC        IID        SRSG    winter     12  365   
683276  WECC_IID     WEC        IID        SRSG    winter     12  365   
683277  WECC_IID     WEC        IID        SRSG    winter     12  365   
683278  WECC_IID     WEC        IID        SRSG    winter     12  365   
683279  WECC_IID     WEC        IID        SRSG    winter     12  365   

            TOD  Hour  Hour_Counter   TRG6  Group  
0       middday    16          1840  833.0      1  
1       middday    

In [8]:
#Build the NERC "segments": one row per (NERC_Region, Group, Region, Season, TOD) with
#the hour count, total and average of x_column, then attach those segment statistics
#back onto every hourly row.

#sort by region, season, and time of day 
nerc_x3 = nerc_x2.sort_values(['NERC_Region','Group','Region','Season','TOD'])

#average load based on order of groups
#the string 'sum' is used instead of the builtin sum: passing the builtin to agg is
#deprecated in recent pandas, while the string selects the same groupby sum
aggregations2 = {x_column:['count','sum','mean']}
case2 = nerc_x3.groupby(['NERC_Region','Group','Region','Season','TOD'],as_index=False).agg(aggregations2)
#flatten the two-level column index produced by agg into plain names
#(5 grouping keys followed by count, sum and mean in that order)
case2.columns = ['NERC_Region','Group','Region','Season','TOD','Hour_Tot','Tot','Avg']
print(case2.head())
print('number of rows in dataset =',case2.shape[0])
case2.to_csv('../outputs/'+x_name+'/'+x_name+'_segments_NERC_region.csv')
print()

#left join keeps every hourly row and adds Hour_Tot, Tot and Avg of its segment
nerc_x4 = pd.merge(nerc_x3,case2,on=['NERC_Region','Group','Region','Season','TOD'],how='left')
nerc_x4 = nerc_x4.sort_values(['Region',x_column]).reset_index(drop=True)
print(nerc_x4)
print('number of rows in dataset =',nerc_x4.shape[0])
nerc_x4.to_csv('../outputs/'+x_name+'/'+x_name+'_8760_NERC_region.csv')

  NERC_Region  Group    Region    Season      TOD  Hour_Tot       Tot  \
0        CAMX      1  WECC_SCE  shoulder  middday         1     771.0   
1        CAMX      1  WECC_SCE    summer  middday         8    6225.0   
2        CAMX      1  WEC_LADW  shoulder  evening         8    6321.0   
3        CAMX      1  WEC_LADW  shoulder  middday       142  114439.0   
4        CAMX      1  WEC_LADW    summer  evening         5    3962.0   

          Avg  
0  771.000000  
1  778.125000  
2  790.125000  
3  805.908451  
4  792.400000  
number of rows in dataset = 1605

          Region R_Group R_Subgroup NERC_Region    Season  Month  Day  \
0       ERC_PHDL     ERC       PHDL       ERCOT  shoulder      4  101   
1       ERC_PHDL     ERC       PHDL       ERCOT  shoulder      4  107   
2       ERC_PHDL     ERC       PHDL       ERCOT  shoulder     10  279   
3       ERC_PHDL     ERC       PHDL       ERCOT  shoulder     10  280   
4       ERC_PHDL     ERC       PHDL       ERCOT  shoulder     10  