# Case: Grouping hours by interconnect
### Details: Group hours first by interconnect load, using the same 6 load groups as before; then split by region, by 3 seasons, and by 4 times of day.
#### Methodology: Use code similar to that used to organize groups by season, but applied to interconnects; then split and sort by region, season, and time of day.
### 1: Group by interconnect and 6 load groups

In [19]:
#importing packages needed for analysis
import os
import numpy as np
import pandas as pd
import math
from pandas import DataFrame

path = os.getcwd()
#print(path)

# Inputs: the long-format load table and the lookup that is joined on 'Hour'
# to attach a time-of-day (TOD) label to every row.
load_dur = pd.read_csv('outputs/load_long_format.csv')
tod = pd.read_csv('inputs/time_of_day.csv')

#first, split by interconnect and load groups
inter_load = pd.merge(load_dur, tod, on='Hour', how='left')

#identifying different interconnects
# R_Group values 'WEC' and 'ERC' are interconnects of their own; every other
# R_Group value (including missing ones) is pooled into 'Other'.
inter_load['Interconnect'] = inter_load['R_Group'].where(
    inter_load['R_Group'].isin(['WEC', 'ERC']), 'Other'
)

#Create a list of interconnects of all regions
interconnects = inter_load[['Interconnect', 'Hour_Counter']]

#get the number of rows (region-hours) in each interconnect
inter_count = (
    interconnects.groupby('Interconnect', as_index=False)
    .count()
    .rename(columns={'Hour_Counter': 'Interconnect_Tot'})
    .sort_values('Interconnect')
)
#print(inter_count)

#read in the group shares data (one row per load group: 'Group', 'Share')
# NOTE(review): 'Share' is presumably each group's fraction of the hours and
# the shares sum to 1 -- confirm against inputs/group_shares.csv.
group2 = pd.read_csv('inputs/group_shares.csv')

#combine the group shares data with the interconnect/hours data
# One column per interconnect holding the number of rows that belongs to each
# load group. Looping over inter_count keeps this correct for any number of
# interconnects instead of assuming exactly three.
for inter_name, inter_total in zip(inter_count['Interconnect'],
                                   inter_count['Interconnect_Tot']):
    group2[inter_name] = group2['Share'] * inter_total

# Long format: one row per (interconnect, group), in the same interconnect
# order as inter_count. The running total of the per-group row counts is the
# last (1-based) row position that belongs to each group.
group_inter2 = pd.melt(group2, id_vars=['Group', 'Share'],
                       var_name='Interconnect', value_name='Interconnect_Ct')
group_inter2['Interconnect_Counter'] = group_inter2['Interconnect_Ct'].cumsum().round()
#print(group2.dtypes)
#print(group2)
#print(group_inter2)

#sort by interconnect, and then by load in DESCENDING order (highest load first)
inter_load_2 = inter_load.sort_values(
    by=['Interconnect', 'Load'], ascending=[True, False]
).reset_index(drop=True)
# 1-based running row position, kept as float so its dtype matches the rounded
# cumulative boundaries it is matched against below.
inter_load_2['Interconnect_Counter'] = inter_load_2.index + 1.0
#print(inter_load_2)

#use interconnect_counter to apply groups to each interconnect
#create list of group_inter with just the group and its upper boundary
group_inter_index = group_inter2[['Group', 'Interconnect_Counter']].copy()

#merge to apply groups to each interconnect value based on the counter
# direction='forward' gives every row the group of the first boundary that is
# greater than or equal to the row's own position.
inter_load_2 = pd.merge_asof(inter_load_2, group_inter_index,
                             on='Interconnect_Counter', direction='forward')
inter_load_2 = inter_load_2.drop(columns=['Interconnect_Counter']).reset_index(drop=True)
# Drop the leading column by position.
# NOTE(review): this is presumably the unnamed index column saved in
# load_long_format.csv -- confirm, since a positional drop removes whatever
# column happens to come first.
inter_load_2 = inter_load_2.drop(inter_load_2.columns[0], axis=1)
print(inter_load_2)
#inter_load_2.to_csv('outputs/load_duration_8760_interconnect.csv')

          Region R_Group R_Subgroup    Season  Month  Day  Hour  Hour_Counter  \
0       ERC_REST     ERC       REST    summer      8  223    15          5343   
1       ERC_REST     ERC       REST    summer      8  223    14          5342   
2       ERC_REST     ERC       REST    summer      8  223    16          5344   
3       ERC_REST     ERC       REST    summer      8  222    15          5319   
4       ERC_REST     ERC       REST    summer      8  224    14          5366   
...          ...     ...        ...       ...    ...  ...   ...           ...   
551875  WECC_IID     WEC        IID  shoulder      3   86     4          2044   
551876  WECC_IID     WEC        IID  shoulder      4  101     4          2404   
551877  WECC_IID     WEC        IID  shoulder     11  310     3          7419   
551878  WECC_IID     WEC        IID  shoulder      4  100     4          2380   
551879  WECC_IID     WEC        IID  shoulder      4   99     3          2355   

         Load      TOD Inte

### 2: Sort by region, season, and time of day
### 3: Average load based on groups

In [20]:
#sort by region, season, and time of day
# Columns that together identify one load segment.
segment_keys = ['Interconnect', 'Group', 'Region', 'Season', 'TOD']
inter_load_3 = inter_load_2.sort_values(segment_keys)

#average load based on order of groups
# One row per segment: number of rows, total load and mean load. Named
# aggregation yields flat, already-labelled columns and avoids passing the
# builtin `sum`, which newer pandas versions warn about.
case = inter_load_3.groupby(segment_keys, as_index=False).agg(
    Hour_Tot=('Load', 'count'),
    Load_Tot=('Load', 'sum'),
    Load_Avg=('Load', 'mean'),
)
#print(case.head())
#print('number of rows in dataset =',case.shape[0])
#case.to_csv('outputs/load_segments_interconnect.csv')

# Attach the segment statistics back onto every row, then order by region and
# load. No positional column drop here: the leading column of inter_load_2 is
# already 'Region', so dropping column 0 would discard the region labels.
inter_load_4 = pd.merge(inter_load_3, case, on=segment_keys, how='left')
inter_load_4 = inter_load_4.sort_values(['Region', 'Load']).reset_index(drop=True)
#print(inter_load_4.head(3))
#print('number of rows in dataset =',inter_load_4.shape[0])
#inter_load_4.to_csv('outputs/load_8760_interconnect.csv')

# Case: NERC Region Grouping
### Details: First group the regions by their market group and separate into load groups there. Then, split into regions and group by season and time of day. 
#### Methodology: Similar, if not identical, to the interconnect approach. Hours are grouped by market group instead of by interconnect, but everything else remains the same.

In [29]:
# Lookup tables: region -> NERC region, and hour -> time of day (TOD).
nerc_id = pd.read_csv('inputs/nerc_regions.csv')
tod = pd.read_csv('inputs/time_of_day.csv')

# Column order of the finished NERC load table.
nerc_columns = [
    'Region', 'R_Group', 'R_Subgroup', 'NERC_Region', 'Season', 'Month',
    'Day', 'TOD', 'Hour', 'Hour_Counter', 'Load',
]

# Attach the NERC region to every load row, then discard the leading column
# by position.
nerc_load = load_dur.merge(nerc_id, on='Region', how='left')
nerc_load = nerc_load.drop(columns=nerc_load.columns[0])

# Add the TOD label and keep only the columns of interest, in a fixed order.
nerc_load = nerc_load.merge(tod, on='Hour', how='left')[nerc_columns]
print(nerc_load)

          Region R_Group R_Subgroup NERC_Region  Season  Month  Day    TOD  \
0       ERC_REST     ERC       REST       ERCOT  winter      1    1  night   
1       ERC_REST     ERC       REST       ERCOT  winter      1    1  night   
2       ERC_REST     ERC       REST       ERCOT  winter      1    1  night   
3       ERC_REST     ERC       REST       ERCOT  winter      1    1  night   
4       ERC_REST     ERC       REST       ERCOT  winter      1    1  night   
...          ...     ...        ...         ...     ...    ...  ...    ...   
551875  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   
551876  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   
551877  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   
551878  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   
551879  WEC_SDGE     WEC       SDGE        CAMX  winter     12  365  night   

        Hour  Hour_Counter   Load  
0          1             1 

In [30]:
#Create a list of NERC regions for all rows
nerc_regions = nerc_load[['NERC_Region', 'Hour_Counter']]
#print(nerc_regions.head())
#print(nerc_regions.shape)

#get the number of rows (region-hours) in each NERC region
nerc_count = (
    nerc_regions.groupby('NERC_Region', as_index=False)
    .count()
    .rename(columns={'Hour_Counter': 'NERC_Tot'})
    .sort_values('NERC_Region')
)
#print(nerc_count)

#read in the group shares data (one row per load group: 'Group', 'Share')
# NOTE(review): 'Share' is presumably each group's fraction of the hours and
# the shares sum to 1 -- confirm against inputs/group_shares.csv.
group4 = pd.read_csv('inputs/group_shares.csv')

#combine the group shares data with the NERC region/hours data
# One column per NERC region holding the number of rows that belongs to each
# load group. Looping over nerc_count handles however many NERC regions the
# data contains instead of assuming exactly 15.
for nerc_name, nerc_total in zip(nerc_count['NERC_Region'], nerc_count['NERC_Tot']):
    group4[nerc_name] = group4['Share'] * nerc_total

# Long format: one row per (NERC region, group), in the same region order as
# nerc_count. The running total of the per-group row counts is the last
# (1-based) row position that belongs to each group.
group_nerc = pd.melt(group4, id_vars=['Group', 'Share'],
                     var_name='NERC_Region', value_name='NERC_Ct')
group_nerc['NERC_Counter'] = group_nerc['NERC_Ct'].cumsum().round()
#print(group4.dtypes)
#print(group4)
#print(group_nerc)

#sort by NERC region, and then by load in DESCENDING order (highest load first)
nerc_load_2 = nerc_load.sort_values(
    by=['NERC_Region', 'Load'], ascending=[True, False]
).reset_index(drop=True)
# 1-based running row position, kept as float so its dtype matches the rounded
# cumulative boundaries it is matched against below.
nerc_load_2['NERC_Counter'] = nerc_load_2.index + 1.0
#print(nerc_load_2)

#use nerc_counter to apply groups to each nerc region
#create list of group_nerc with just the group and its upper boundary
group_nerc_index = group_nerc[['Group', 'NERC_Counter']].copy()

#merge to apply groups to each NERC region value based on the counter
# direction='forward' gives every row the group of the first boundary that is
# greater than or equal to the row's own position.
nerc_load_2 = pd.merge_asof(nerc_load_2, group_nerc_index,
                            on='NERC_Counter', direction='forward')
nerc_load_2 = nerc_load_2.drop(columns=['NERC_Counter'])
print(nerc_load_2)
#nerc_load_2.to_csv('outputs/load_duration_8760_NERC_regions.csv')

          Region R_Group R_Subgroup NERC_Region    Season  Month  Day  \
0       WECC_SCE     WEC        SCE        CAMX    summer      9  250   
1       WECC_SCE     WEC        SCE        CAMX    summer      9  250   
2       WECC_SCE     WEC        SCE        CAMX    summer      9  250   
3       WECC_SCE     WEC        SCE        CAMX    summer      9  250   
4       WECC_SCE     WEC        SCE        CAMX    summer      9  250   
...          ...     ...        ...         ...       ...    ...  ...   
551875  WECC_IID     WEC        IID        SRSG  shoulder      3   86   
551876  WECC_IID     WEC        IID        SRSG  shoulder      4  101   
551877  WECC_IID     WEC        IID        SRSG  shoulder     11  310   
551878  WECC_IID     WEC        IID        SRSG  shoulder      4  100   
551879  WECC_IID     WEC        IID        SRSG  shoulder      4   99   

            TOD  Hour  Hour_Counter   Load  Group  
0       middday    16          5992  24240      1  
1       evening    

In [31]:
#sort by region, season, and time of day
# Columns that together identify one load segment.
nerc_segment_keys = ['NERC_Region', 'Group', 'Region', 'Season', 'TOD']
nerc_load_3 = nerc_load_2.sort_values(nerc_segment_keys)

#average load based on order of groups
# One row per segment: number of rows, total load and mean load. Named
# aggregation yields flat, already-labelled columns and avoids passing the
# builtin `sum`, which newer pandas versions warn about.
case2 = nerc_load_3.groupby(nerc_segment_keys, as_index=False).agg(
    Hour_Tot=('Load', 'count'),
    Load_Tot=('Load', 'sum'),
    Load_Avg=('Load', 'mean'),
)
#print(case2.head())
#print('number of rows in dataset =',case2.shape[0])
#case2.to_csv('outputs/load_segments_NERC_region.csv')

# Attach the segment statistics back onto every row, then order by region and
# load.
nerc_load_4 = pd.merge(nerc_load_3, case2, on=nerc_segment_keys, how='left')
nerc_load_4 = nerc_load_4.sort_values(['Region', 'Load']).reset_index(drop=True)
print(nerc_load_4)
print('number of rows in dataset =',nerc_load_4.shape[0])
#nerc_load_4.to_csv('outputs/load_8760_NERC_region.csv')

          Region R_Group R_Subgroup NERC_Region    Season  Month  Day  \
0       ERC_REST     ERC       REST       ERCOT  shoulder      3   86   
1       ERC_REST     ERC       REST       ERCOT  shoulder      4   94   
2       ERC_REST     ERC       REST       ERCOT  shoulder      3   72   
3       ERC_REST     ERC       REST       ERCOT  shoulder      3   65   
4       ERC_REST     ERC       REST       ERCOT  shoulder      3   65   
...          ...     ...        ...         ...       ...    ...  ...   
551875  WEC_SDGE     WEC       SDGE        CAMX    summer      9  249   
551876  WEC_SDGE     WEC       SDGE        CAMX    summer      9  250   
551877  WEC_SDGE     WEC       SDGE        CAMX    summer      9  250   
551878  WEC_SDGE     WEC       SDGE        CAMX    summer      9  250   
551879  WEC_SDGE     WEC       SDGE        CAMX    summer      9  250   

            TOD  Hour  Hour_Counter   Load  Group  Hour_Tot  Load_Tot  \
0         night     3          2043  26989      5 