In [None]:
import pandas as pd
import numpy as np
import os
import sqlite3
import shutil
import datetime
import geopandas as gpd
import matplotlib.pyplot as plt
import csv

In [None]:
# Read the simplified IPM region polygons and reproject them to EPSG:4326 so
# they share a coordinate system with the other layers loaded below.
df_ipm = gpd.read_file('data/ipm_regions_simple.geojson').to_crs('EPSG:4326')
# Keep the text before the first underscore of each IPM region name as a
# coarser grouping label.
df_ipm.loc[:, 'large_region'] = df_ipm['IPM_Region'].map(
    lambda region_name: region_name.partition('_')[0]
)
df_ipm.head()

In [None]:
# Census division boundaries, downloaded from
# https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html (division file)
# and reprojected to EPSG:4326.
df_census = gpd.read_file('cb_2018_us_division_20m').to_crs("EPSG:4326")
df_census.head()

In [None]:
# NERC region boundaries, downloaded from
# https://hifld-geoplatform.opendata.arcgis.com/datasets/6b2af23c67f04f4cb01d88c61aaf558a_0
# and reprojected to EPSG:4326.
df_nerc = gpd.read_file('NERC_Regions-shp/').to_crs("EPSG:4326")
df_nerc.head()

In [None]:
# State boundaries (cb_2018_us_state_500k), reprojected to EPSG:4326.
df_states = gpd.read_file('cb_2018_us_state_500k').to_crs("EPSG:4326")
df_states.head()


In [None]:
# Colour-coded IPM regions with the census division outlines drawn on top,
# cropped to the plotted lon/lat window and saved without axis ticks.
fig, ax = plt.subplots(figsize=(10, 10))
df_ipm.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_census.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_xlim([-130, -65])
ax.set_ylim([23, 50])
for tick_setter in (ax.set_xticks, ax.set_yticks):
    tick_setter([])
fig.savefig('census_ipm_regions.jpg', dpi=400)

In [None]:
# Colour-coded IPM regions with the NERC region outlines drawn on top,
# cropped to the plotted lon/lat window and saved without axis ticks.
fig, ax = plt.subplots(figsize=(10, 10))
df_ipm.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_nerc.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_xlim([-130, -65])
ax.set_ylim([23, 50])
for tick_setter in (ax.set_xticks, ax.set_yticks):
    tick_setter([])
fig.savefig('nerc_ipm_regions.jpg', dpi=400)

In [None]:
# Label anchor for every IPM region: the first (x, y) coordinate tuple of the
# geometry's representative point. Computed in a single pass, and without
# reusing the name `df_ipm` as the comprehension variable (the original
# shadowed the frame it was iterating over).
df_ipm['coords'] = [
    geom.representative_point().coords[0] for geom in df_ipm['geometry']
]

In [None]:
# Large reference map: colour-coded IPM regions, state outlines on top, and
# every IPM region name written at its stored 'coords' anchor point.
fig, ax = plt.subplots(figsize=(20, 20))
df_ipm.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_xlim([-130, -65])
ax.set_ylim([23, 50])
# Axis ticks are intentionally left on in this figure.
for region_label, anchor in zip(df_ipm['IPM_Region'], df_ipm['coords']):
    ax.annotate(text=region_label, xy=anchor, horizontalalignment='center')
fig.savefig('state_ipm_regions.jpg', dpi=400)

In [None]:
# Region 1: every IPM region whose name contains one of these substrings.
reg1_tags = ('WEC_', 'SCE', 'IID')
reg1_mask = df_ipm['IPM_Region'].apply(
    lambda name: any(tag in name for tag in reg1_tags)
)
# Take an explicit copy so that adding the 'Region' column writes to an
# independent frame instead of a slice of df_ipm (avoids pandas'
# SettingWithCopyWarning).
df_reg1 = df_ipm.loc[reg1_mask].copy()
df_reg1['Region'] = 1

# Visual check of the selection against the state outlines.
fig, ax = plt.subplots(figsize=(10, 10))
df_reg1.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_ylim([30, 50])
ax.set_xlim([-127, -110])
ax.set_title('Region 1')

In [None]:
# Region 2: every IPM region whose name contains one of these substrings.
reg2_tags = ('PNW', '_ID', 'WY', 'MT')
reg2_mask = df_ipm['IPM_Region'].apply(
    lambda name: any(tag in name for tag in reg2_tags)
)
# Take an explicit copy so that adding the 'Region' column writes to an
# independent frame instead of a slice of df_ipm (avoids pandas'
# SettingWithCopyWarning).
df_reg2 = df_ipm.loc[reg2_mask].copy()
df_reg2['Region'] = 2

# Visual check of the selection against the state outlines.
fig, ax = plt.subplots(figsize=(10, 10))
df_reg2.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_ylim([30, 50])
ax.set_xlim([-127, -100])
ax.set_title('Region 2')

In [None]:
# Region 3: every IPM region whose name contains one of these substrings.
reg3_tags = ('AZ', 'UT', 'WECC_CO', 'SNV', 'NNV', 'NM')
reg3_mask = df_ipm['IPM_Region'].apply(
    lambda name: any(tag in name for tag in reg3_tags)
)
# Take an explicit copy so that adding the 'Region' column writes to an
# independent frame instead of a slice of df_ipm (avoids pandas'
# SettingWithCopyWarning).
df_reg3 = df_ipm.loc[reg3_mask].copy()
df_reg3['Region'] = 3

# Visual check of the selection against the state outlines.
fig, ax = plt.subplots(figsize=(10, 10))
df_reg3.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_ylim([30, 50])
ax.set_xlim([-125, -100])
ax.set_title('Region 3')

In [None]:
# Region 4: every IPM region whose name contains 'ERC'.
reg4_mask = df_ipm['IPM_Region'].apply(lambda name: 'ERC' in name)
# Take an explicit copy so that adding the 'Region' column writes to an
# independent frame instead of a slice of df_ipm (avoids pandas'
# SettingWithCopyWarning).
df_reg4 = df_ipm.loc[reg4_mask].copy()
df_reg4['Region'] = 4

# Visual check of the selection against the state outlines.
fig, ax = plt.subplots(figsize=(10, 10))
df_reg4.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_ylim([20, 40])
ax.set_xlim([-110, -80])
ax.set_title('Region 4')

In [None]:
# Region 5: every IPM region whose name contains one of these substrings.
reg5_tags = ('SPP', 'MIS_AR', 'AECI', 'MIS_MO', 'MIS_WOTA', 'MIS_LA', 'MIS_AM')
reg5_mask = df_ipm['IPM_Region'].apply(
    lambda name: any(tag in name for tag in reg5_tags)
)
# Take an explicit copy so that adding the 'Region' column writes to an
# independent frame instead of a slice of df_ipm (avoids pandas'
# SettingWithCopyWarning).
df_reg5 = df_ipm.loc[reg5_mask].copy()
df_reg5['Region'] = 5

# Visual check of the selection against the state outlines.
fig, ax = plt.subplots(figsize=(10, 10))
df_reg5.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_ylim([25, 50])
ax.set_xlim([-110, -80])
ax.set_title('Region 5')

In [None]:
# Region 6: every IPM region whose name contains one of these substrings.
reg6_tags = (
    'MIS_MAPP', 'COMD', 'MIS_MNWI', 'MIS_WUMS', 'MIS_IA',
    'MIS_MIDA', 'MIS_IL', 'MIS_INKY', 'MIS_LMI',
)
reg6_mask = df_ipm['IPM_Region'].apply(
    lambda name: any(tag in name for tag in reg6_tags)
)
# Take an explicit copy so that adding the 'Region' column writes to an
# independent frame instead of a slice of df_ipm (avoids pandas'
# SettingWithCopyWarning).
df_reg6 = df_ipm.loc[reg6_mask].copy()
df_reg6['Region'] = 6

# Visual check of the selection against the state outlines.
fig, ax = plt.subplots(figsize=(10, 10))
df_reg6.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_ylim([20, 50])
ax.set_xlim([-110, -80])
ax.set_title('Region 6')

In [None]:
# Region 7: every IPM region whose name contains one of these substrings.
reg7_tags = ('S_VACA', 'S_SOU', 'FRCC', 'TVA', '_MS', 'S_C_KY', 'TX')
reg7_mask = df_ipm['IPM_Region'].apply(
    lambda name: any(tag in name for tag in reg7_tags)
)
# Take an explicit copy so that adding the 'Region' column writes to an
# independent frame instead of a slice of df_ipm (avoids pandas'
# SettingWithCopyWarning).
df_reg7 = df_ipm.loc[reg7_mask].copy()
df_reg7['Region'] = 7

# Visual check of the selection against the state outlines.
fig, ax = plt.subplots(figsize=(10, 10))
df_reg7.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_ylim([20, 40])
ax.set_xlim([-100, -75])
ax.set_title('Region 7')

In [None]:
# Region 8: every IPM region whose name contains 'PJM', except those that
# also contain 'COMD' (names containing 'COMD' are selected for Region 6).
reg8_mask = df_ipm['IPM_Region'].apply(
    lambda name: 'PJM' in name and 'COMD' not in name
)
# Take an explicit copy so that adding the 'Region' column writes to an
# independent frame instead of a slice of df_ipm (avoids pandas'
# SettingWithCopyWarning).
df_reg8 = df_ipm.loc[reg8_mask].copy()
df_reg8['Region'] = 8

# Visual check of the selection against the state outlines.
fig, ax = plt.subplots(figsize=(10, 10))
df_reg8.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_ylim([30, 50])
ax.set_xlim([-100, -60])
ax.set_title('Region 8')

In [None]:
# Region 9: every IPM region whose name contains one of these substrings.
reg9_tags = ('ENG', 'NY')
reg9_mask = df_ipm['IPM_Region'].apply(
    lambda name: any(tag in name for tag in reg9_tags)
)
# Take an explicit copy so that adding the 'Region' column writes to an
# independent frame instead of a slice of df_ipm (avoids pandas'
# SettingWithCopyWarning).
df_reg9 = df_ipm.loc[reg9_mask].copy()
df_reg9['Region'] = 9

# Visual check of the selection against the state outlines.
fig, ax = plt.subplots(figsize=(10, 10))
df_reg9.plot(ax=ax, column='IPM_Region', edgecolor='grey')
df_states.plot(ax=ax, edgecolor='k', facecolor='None')
ax.set_ylim([40, 50])
ax.set_xlim([-80, -60])
ax.set_title('Region 9')

In [None]:
# Show the Region 9 frame as this cell's output.
df_reg9

In [None]:
# Stack the nine per-region frames into one frame.
regional_frames = [
    df_reg1, df_reg2, df_reg3, df_reg4, df_reg5,
    df_reg6, df_reg7, df_reg8, df_reg9,
]
df_concat_IPM = pd.concat(regional_frames)
# Coverage check: print the row count and distinct IPM region count of the
# stacked frame, then the row count of the full IPM frame, for comparison.
n_rows_concat = len(df_concat_IPM)
n_distinct_ipm = len(df_concat_IPM['IPM_Region'].unique())
print(n_rows_concat, n_distinct_ipm)
print(len(df_ipm))

In [None]:
# Temoa region number -> postal codes (STUSPS) of the states it contains.
state_codes_by_region = {
    1: ['CA'],
    2: ['OR', 'WA', 'ID', 'MT', 'WY'],
    3: ['NV', 'UT', 'AZ', 'CO', 'NM'],
    4: ['TX'],
    5: ['OK', 'KS', 'AR', 'MO', 'SD', 'NE', 'LA'],
    6: ['IL', 'IN', 'MI', 'IA', 'ND', 'WI', 'MN'],
    7: ['FL', 'GA', 'AL', 'TN', 'SC', 'NC', 'KY', 'MS'],
    8: ['NJ', 'PA', 'OH', 'WV', 'VA', 'DE', 'MD', 'DC'],
    9: ['VT', 'NH', 'RI', 'NY', 'MA', 'CT', 'ME'],
}
# Invert to a flat postal-code -> region lookup.
region_by_state_code = {
    code: region
    for region, codes in state_codes_by_region.items()
    for code in codes
}

# Exact-match lookup on the postal code (rather than substring tests); any
# code that is not listed above gets Region 0.
df_states['Region'] = (
    df_states['STUSPS'].map(region_by_state_code).fillna(0).astype(int)
)
# Drop the unassigned rows. The explicit copy makes the result an independent
# frame, so adding columns to it later does not write to a slice.
df_states = df_states[df_states['Region'] != 0].copy()

In [None]:
# Compute the state centroids once and store their x / y coordinates as
# plain columns.
# NOTE(review): the frame was reprojected to EPSG:4326 when loaded, and
# geopandas warns that centroids computed in a geographic CRS are
# approximate — confirm that is acceptable for how these values are used.
state_centroids = df_states.centroid
df_states['centroid_x'] = state_centroids.x
df_states['centroid_y'] = state_centroids.y

In [None]:
# Per-region mean of the numeric state columns (including centroid_x and
# centroid_y). numeric_only=True is required on pandas >= 2.0, where mean()
# no longer silently drops string/geometry columns and raises TypeError.
df_states_region_groups = df_states.groupby('Region').mean(numeric_only=True)

In [None]:
# Short name for each Temoa region number.
region_names = {
    1: 'CA',
    2: 'NW',
    3: 'SW',
    4: 'TX',
    5: 'CEN',
    6: 'N_CEN',
    7: 'SE',
    8: 'MID_AT',
    9: 'NE',
}

# Region number -> list of IPM region names. Built in this cell so that it
# runs on a fresh kernel; the original read `dict_regions` before any cell
# above had defined it.
dict_regions = df_concat_IPM.groupby('Region')['IPM_Region'].apply(list).to_dict()

# Write one row per region. The context manager guarantees the file is
# flushed and closed; newline='' is what the csv module expects so that row
# terminators are not translated a second time.
with open("IPM_regions_cluster.csv", "w", newline='') as regions_file:
    dict_regions_write = csv.writer(regions_file)
    for key, val in dict_regions.items():
        dict_regions_write.writerow(['- ', region_names[key], ': ', val])

In [None]:
# Translate the numeric Region index into its short name and store it as a
# label column.
region_labels = df_states_region_groups.index.map(region_names)
df_states_region_groups['reg_names'] = region_labels

In [None]:
# Show the per-region means together with the 'reg_names' labels as this
# cell's output.
df_states_region_groups

In [None]:
# IPM regions coloured by their Temoa region number, with each region's short
# name written at the mean centroid of that region's states.
fig, ax = plt.subplots(figsize=(17, 10))
df_concat_IPM.plot(ax=ax, column='Region', edgecolor='grey', cmap='Set1')
region_label_points = zip(
    df_states_region_groups['reg_names'],
    df_states_region_groups['centroid_x'],
    df_states_region_groups['centroid_y'],
)
for region_label, label_x, label_y in region_label_points:
    ax.annotate(region_label, (label_x, label_y),
                horizontalalignment='right', fontsize=24)
ax.set_xlim([-125, -67])
ax.set_ylim([23, 50])
for tick_setter in (ax.set_xticks, ax.set_yticks):
    tick_setter([])
fig.tight_layout()
# on=None toggles the frame of the current axes.
plt.box(on=None)
fig.savefig('IPM_regions_to_temoa_state_groups.jpg', dpi=400)

In [None]:
# States coloured by their Temoa region number, with each region's short name
# written at the mean centroid of that region's states.
fig, ax = plt.subplots(figsize=(17, 10))
df_states.plot(ax=ax, column='Region', edgecolor='grey', cmap='Set1')
region_label_points = zip(
    df_states_region_groups['reg_names'],
    df_states_region_groups['centroid_x'],
    df_states_region_groups['centroid_y'],
)
for region_label, label_x, label_y in region_label_points:
    ax.annotate(region_label, (label_x, label_y),
                horizontalalignment='right', fontsize=24)
ax.set_xlim([-125, -67])
ax.set_ylim([23, 50])
for tick_setter in (ax.set_xticks, ax.set_yticks):
    tick_setter([])
fig.tight_layout()
# on=None toggles the frame of the current axes.
plt.box(on=None)
fig.savefig('states_to_temoa_state_groups.jpg', dpi=400)

In [None]:
# Keep only the columns needed for the IPM-region -> Temoa-region mapping and
# write them out as GeoJSON.
ipm_export_columns = ['IPM_Region', 'Region', 'geometry']
df_concat_IPM = df_concat_IPM[ipm_export_columns]
gdf_concat_IPM = gpd.GeoDataFrame(df_concat_IPM, crs='EPSG:4326')
gdf_concat_IPM.to_file(
    'mapping_IPM_regions_to_temoa_state_groups.geojson',
    driver="GeoJSON",
)

In [None]:
# Write the state -> Temoa-region mapping, with all columns currently on
# df_states, out as GeoJSON.
gdf_states = gpd.GeoDataFrame(df_states, crs='EPSG:4326')
gdf_states.to_file(
    'mapping_states_to_temoa_state_groups.geojson',
    driver="GeoJSON",
)

In [None]:
# Region number -> list of the IPM region names assigned to that region.
ipm_names_by_region = df_concat_IPM.groupby('Region')['IPM_Region'].apply(list)
dict_regions = ipm_names_by_region.to_dict()

In [None]:
# NOTE(review): `key` is only bound as the loop variable of the
# `for key, val in dict_regions.items()` loop in the CSV-writing cell, so this
# cell shows whichever key that loop visited last. It looks like leftover
# debugging — confirm whether the cell can be removed.
key

In [None]:
# All IPM region names, in concatenation order, written as a single 'US_N'
# row of the national cluster file.
us_national_list = df_concat_IPM['IPM_Region'].tolist()
# The context manager guarantees the file is flushed and closed; newline=''
# is what the csv module expects so that row terminators are not translated
# a second time.
with open("IPM_regions_nationalcluster.csv", "w", newline='') as national_file:
    dict_regions_write = csv.writer(national_file)
    dict_regions_write.writerow(['US_N', us_national_list])

In [None]:
# Export each state's name, postal code and Temoa region number as CSV.
state_region_columns = ['NAME', 'STUSPS', 'Region']
df_states.loc[:, state_region_columns].to_csv('state_regions_cluster.csv')