To Do:
 - [X] join in NCSL
 - [X] join local opposition
 - [X] model state level ordinances
 - [ ] make resource class
 - [X] is_hybrid
 - [X] has_opposition
 - [ ] replace "Unknown" ISO counties with NULL before geocoding. It fills in garbage otherwise 

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
import dbcp
import pandas as pd



In [4]:
from dbcp.data_mart import projects as pj

In [5]:
engine = dbcp.helpers.get_sql_engine()

In [6]:
loc_df = pj._get_iso_location_df(engine)

In [7]:
# no multi-valued projects
assert loc_df['project_id'].nunique() == loc_df.shape[0]

In [8]:
res_df = pj._get_iso_resource_df(engine)

In [9]:
res_df.head()

Unnamed: 0,capacity_mw,project_id,resource_clean
0,500.0,0,Battery Storage
1,500.0,1,Battery Storage
2,725.0,3,Battery Storage
3,4.5,4,Onshore Wind
4,80.0,5,Battery Storage


In [10]:
res_df['resource_clean'].value_counts()

Solar                                 6294
Onshore Wind                          2512
Natural Gas                           1997
Battery Storage                       1964
Biomass                                265
Unknown                                250
Other Storage                          232
Coal                                   215
Hydro                                  185
Nuclear                                 98
Oil                                     97
Offshore Wind                           95
Other                                   58
Solar; Storage                          58
Geothermal                              49
Combustion Turbine                      39
Biofuel                                 21
Landfill Gas                            19
Steam                                   12
Waste Heat                              10
Pumped Storage                          10
Fuel Cell                                8
Wind; Storage                            6
Municipal S

In [11]:
unparsed_hybrids = res_df.loc[res_df['resource_clean'].str.contains(';'),:]
unparsed_hybrids.shape

(69, 3)

In [12]:
is_unparsed_hybrid = res_df.loc[res_df.loc[:,'project_id'].isin(pd.Index(unparsed_hybrids['project_id']))]
is_unparsed_hybrid.head()

Unnamed: 0,capacity_mw,project_id,resource_clean
7738,1.1,7316,Solar; Storage
9472,270.459992,8980,Wind; Storage
9475,241.490005,8983,Wind; Storage
9487,80.0,8995,Oil; Biomass
9493,20.0,9002,Solar; Storage


In [13]:
is_unparsed_hybrid.shape

(69, 3)

In [14]:
from re import IGNORECASE

In [15]:
res_df.loc[res_df['resource_clean'].str.contains('storage', flags=IGNORECASE),:]['project_id'].shape

(2272,)

In [16]:
res_df.groupby('project_id').size().value_counts()

1    12475
2      998
3       10
dtype: int64

In [17]:
is_storage = res_df.loc[:,'resource_clean'].str.contains('storage', flags=IGNORECASE)

In [18]:
res_df['storage_type'] = res_df.loc[:,'resource_clean'].where(is_storage)
res_df['generation_type'] = res_df.loc[:,'resource_clean'].where(~is_storage)

In [19]:
res_df.head(10)

Unnamed: 0,capacity_mw,project_id,resource_clean,storage_type,generation_type
0,500.0,0,Battery Storage,Battery Storage,
1,500.0,1,Battery Storage,Battery Storage,
2,725.0,3,Battery Storage,Battery Storage,
3,4.5,4,Onshore Wind,,Onshore Wind
4,80.0,5,Battery Storage,Battery Storage,
5,800.0,6,Solar,,Solar
6,75.0,7,Solar,,Solar
7,250.0,8,Onshore Wind,,Onshore Wind
8,99.0,9,Solar,,Solar
9,,10,Battery Storage,Battery Storage,


In [20]:
res_df.groupby('project_id').nth(2).shape

(10, 4)

In [21]:
gen = res_df.loc[~is_storage,:]
storage = res_df.loc[is_storage,:]

In [22]:
gen.shape, storage.shape

((12229, 5), (2272, 5))

In [23]:
out = gen.groupby('project_id')[['generation_type', 'capacity_mw']].nth(0).rename(columns={'generation_type': 'generation_type_1', 'capacity_mw': 'generation_capacity_mw_1'})
out.head(10)

Unnamed: 0_level_0,generation_type_1,generation_capacity_mw_1
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1
4,Onshore Wind,4.5
6,Solar,800.0
7,Solar,75.0
8,Onshore Wind,250.0
9,Solar,99.0
10,Solar,400.0
11,Solar,243.0
13,Solar,270.0
14,Solar,83.0
15,Solar,20.0


In [24]:
two_gens = gen.groupby('project_id')[['generation_type', 'capacity_mw']].nth(1).rename(columns={'generation_type': 'generation_type_2', 'capacity_mw': 'generation_capacity_mw_2'})

In [25]:
assert gen.groupby('project_id')[['generation_type', 'capacity_mw']].nth(2).shape[0] == 0

In [26]:
out = out.join(two_gens, how='left')

In [27]:
assert storage.shape[0] == storage.groupby('project_id').ngroups

In [28]:
storage = storage.set_index('project_id', verify_integrity=True)[['storage_type', 'capacity_mw']].rename(columns={'capacity_mw': 'storage_capacity_mw'})
storage.head(10)

Unnamed: 0_level_0,storage_type,storage_capacity_mw
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Battery Storage,500.0
1,Battery Storage,500.0
3,Battery Storage,725.0
5,Battery Storage,80.0
10,Battery Storage,
11,Battery Storage,91.0
12,Battery Storage,30.0
13,Battery Storage,270.0
15,Battery Storage,
16,Battery Storage,320.0


In [29]:
out = out.join(storage, how='outer')

In [30]:
assert out.shape[0] == res_df['project_id'].nunique()

In [31]:
test = pj._convert_resource_df_long_to_wide(res_df)

In [32]:
pd.testing.assert_frame_equal(out.sort_index(), test)

In [33]:
test = pj._get_and_join_iso_tables()

In [34]:
test.head()

Unnamed: 0,project_id,generation_type_1,generation_capacity_mw_1,generation_type_2,generation_capacity_mw_2,storage_type,storage_capacity_mw,date_operational,date_proposed,date_withdrawn,...,project_name,queue_date,queue_status,region,utility,withdrawl_reason,state,state_id_fips,county_id_fips,containing_county
0,0,,,,,Battery Storage,500.0,NaT,2023-12-31,NaT,...,,2020-08-13,active,West (non-ISO),Colstrip,,MT,30,30111,yellowstone
1,1,,,,,Battery Storage,500.0,NaT,2023-12-31,NaT,...,,2020-07-17,active,West (non-ISO),Colstrip,,MT,30,30111,yellowstone
2,3,,,,,Battery Storage,725.0,NaT,2024-11-15,NaT,...,MENIFEE POWER BANK,2019-04-15,active,CAISO,,,CA,6,6065,riverside
3,4,Onshore Wind,4.5,,,,,NaT,2021-12-31,NaT,...,,NaT,active,SPP,OKGE,,OK,40,40047,garfield
4,5,,,,,Battery Storage,80.0,NaT,NaT,NaT,...,,NaT,active,SPP,,,OK,40,40153,woodward


In [35]:
test.columns

Index(['project_id', 'generation_type_1', 'generation_capacity_mw_1',
       'generation_type_2', 'generation_capacity_mw_2', 'storage_type',
       'storage_capacity_mw', 'date_operational', 'date_proposed',
       'date_withdrawn', 'days_in_queue', 'developer', 'entity',
       'interconnection_status_lbnl', 'point_of_interconnection',
       'project_name', 'queue_date', 'queue_status', 'region', 'utility',
       'withdrawl_reason', 'state', 'state_id_fips', 'county_id_fips',
       'containing_county'],
      dtype='object')

In [36]:
test['generation_type_2'].notna().agg(['sum', 'mean'])

sum     41.00000
mean     0.00304
Name: generation_type_2, dtype: float64

In [37]:
ncsl = pj._get_ncsl_wind_permitting_df(engine)
ncsl.head()

Unnamed: 0,description,permitting_type,state_id_fips
0,According to the Wind Energy Technology Office...,Local,1
1,A Certificate of Convenience and Necessity iss...,Hybrid,2
2,Utilities planning to construct an energy faci...,Hybrid,4
3,New construction of larger facilities providin...,Local,5
4,"Land use decisions, including wind siting, are...",Local,6


In [38]:
test.head().merge(ncsl, on='state_id_fips', how='left')

Unnamed: 0,project_id,generation_type_1,generation_capacity_mw_1,generation_type_2,generation_capacity_mw_2,storage_type,storage_capacity_mw,date_operational,date_proposed,date_withdrawn,...,queue_status,region,utility,withdrawl_reason,state,state_id_fips,county_id_fips,containing_county,description,permitting_type
0,0,,,,,Battery Storage,500.0,NaT,2023-12-31,NaT,...,active,West (non-ISO),Colstrip,,MT,30,30111,yellowstone,There is no state level siting authority for w...,Local
1,1,,,,,Battery Storage,500.0,NaT,2023-12-31,NaT,...,active,West (non-ISO),Colstrip,,MT,30,30111,yellowstone,There is no state level siting authority for w...,Local
2,3,,,,,Battery Storage,725.0,NaT,2024-11-15,NaT,...,active,CAISO,,,CA,6,6065,riverside,"Land use decisions, including wind siting, are...",Local
3,4,Onshore Wind,4.5,,,,,NaT,2021-12-31,NaT,...,active,SPP,OKGE,,OK,40,40047,garfield,"Prior to constructing a wind facility, a proje...",Hybrid
4,5,,,,,Battery Storage,80.0,NaT,NaT,NaT,...,active,SPP,,,OK,40,40153,woodward,"Prior to constructing a wind facility, a proje...",Hybrid


In [39]:
local_opp = pj._get_local_opposition_df(engine)
local_opp.head()

Unnamed: 0,county_id_fips,earliest_year_mentioned,locality_name,locality_type,ordinance
0,1049,2019.0,Dekalb County,county,"Under a 2019 ordinance, any wind energy system..."
1,1003,,Baldwin County,county,"Large wind energy conversion systems (WECS), U..."
2,6071,2019.0,San Bernardino County,county,"In 2019, the San Bernardino County Board of Su..."
3,6073,,San Diego County,county,San Diego County limits small wind turbine hei...
4,8121,2020.0,Washington County,county,A temporary moratorium on the county’s process...


In [40]:
state_df = pj._get_state_opposition_df(engine)
state_df

Unnamed: 0,earliest_year_mentioned,policy,state_id_fips
0,2017,"In 2017, the Legislature enacted Public Act No...",9
1,2004,"In 2004, Kansas Governor Kathleen Sebelius ins...",20
2,2018,Governor LePage signed an executive order in J...,23
3,2020,In 2020 New York enacted the Accelerated Renew...,36
4,2019,"Legislative amendments enacted on May 23, 2019...",41


In [41]:
# drop states that repealed their policies or whose policy was pro-RE not anti-RE
fips_codes_to_drop = {'23', '36'} # Maine, New York
filtered_state_df = state_df.loc[~state_df.loc[:, 'state_id_fips'].isin(fips_codes_to_drop),:]

In [42]:
filtered_state_df

Unnamed: 0,earliest_year_mentioned,policy,state_id_fips
0,2017,"In 2017, the Legislature enacted Public Act No...",9
1,2004,"In 2004, Kansas Governor Kathleen Sebelius ins...",20
4,2019,"Legislative amendments enacted on May 23, 2019...",41


In [43]:
all_counties = pj._get_county_fips_df(engine)
all_states = pj._get_state_fips_df(engine)

In [44]:
all_counties

Unnamed: 0,state_id_fips,county_name,county_id_fips
0,01,Autauga County,01001
1,01,Baldwin County,01003
2,01,Barbour County,01005
3,01,Bibb County,01007
4,01,Blount County,01009
...,...,...,...
3231,72,Yauco Municipio,72153
3232,74,Midway Islands District,74300
3233,78,St. Croix Island District,78010
3234,78,St. John Island District,78020


In [45]:
states_as_counties = filtered_state_df.merge(all_counties, on='state_id_fips', how='left')
states_as_counties

Unnamed: 0,earliest_year_mentioned,policy,state_id_fips,county_name,county_id_fips
0,2017,"In 2017, the Legislature enacted Public Act No...",09,Fairfield County,09001
1,2017,"In 2017, the Legislature enacted Public Act No...",09,Hartford County,09003
2,2017,"In 2017, the Legislature enacted Public Act No...",09,Litchfield County,09005
3,2017,"In 2017, the Legislature enacted Public Act No...",09,Middlesex County,09007
4,2017,"In 2017, the Legislature enacted Public Act No...",09,New Haven County,09009
...,...,...,...,...,...
144,2019,"Legislative amendments enacted on May 23, 2019...",41,Wallowa County,41063
145,2019,"Legislative amendments enacted on May 23, 2019...",41,Wasco County,41065
146,2019,"Legislative amendments enacted on May 23, 2019...",41,Washington County,41067
147,2019,"Legislative amendments enacted on May 23, 2019...",41,Wheeler County,41069


In [46]:
combined = states_as_counties.merge(local_opp, on='county_id_fips', how='outer')
combined

Unnamed: 0,earliest_year_mentioned_x,policy,state_id_fips,county_name,county_id_fips,earliest_year_mentioned_y,locality_name,locality_type,ordinance
0,2017.0,"In 2017, the Legislature enacted Public Act No...",09,Fairfield County,09001,,,,
1,2017.0,"In 2017, the Legislature enacted Public Act No...",09,Hartford County,09003,,,,
2,2017.0,"In 2017, the Legislature enacted Public Act No...",09,Litchfield County,09005,,,,
3,2017.0,"In 2017, the Legislature enacted Public Act No...",09,Middlesex County,09007,,,,
4,2017.0,"In 2017, the Legislature enacted Public Act No...",09,New Haven County,09009,,,,
...,...,...,...,...,...,...,...,...,...
243,,,,,49005,2017.0,Hyrum,city,A moratorium on new solar power installation w...
244,,,,,51023,2020.0,Botetourt County,county,A turbine height limit of 550 feet provided by...
245,,,,,54037,,Jefferson County,county,The Jefferson County Commission is currently c...
246,,,,,55071,2013.0,Manitowoc County,county,In 2013 the Manitowoc County Board passed a re...


In [47]:
combined['ordinance_earliest_year_mentioned'] = combined['earliest_year_mentioned_y'].fillna(combined['earliest_year_mentioned_x'])

In [48]:
combined['ordinance'].fillna('State Policy: ' + combined['policy'])

0      State Policy: In 2017, the Legislature enacted...
1      State Policy: In 2017, the Legislature enacted...
2      State Policy: In 2017, the Legislature enacted...
3      State Policy: In 2017, the Legislature enacted...
4      State Policy: In 2017, the Legislature enacted...
                             ...                        
243    A moratorium on new solar power installation w...
244    A turbine height limit of 550 feet provided by...
245    The Jefferson County Commission is currently c...
246    In 2013 the Manitowoc County Board passed a re...
247    An ordinance requiring turbine setbacks of a h...
Name: ordinance, Length: 248, dtype: object

In [49]:
has_both = combined[['ordinance', 'policy']].notna().all(axis=1)

In [50]:
# concatenate the intersection
combined.loc[has_both, 'ordinance'] = combined.loc[has_both, 'ordinance'] + (' State Policy: ' + combined.loc[has_both, 'policy'])

In [51]:
combined.head(3)

Unnamed: 0,earliest_year_mentioned_x,policy,state_id_fips,county_name,county_id_fips,earliest_year_mentioned_y,locality_name,locality_type,ordinance,ordinance_earliest_year_mentioned
0,2017.0,"In 2017, the Legislature enacted Public Act No...",9,Fairfield County,9001,,,,,2017.0
1,2017.0,"In 2017, the Legislature enacted Public Act No...",9,Hartford County,9003,,,,,2017.0
2,2017.0,"In 2017, the Legislature enacted Public Act No...",9,Litchfield County,9005,,,,,2017.0


In [52]:
pj._combine_state_and_local_opposition_as_counties(state_df, local_opp, all_counties, all_states)

Unnamed: 0,county_id_fips,locality_name,locality_type,ordinance,ordinance_earliest_year_mentioned
0,09001,CT,state,"State Policy: In 2017, the Legislature enacted...",2017.0
1,09003,CT,state,"State Policy: In 2017, the Legislature enacted...",2017.0
2,09005,CT,state,"State Policy: In 2017, the Legislature enacted...",2017.0
3,09007,CT,state,"State Policy: In 2017, the Legislature enacted...",2017.0
4,09009,CT,state,"State Policy: In 2017, the Legislature enacted...",2017.0
...,...,...,...,...,...
243,49005,Hyrum,city,A moratorium on new solar power installation w...,2017.0
244,51023,Botetourt County,county,A turbine height limit of 550 feet provided by...,2020.0
245,54037,Jefferson County,county,The Jefferson County Commission is currently c...,
246,55071,Manitowoc County,county,In 2013 the Manitowoc County Board passed a re...,2013.0


In [53]:
local_opp['locality_type'].isna().sum()

0

In [54]:
merged_state_local_opposition = pj._combine_state_and_local_opposition_as_counties(state_df, local_opp, all_counties, all_states)

In [55]:
merged = test.merge(merged_state_local_opposition, on='county_id_fips', how='left')
merged.head()

Unnamed: 0,project_id,generation_type_1,generation_capacity_mw_1,generation_type_2,generation_capacity_mw_2,storage_type,storage_capacity_mw,date_operational,date_proposed,date_withdrawn,...,utility,withdrawl_reason,state,state_id_fips,county_id_fips,containing_county,locality_name,locality_type,ordinance,ordinance_earliest_year_mentioned
0,0,,,,,Battery Storage,500.0,NaT,2023-12-31,NaT,...,Colstrip,,MT,30,30111,yellowstone,,,,
1,1,,,,,Battery Storage,500.0,NaT,2023-12-31,NaT,...,Colstrip,,MT,30,30111,yellowstone,,,,
2,3,,,,,Battery Storage,725.0,NaT,2024-11-15,NaT,...,,,CA,6,6065,riverside,,,,
3,4,Onshore Wind,4.5,,,,,NaT,2021-12-31,NaT,...,OKGE,,OK,40,40047,garfield,,,,
4,5,,,,,Battery Storage,80.0,NaT,NaT,NaT,...,,,OK,40,40153,woodward,,,,


In [56]:
list(pj._add_derived_columns(merged).columns)

['project_id',
 'generation_type_1',
 'generation_capacity_mw_1',
 'generation_type_2',
 'generation_capacity_mw_2',
 'storage_type',
 'storage_capacity_mw',
 'date_operational',
 'date_proposed',
 'date_withdrawn',
 'days_in_queue',
 'developer',
 'entity',
 'interconnection_status_lbnl',
 'point_of_interconnection',
 'project_name',
 'queue_date',
 'queue_status',
 'region',
 'utility',
 'withdrawl_reason',
 'state',
 'state_id_fips',
 'county_id_fips',
 'containing_county',
 'locality_name',
 'locality_type',
 'ordinance',
 'ordinance_earliest_year_mentioned',
 'has_ordinance',
 'is_hybrid',
 'resource_class']

In [61]:
out = pj.make_project_data_mart_table()

In [62]:
out.head()

Unnamed: 0,project_name,project_id,iso_region,entity,utility,developer,state,county,state_id_fips,county_id_fips,...,point_of_interconnection,queue_status,withdrawl_reason,has_ordinance,ordinance_jurisdiction_name,ordinance_jurisdiction_type,ordinance_earliest_year_mentioned,ordinance,state_permitting_type,state_permitting_text
0,,0,West (non-ISO),NWE,Colstrip,,MT,Yellowstone County,30,30111,...,500kV at Broadview Substation,active,,False,,,,,Local,There is no state level siting authority for w...
1,,1,West (non-ISO),PacifiCorp,Colstrip,,MT,Yellowstone County,30,30111,...,Broadview substation,active,,False,,,,,Local,There is no state level siting authority for w...
2,MENIFEE POWER BANK,3,CAISO,CAISO,,,CA,Riverside County,6,6065,...,Valley Substation 500kV,active,,False,,,,,Local,"Land use decisions, including wind siting, are..."
3,,4,SPP,SPP,OKGE,,OK,Garfield County,40,40047,...,Breckinridge 138kV,active,,False,,,,,Hybrid,"Prior to constructing a wind facility, a proje..."
4,,5,SPP,SPP,,,OK,Woodward County,40,40153,...,Fort Supply SW 138kV Substation,active,,False,,,,,Hybrid,"Prior to constructing a wind facility, a proje..."


In [63]:
!pwd

/app/notebooks


In [64]:
out.to_csv('../data/output/project_data_mart.csv', index=False)