In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# State lookup table: full state name ('state') next to its abbreviation
# ('State'), loaded from disk instead of being typed out by hand.
df_states = (
    pd.read_csv('data/state_table.csv')[['name', 'abbreviation']]
    .rename(columns={'name': 'state', 'abbreviation': 'State'})
)
df_states.head(3)

Unnamed: 0,state,State
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ


In [3]:
## Solar resource per ZIP code: keep ZIP, State and the annual average
## minimum column (kWh/m2/day), exposed under the short name 'Sun'.
df_solar = pd.read_csv('data/SolarSummaries.csv')
df_sun = df_solar[['ZIP', 'State', 'Annual Average Minimum (kWh/m2/day)']].rename(
    columns={'Annual Average Minimum (kWh/m2/day)': 'Sun'}
)
# Render every ZIP as text, left-padded with zeros to five characters.
df_sun['ZIP'] = df_sun['ZIP'].map(lambda zip_code: str(zip_code).zfill(5))
df_sun.head(3)

Unnamed: 0,ZIP,State,Sun
0,1001,MA,4.48
1,1002,MA,4.38
2,1003,MA,4.38


In [4]:
# Census housing table. The first line of the file is skipped, so the
# second line supplies the column names used below.
df_census = pd.read_csv(
    'data/ACS_13_5YR_DP04_with_ann.csv',
    skiprows=1,
    low_memory=False,
)

# Keep the area id and the count of 1-unit detached structures, under short names.
detached_col = 'Estimate; UNITS IN STRUCTURE - Total housing units - 1-unit, detached'
df_housing = df_census[['Id2', detached_col]].rename(
    columns={'Id2': 'ZIP', detached_col: 'Houses'}
)
# Render every ZIP as text, left-padded with zeros to five characters.
df_housing['ZIP'] = df_housing['ZIP'].map(lambda zip_code: str(zip_code).zfill(5))
df_housing.head(3)

Unnamed: 0,ZIP,Houses
0,601,5990
1,602,11996
2,603,15975


In [5]:
# Inner join of the solar and housing tables on 'ZIP', the one column they
# share; ZIP codes present in only one of the tables are dropped.
df_byzip = pd.merge(df_sun, df_housing, how='inner', on='ZIP')
df_byzip.head(3)

Unnamed: 0,ZIP,State,Sun,Houses
0,1001,MA,4.48,3942
1,1002,MA,4.38,4594
2,1003,MA,4.38,0


In [6]:
# Rooftop PV capacity by state and year (capacity is in megawatts).
capacity_raw = pd.read_csv('data/state_capacity_80pct_RE_ETI.csv')

# Join against df_states on whatever columns the two tables share, then keep
# only the state abbreviation, the year and the rooftop PV capacity.
df_capacity = pd.merge(capacity_raw, df_states)[['State', 'year', 'rooftop_pv']]

# Estimated number of systems: capacity scaled by 1e3, then divided by an
# average system size of 5 (presumably MW -> kW and kW per system; confirm).
df_capacity['num_systems'] = df_capacity['rooftop_pv'].mul(1e3).div(5)
df_capacity.head(3)

Unnamed: 0,State,year,rooftop_pv,num_systems
0,AL,2010,0.19,38
1,AL,2012,0.19,38
2,AL,2014,0.21,42


In [7]:
# Number of houses per state, summed over every ZIP code in df_byzip.

# Sanity check on a single state ('AL' is the abbreviation for Alabama).
print('number of houses in Alabama: ' + str(df_byzip[df_byzip.State == 'AL']['Houses'].sum()))

# States in first-appearance order; the solar-fraction cell iterates over this list.
list_states = df_byzip.State.drop_duplicates().tolist()

# One grouped pass instead of re-filtering the whole frame once per state.
houses_state = df_byzip.groupby('State')['Houses'].sum().to_dict()

# list(...) so the slice also works where items() does not return a list (Python 3).
list(houses_state.items())[:5]

number of houses in Alaska: 1487712


[('WA', 1834311),
 ('DE', 238313),
 ('DC', 35892),
 ('WI', 1745464),
 ('WV', 616807)]

In [8]:
# growth factor between now and target year
growth_factor = 1.00
target_year = 2050

# fraction of houses with solar, on a state basis
# (a state missing from houses_state raises KeyError rather than being hidden)
df_capacity['num_houses'] = df_capacity['State'].apply(lambda x: houses_state[x] * growth_factor)
df_capacity['solar_fraction'] = df_capacity['num_systems'] / df_capacity['num_houses']

# solar_fraction of the first target-year row for each state; a single mask
# on the full frame avoids chained boolean indexing and honours target_year
fraction_at_target = (
    df_capacity.loc[df_capacity['year'] == target_year]
    .drop_duplicates(subset='State')
    .set_index('State')['solar_fraction']
)

# states with no capacity row for the target year get a fraction of 0
# (explicit default instead of a bare except that would hide real errors)
solarfraction_state = {
    state: fraction_at_target.get(state, 0) for state in list_states
}

# list(...) so the slice also works where items() does not return a list (Python 3)
list(solarfraction_state.items())[:5]



[('WA', 0.25696842029514078),
 ('DE', 0.37239260971915089),
 ('DC', 0),
 ('WI', 0.070207119711434895),
 ('WV', 0.025875192726411988)]

In [10]:
# Give every ZIP code its state's solar fraction, then estimate the number of
# solar houses per ZIP as houses * fraction, rounded.
df_byzip['Solar Fraction'] = df_byzip['State'].map(lambda abbr: solarfraction_state[abbr])
df_byzip['Solar Houses'] = df_byzip['Houses'].mul(df_byzip['Solar Fraction']).round()
df_byzip.head(3)

Unnamed: 0,ZIP,State,Sun,Houses,Solar Fraction,Solar Houses
0,1001,MA,4.48,3942,0.132898,524
1,1002,MA,4.38,4594,0.132898,611
2,1003,MA,4.38,0,0.132898,0


In [11]:
# Persist the per-ZIP table; with to_csv defaults the DataFrame index is
# written as the first, unnamed column.
output_path = 'data/solarhouses_byzip_2050.csv'
df_byzip.to_csv(output_path)