# Electric Rates by Zipcode - Investor-Owned Utilities

## U.S. Dept. of Energy

In [56]:
import pandas as pd 
import numpy as np 
import pickle 
from pprint import pprint

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
# Load the investor-owned-utility rate table. `zip` is read with object dtype
# (text) rather than being parsed as a number.
df = pd.read_csv('iouzipcodes2015 (1).csv', dtype={'zip':'object'})
# Preview the first rows.
df.head()

Unnamed: 0,zip,eiaid,utility_name,state,service_type,ownership,comm_rate,ind_rate,res_rate
0,85321,176,Ajo Improvement Co,AZ,Bundled,Investor Owned,0.096925,0.088462,0.101581
1,35218,195,Alabama Power Co,AL,Bundled,Investor Owned,0.111275,0.061426,0.122035
2,35219,195,Alabama Power Co,AL,Bundled,Investor Owned,0.111275,0.061426,0.122035
3,35214,195,Alabama Power Co,AL,Bundled,Investor Owned,0.111275,0.061426,0.122035
4,35215,195,Alabama Power Co,AL,Bundled,Investor Owned,0.111275,0.061426,0.122035


In [8]:
# Use `zipcode` as the column name; rebinding keeps the cell free of in-place mutation.
df = df.rename(columns={'zip': 'zipcode'})

# Average Electric Rates per Zipcode

In [17]:
# Mean commercial / industrial / residential rate for every zip code,
# returned with `zipcode` as an ordinary column.
_iou_rate_cols = ['comm_rate', 'ind_rate', 'res_rate']
_iou_mean_by_zip = df.groupby('zipcode')[_iou_rate_cols].mean()
electric_rates = _iou_mean_by_zip.reset_index()

In [20]:
# Persist the per-zip averages next to the notebook.
pd.to_pickle(electric_rates, "./electric_rates.pkl")

In [21]:
# Read the per-zip averages back from the pickle written by the previous cell
# (rebinds `electric_rates` to the reloaded frame).
electric_rates = pd.read_pickle("./electric_rates.pkl")

# Non-Investor-Owned Utilities

In [24]:
# Load the non-investor-owned utility rate table; `zip` is read with object
# dtype (text) rather than being parsed as a number.
df_noniou = pd.read_csv('noniouzipcodes2015.csv', dtype={'zip':'object'})
# Preview the first rows.
df_noniou.head()

Unnamed: 0,zip,eiaid,utility_name,state,service_type,ownership,comm_rate,ind_rate,res_rate
0,38858,55,City of Aberdeen - (MS),MS,Bundled,Municipal,0.108123,0.051382,0.110196
1,39730,55,City of Aberdeen - (MS),MS,Bundled,Municipal,0.108123,0.051382,0.110196
2,70510,59,City of Abbeville - (LA),LA,Bundled,Municipal,0.110181,0.086184,0.104306
3,21824,84,A & N Electric Coop,MD,Bundled,Cooperative,0.12974,0.0,0.123743
4,21851,84,A & N Electric Coop,MD,Bundled,Cooperative,0.12974,0.0,0.123743


In [28]:
# Use `zipcode` as the column name, then average the three rate columns
# per zip code and return `zipcode` as an ordinary column.
df_noniou = df_noniou.rename(columns={'zip': 'zipcode'})
_noniou_rate_cols = ['comm_rate', 'ind_rate', 'res_rate']
_noniou_mean_by_zip = df_noniou.groupby('zipcode')[_noniou_rate_cols].mean()
noniou_rates = _noniou_mean_by_zip.reset_index()

In [30]:
# Persist the non-IOU per-zip averages next to the notebook.
noniou_rates.to_pickle('./noniou_rates.pkl')

In [31]:
# (rows, columns) of the aggregated frame: one row per zip code, `zipcode` plus three rate columns.
noniou_rates.shape

(21881, 4)

# State-Level Residential and Commercial Electricity Consumption

Data from the U.S. Energy Information Administration - 2015 average monthly consumption figures

In [39]:
# Hand-entered state averages, one (state, residential, commercial) row per state.
_usage_rows = [
    ('CA', 557, 5852),
    ('NY', 601, 6082),
    ('MA', 602, 5364),
    ('AZ', 1028, 7753),
    ('CT', 731, 7082),
    ('MD', 1012, 10019),
    ('NH', 621, 3536),
    ('TX', 1176, 7968),
    ('NV', 913, 4992),
    ('WI', 668, 5640),
    ('NM', 635, 5208),
    ('OR', 902, 5827),
    ('DE', 977, 6661),
    ('MN', 762, 6743),
    ('FL', 1141, 6750),
    ('UT', 744, 7979),
    ('IL', 719, 6952),
]
usage = pd.DataFrame(
    _usage_rows,
    columns=['state', 'avg_monthly_consumption_res', 'avg_monthly_consumption_comm'],
)

In [41]:
# Persist the state consumption table next to the notebook.
usage.to_pickle('./electricity_usage.pkl')

# Get the Zip Codes We Need

In [205]:
# Load the finances table from a pickle that is not written by any cell shown in this notebook.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
df_finances = pd.read_pickle('./finances.pkl')

In [208]:
# Preview the first rows of the finances table.
df_finances.head()

Unnamed: 0,Actual,Predicted,Residual,zipcode,latitude,longitude,Expected_annual_pv_prod_median,Size_System_median_kw,state,res_rate,avg_monthly_consumption_res,avg_annual_consumption_res,Residential_Payback_Time,Annual_Energy_Value_res,yearly_electric_bill_no_solar,Savings_25_years,median_solar_insolation,County
0,32187.0,34823.74866,-2636.74866,1001,42.062368,-72.625754,8583.645,7.42,MA,0.145852,602,7224,33.051033,1251.942811,1053.635707,31298.570284,4.24,Hampden
1,30155.24,31275.459039,-1120.219039,1002,42.364061,-72.458739,7558.89,6.24,MA,0.142541,602,7224,30.372911,1077.451099,1029.715572,26936.277474,4.26,Hampshire
2,28370.09,34853.454835,-6483.364835,1005,42.418848,-72.106598,8487.92,7.2,MA,0.13923,602,7224,34.652628,1181.770654,1005.795437,29544.266342,4.39,Worcester
3,35844.38,34799.153138,1045.226862,1007,42.27901,-72.400468,8932.55,7.29,MA,0.142541,602,7224,33.794918,1273.253853,1029.715572,31831.346316,4.27,Hampshire
4,41132.255,43497.015054,-2364.760054,1008,42.190144,-72.95435,11288.13,9.11,MA,0.145852,602,7224,41.282784,1646.397679,1053.635707,41159.94198,4.15,Hampden


In [209]:
# Zip codes to look up, in the order they appear in the finances table.
zips_to_use = df_finances['zipcode'].tolist()

# Scraping Project Sunroof with Selenium

In [243]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
import os
import re
from bs4 import BeautifulSoup
import requests
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException

In [247]:
# NOTE(review): CHROME_PATH is assigned here but not referenced by any other cell shown.
CHROME_PATH = "/Applications/Google Chrome.app"
# Chrome options used by the scraping cell; `--headless` is passed so Chrome runs without a window.
chrome_options = Options()
chrome_options.add_argument("--headless")

In [105]:
# Location of the chromedriver binary; it is passed to webdriver.Chrome(...) in later cells.
chromedriver = "/Applications/chromedriver"
# NOTE(review): the path is already handed to the constructor explicitly; confirm whether
# this environment variable is still needed.
os.environ["webdriver.chrome.driver"] = chromedriver

In [230]:
# Open the Project Sunroof Data Explorer in a Chrome session created without `chrome_options`.
# NOTE(review): the scraping cell creates its own driver and rebinds `driver`; this session
# is never quit in the cells shown.
driver = webdriver.Chrome(chromedriver)
driver.get("https://www.google.com/get/sunroof/data-explorer/")

In [292]:
# Scrape the Project Sunroof Data Explorer for every zip code in
# `third_round_zipcodes` and collect the results in `total`
# (zipcode -> {"Solar_Numbers": [...], "Tax_Credit_Info": [...]}).
#
# NOTE(review): `third_round_zipcodes` is only assigned in a cell further down
# the notebook, so this cell depends on out-of-order execution.

SUNROOF_URL = "https://www.google.com/get/sunroof/data-explorer/"
PAGE_WAIT_SECONDS = 2  # pause after typing and after submitting, as in the original loop


def _submit_zipcode(driver, zipcode):
    """Type `zipcode` into the search box (element id ``input-2``) and submit it.

    Raises:
        NoSuchElementException: if the search box is not present on the current page.
    """
    input_form = driver.find_element_by_id("input-2")
    # Zip codes that lost their leading zero (e.g. 1001 for a Massachusetts zip)
    # are padded back to five digits before being typed.
    input_form.send_keys(str(zipcode).zfill(5))  # Enter Zip Code
    time.sleep(PAGE_WAIT_SECONDS)
    input_form.send_keys(Keys.RETURN)  # Submit Form
    time.sleep(PAGE_WAIT_SECONDS)


total = {}
os.environ["webdriver.chrome.driver"] = chromedriver
driver = webdriver.Chrome(chromedriver, chrome_options=chrome_options)
try:
    driver.get(SUNROOF_URL)
    for zipcode in third_round_zipcodes:
        try:
            _submit_zipcode(driver, zipcode)
        except NoSuchElementException:
            # The search box was not on the page: reload the explorer and retry once.
            time.sleep(2)
            print(f"No Information available for {zipcode} zip code.")
            driver.get(SUNROOF_URL)
            try:
                _submit_zipcode(driver, zipcode)
            except NoSuchElementException:
                # Still no search box: skip this zip code instead of aborting the
                # whole run; it stays absent from `total` and can be retried later.
                print(f"Skipping {zipcode}: search box not found after reload.")
                continue

        content = driver.find_elements_by_class_name("place-metrics-cell-value")
        print(f"Solar Information: {len(content)}")
        solar_numbers = [att.text for att in content]
        time.sleep(1)
        incentives = driver.find_elements_by_class_name("incentive-name")
        print(f"Number of Tax Incentives: {len(incentives)}")
        incentive_info = [att.text for att in incentives]

        print(f"Finished with zipcode {zipcode} page.")

        # Results stay keyed by the original `zipcode` value so they line up
        # with the list the zip codes came from.
        total[zipcode] = {"Solar_Numbers": solar_numbers,
                          "Tax_Credit_Info": incentive_info}

        # Go back to the search page for the next zip code.
        driver.execute_script("window.history.go(-1)")
finally:
    # Always shut the browser down, even if the loop raises part-way through;
    # whatever was collected so far remains in `total`.
    driver.quit()

In [288]:
# Number of zip codes collected by the scraping cell.
print(len(total))

487


### Combine 3 Separate Dictionaries into 1 Dict with all Project Sunroof Info
#### Convert to Pandas DataFrame

In [289]:
# Presumably run once after the third scraping pass to snapshot `total`, then
# commented out so a re-run does not overwrite the snapshot -- TODO confirm.
# third_set_zipcodes = total

In [254]:
# Presumably run once after the first scraping pass to snapshot `total`, then
# commented out so a re-run does not overwrite the snapshot -- TODO confirm.
# first_set_zipcodes = total

In [277]:
# Presumably run once after the second scraping pass to snapshot `total`, then
# commented out so a re-run does not overwrite the snapshot -- TODO confirm.
# second_set_zipcodes = total

In [279]:
# Number of zip codes captured in the first snapshot.
len(first_set_zipcodes)

1186

In [280]:
# Number of zip codes captured in the second snapshot.
len(second_set_zipcodes)

2012

In [291]:
# Number of zip codes captured in the third snapshot.
len(third_set_zipcodes)

487

In [263]:
# Commented-out step: the keys of the first snapshot, i.e. zip codes already scraped.
#completed_zips = [k for k, v in first_set_zipcodes.items()]

In [273]:
# Commented-out step: zip codes from `zips_to_use` that the first pass did not cover.
# NOTE(review): a later cell reads `second_round_zipcodes`, which is only assigned here.
#second_round_zipcodes = [i for i in zips_to_use if i not in completed_zips]

In [282]:
# Zip codes present in the second snapshot (the dict's keys, in insertion order).
second_completed_zips = list(second_set_zipcodes)

In [284]:
# Zip codes from the second round that are still missing, in their original order.
# Membership is tested against a set so each lookup is O(1) instead of scanning
# the whole `second_completed_zips` list for every candidate.
# NOTE(review): `second_round_zipcodes` is only assigned in a commented-out cell above.
_second_done = set(second_completed_zips)
third_round_zipcodes = [z for z in second_round_zipcodes if z not in _second_done]

In [296]:
# Merge the three snapshots into one dict; for a zip code present in more than
# one snapshot the later snapshot's entry wins.
d_all_solar_info = {
    **first_set_zipcodes,
    **second_set_zipcodes,
    **third_set_zipcodes,
}

## Convert to a pandas DataFrame

In [442]:
# One row per key of `d_all_solar_info` (orient='index' turns the outer keys into
# the index); the inner dict keys become the columns.
df_solar = pd.DataFrame.from_dict(d_all_solar_info, orient='index')

In [444]:
# Turn the scraped incentive lists into plain text.
# Every step assigns its result back to `df_solar`: calling
# `.replace(..., inplace=True)` on a selected column is chained assignment, which
# warns on recent pandas and leaves `df_solar` unchanged under Copy-on-Write.
df_solar = df_solar.rename(columns={'Tax_Credit_Info': 'Solar_Incentives'})
df_solar['Solar_Incentives'] = df_solar['Solar_Incentives'].astype('str')
# Remove the literal 'SOLAR INCENTIVES', entry from the stringified list.
df_solar['Solar_Incentives'] = df_solar['Solar_Incentives'].replace(
    "'SOLAR INCENTIVES',", '', regex=True
)
# Strip the list brackets and quote characters left over from str(list).
df_solar['Solar_Incentives'] = df_solar['Solar_Incentives'].replace(
    r"\[|\]|\'", '', regex=True
)

In [447]:
# Show up to 100 characters per cell when displaying frames.
pd.options.display.max_colwidth = 100

In [448]:
# Blank strings become NaN, then the index is moved into a regular column
# named `zipcode`.
df_solar = (
    df_solar
    .replace("", np.nan)
    .reset_index()
    .rename(columns={'index': 'zipcode'})
)

**Solar Numbers** - meaning of each position in the `Solar_Numbers` list:  
0. Number of existing Solar Installations (If available)
1. Percentage of Viable Solar Roofs
2. Total number of roofs that are Solar Viable
3. Total Sq. Footage of Roof Space
4. Total Solar Electrical Capacity in MegaWatts (DC)
5. Total Solar Electrical Production in MWh (AC) per Year
6. Roof Space Per Roof - Square Footage
7. Capacity Per Roof - kW (DC)
8. Solar Electricity Production per roof - kWh (AC) per Year

If all viable solar installations were implemented:   

9. Amount of avoided CO2 emissions from electricity sector in metric tons
10. Number of Passenger Cars taken off the road for one year  
11. Number of tree seedlings grown in a ten year span


In [458]:
# Remove first element in list for entire Series 
for index, value in df_solar['Solar_Numbers'].iteritems():
    if len(value) > 0:
        del value[0]

In [461]:
#Insert nan at first position in list
for index, value in df_solar['Solar_Numbers'].iteritems():
    if len(value) == 11:
        value.insert(0, np.nan)

In [463]:
# Expand each `Solar_Numbers` list into its own row of columns; the resulting
# column labels are the integer list positions (0, 1, 2, ...).
tags = df_solar['Solar_Numbers'].apply(pd.Series)

In [467]:
# Give the positional columns descriptive names.
# The last position is 11 (positions run 0-11); the previous key 12 matched no
# column, so that column kept its integer label. It is named 'Trees' here, the
# same name a later cell gives to column 11, which keeps the final schema unchanged.
tags = tags.rename(columns={
    0: 'existing_solars',
    1: 'viable_solar_roofs',
    2: 'total_viable_roofs',
    3: 'total_sq_foot_roof',
    4: 'total_capacity',
    5: 'total_production',
    6: 'roof_space_per_roof',
    7: 'capacity_per_roof',
    8: 'production_per_roof',
    9: 'avoided_CO2',
    10: 'cars',
    11: 'Trees',
})

In [471]:
# Place the expanded metric columns side by side with the existing columns.
df_solar = pd.concat(objs=[df_solar, tags], axis='columns')

In [473]:
# The raw list column is no longer needed once it has been expanded.
df_solar = df_solar.drop(columns='Solar_Numbers')

In [477]:
# Name the column still labelled with the integer 11.
df_solar = df_solar.rename(columns={11: 'Trees'})

In [479]:
# Persist the cleaned Project Sunroof table next to the notebook.
df_solar.to_pickle('./solar_numbers.pkl')