In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import json

In [4]:
# Load the input table from a path relative to the notebook. Later cells read
# its `install_size_kw_buckets_json`, `number_of_panels_f`,
# `number_of_panels_total` and `pop2020` columns.
data = pd.read_csv('../data/data_w_pop.csv')


Index(['region_name', 'state_name', 'lat_max', 'lat_min', 'lng_max', 'lng_min',
       'lat_avg', 'lng_avg', 'yearly_sunlight_kwh_kw_threshold_avg',
       'count_qualified', 'percent_covered', 'percent_qualified',
       'number_of_panels_n', 'number_of_panels_s', 'number_of_panels_e',
       'number_of_panels_w', 'number_of_panels_f', 'number_of_panels_median',
       'number_of_panels_total', 'kw_median', 'kw_total',
       'yearly_sunlight_kwh_n', 'yearly_sunlight_kwh_s',
       'yearly_sunlight_kwh_e', 'yearly_sunlight_kwh_w',
       'yearly_sunlight_kwh_f', 'yearly_sunlight_kwh_median',
       'yearly_sunlight_kwh_total', 'install_size_kw_buckets_json',
       'carbon_offset_metric_tons', 'existing_installs_count', 'name', 'state',
       'pop2020', 'density'],
      dtype='object')

In [41]:
def get_produce_and_roofs(json_file, flat_fraction, kw_per_m2=.15, threshold=929.0304):
    """Total large flat-roof area and roof count for one row's install-size buckets.

    Parameters
    ----------
    json_file : str
        JSON text holding a list of bins; each bin is indexable, with the
        install capacity in kW at position 0 and the number of roofs in that
        bin at position 1.
    flat_fraction : float
        Fraction of each bin's capacity attributed to flat roof surface.
    kw_per_m2 : float, default 0.15
        Assumed installable capacity per square metre, used to turn kW into m2.
    threshold : float, default 929.0304
        Minimum flat-roof area in m2 a bin must strictly exceed to be counted
        (929.0304 m2 is 10,000 square feet).

    Returns
    -------
    tuple
        ``(total_m2, n_roofs)``: the sum of ``area * roof count`` over the
        qualifying bins, and the sum of roof counts over those same bins.
        Both are 0 when no bin qualifies or the bin list is empty.
    """
    # Each row in the dataset stores its bins as a JSON string; decode it
    sizes = json.loads(json_file)

    # Per-bin capacity (kW) and roof count as arrays for element-wise maths
    kw_install = np.array([size[0] for size in sizes])
    roofs = np.array([size[1] for size in sizes])

    # kW -> m2 of flat roof: scale by the flat fraction, divide by kW per m2
    flat_roof_size = kw_install * flat_fraction / kw_per_m2

    # Build the mask once; a NaN area compares False and is therefore excluded
    is_large = flat_roof_size > threshold
    size_above = flat_roof_size[is_large]
    roofs_above = roofs[is_large]

    # Total square metres of qualifying surface, number of qualifying roofs
    return (np.sum(size_above * roofs_above), np.sum(roofs_above))

In [42]:
# Number of rows in the dataset
n = data.shape[0]

# Multiplier turning qualifying flat-roof area (m2) into `p`
# NOTE(review): units of `p` are not stated in this notebook - confirm
p_factor = 21.5

# Share of each row's panels that are flat-roof panels, and the raw JSON buckets
flat_fraction_by_city = data['number_of_panels_f'] / data['number_of_panels_total']
cities = data['install_size_kw_buckets_json'].to_numpy()

# One (m2_total, n_roofs) pair per row, in row order
totals_by_city = [
    get_produce_and_roofs(buckets, fraction)
    for buckets, fraction in zip(cities, flat_fraction_by_city.to_numpy())
]

# Split the pairs into parallel lists and scale the area into `p`
m2_total_by_city = [area for area, _ in totals_by_city]
n_roofs_by_city = [roof_count for _, roof_count in totals_by_city]
p_by_city = [area * p_factor for area in m2_total_by_city]

In [43]:
# Assign Variables
data['p'] = p_by_city
data['m2_total'] = m2_total_by_city
data['n_roofs'] = n_roofs_by_city
data['p_per_capita'] = data['p'] / data['pop2020']
data[['region_name','state_name','pop2020','p','m2_total','n_roofs','p_per_capita']].sort_values('p_per_capita',ascending=False).iloc[:30]

Unnamed: 0,region_name,state_name,pop2020,p,m2_total,n_roofs,p_per_capita
150,Salt Lake City,Utah,213367,78718650.0,3661332.0,1177,368.935427
118,Ontario,California,186653,66601110.0,3097726.0,847,356.817787
90,Memphis,Tennessee,647374,192488200.0,8952939.0,2644,297.336924
4,Chattanooga,Tennessee,184143,47561690.0,2212172.0,868,258.286722
71,Kansas City,Kansas,153600,37143610.0,1727610.0,560,241.82041
143,Rockford,Illinois,145020,34421460.0,1600998.0,563,237.35663
138,Birmingham,Alabama,208940,47512200.0,2209870.0,930,227.396364
171,St. Louis,Missouri,293792,58421150.0,2717263.0,1088,198.852066
81,Little Rock,Arkansas,197371,39159280.0,1821362.0,765,198.404432
51,Greensboro,North Carolina,299946,58655930.0,2728183.0,1030,195.554961


In [44]:
# Sum of `p` across every row of `data`
data['p'].sum()

6143601956.528593