In [14]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import json

In [15]:
# Load the city-level table and order the rows by 2020 population (ascending).
data = pd.read_csv('../data/data_w_pop.csv').sort_values('pop2020')
# San Diego and Alexandria are dropped: their count_qualified values
# (15 and 6) are flagged as problematic.
excluded_regions = ['San Diego', 'Alexandria']
keep_mask = ~data['region_name'].isin(excluded_regions)
data = data[keep_mask]

In [3]:
def estimate(json_file, flat_fraction, kw_per_m2=.15, threshold=929.0304):
    """Summarise the roofs of one city whose flat-roof area exceeds a threshold.

    Parameters
    ----------
    json_file : str
        JSON text (a string, not a file path) that decodes to a sequence of
        bins. Element 0 of each bin is read as the predicted install size in
        kW and element 1 as the number of roofs in that bin.
    flat_fraction : float
        Fraction applied to every bin's install size to obtain its flat part.
    kw_per_m2 : float, optional
        Assumed installed kW per square metre of roof. Defaults to 0.15.
    threshold : float, optional
        Minimum flat-roof area in square metres for a bin to be counted.
        Defaults to 929.0304 m^2, i.e. 10,000 square feet.

    Returns
    -------
    tuple
        ``(area, roofs, capacity)`` computed over the qualifying bins only:
        ``area`` is the sum of flat-roof area times roof count, ``roofs`` is
        the sum of roof counts, and ``capacity`` is the sum of
        ``kw * roof count * flat_fraction``. All three are zero when no bin
        qualifies (including when the bin list is empty).
    """
    # Each row in the dataset stores the bins as JSON text; decode it.
    sizes = json.loads(json_file)

    # Build the two columns separately so roof counts keep their integer dtype
    # even when install sizes are floats.
    kws = np.array([size[0] for size in sizes])
    roofs = np.array([size[1] for size in sizes])

    # kW scaled by the flat fraction, divided by kW per m^2, gives flat m^2.
    flat_roof_size = kws * flat_fraction / kw_per_m2

    # Compute the selection mask once and reuse it for all three outputs.
    above = flat_roof_size > threshold
    size_above = flat_roof_size[above]
    roofs_above = roofs[above]
    kws_above = kws[above]

    # (total m^2 of qualifying flat roof, number of qualifying roofs,
    #  total kW capacity attributed to the flat part of those roofs)
    return (
        np.sum(size_above * roofs_above),
        np.sum(roofs_above),
        np.sum(kws_above * roofs_above * flat_fraction),
    )

In [4]:
# Number of rows (cities) in the table
n = len(data)

# Multipliers applied to each city's qualifying roof area to obtain `p`
# NOTE(review): units/meaning of p_factor and frac_farmed are not stated here — confirm
p_factor, frac_farmed = 21.5, 0.6

# Per-city accumulators, filled in the row order of `data`
p_by_city, m2_total_by_city = [], []
n_roofs_by_city, kwhs_loss_by_city = [], []

# Per-city ratios: flat panels over all panels, and yearly kWh over total kW
flat_fraction_by_city = data['number_of_panels_f'].div(data['number_of_panels_total'])
kwh_frac_total_by_city = data['yearly_sunlight_kwh_total'].div(data['kw_total'])

# One JSON string of install-size bins per city
cities = data['install_size_kw_buckets_json'].to_numpy()

per_city_inputs = zip(
    cities,
    flat_fraction_by_city.to_numpy(),
    kwh_frac_total_by_city.to_numpy(),
)
for idx, (city, flat_fraction, kwh_fraction) in enumerate(per_city_inputs):
    # Area, roof count and capacity of the roofs that pass the size threshold
    m2_total, n_roofs_total, capacity_loss = estimate(city, flat_fraction)

    # Scale area into `p`, and capacity (kW) into yearly energy (kWh)
    p = m2_total * p_factor * frac_farmed
    kwh = capacity_loss * kwh_fraction

    p_by_city.append(p)
    m2_total_by_city.append(m2_total)
    n_roofs_by_city.append(n_roofs_total)
    kwhs_loss_by_city.append(kwh)

In [5]:
# Assign Variables
data['p'] = p_by_city
data['m2_total'] = m2_total_by_city
data['farms'] = n_roofs_by_city
data['frac_farms'] = data['farms'] / data['count_qualified']
data['p_per_capita'] = data['p'] / data['pop2020']
data['kwh_loss'] = kwhs_loss_by_city
data['kwh_loss_pct'] = data['kwh_loss'] / data['yearly_sunlight_kwh_total'] * 100
data[['region_name','state_name','pop2020','p','m2_total','farms','frac_farms','count_qualified','p_per_capita','kwh_loss','kwh_loss_pct','yearly_sunlight_kwh_total']].sort_values('count_qualified',ascending=True).iloc[:30]



Unnamed: 0,region_name,state_name,pop2020,p,m2_total,farms,frac_farms,count_qualified,p_per_capita,kwh_loss,kwh_loss_pct,yearly_sunlight_kwh_total
26,Arlington,Virginia,231803,1079868.0,83710.72,31,0.072093,430.0,4.65856,15225120.0,54.948122,27708170.0
20,Denton,Texas,142173,57879.55,4486.787,2,0.000225,8907.0,0.407106,919859.1,0.665136,138296400.0
112,North Las Vegas,Nevada,253923,1086277.0,84207.55,41,0.002731,15011.0,4.27798,20289390.0,5.114404,396710800.0
125,Paterson,New Jersey,145871,2876339.0,222972.0,120,0.006194,19375.0,19.718373,38938960.0,11.752998,331310800.0
89,McKinney,Texas,208487,647380.9,50184.56,32,0.001544,20730.0,3.105138,9995426.0,2.07498,481712000.0
195,Yonkers,New York,199021,3717611.0,288186.9,147,0.00635,23150.0,18.679493,49491320.0,12.16444,406852400.0
191,Bridgeport,Connecticut,143010,3591013.0,278373.1,142,0.006047,23481.0,25.11022,48856960.0,11.050044,442142700.0
86,Macon,Georgia,152519,5973989.0,463099.9,228,0.008544,26686.0,39.168817,90168670.0,11.911042,757017500.0
106,Bellevue,Washington,154647,3803604.0,294853.0,161,0.005798,27770.0,24.595392,42258360.0,7.771178,543783200.0
69,Jersey City,New Jersey,271099,15540360.0,1204679.0,341,0.011749,29024.0,57.323552,210409500.0,33.707385,624223800.0


In [6]:
# Sum of the per-city `p` estimates over every row of `data`
total_p = data['p'].sum()
total_p

3564060918.9627

In [7]:
# Summary statistics for the input and derived columns
# (describe() reports only the numeric ones by default)
described_columns = [
    'region_name', 'state_name', 'pop2020', 'p', 'm2_total', 'farms',
    'frac_farms', 'count_qualified', 'p_per_capita', 'kwh_loss',
    'kwh_loss_pct', 'yearly_sunlight_kwh_total',
]
data[described_columns].describe()

Unnamed: 0,pop2020,p,m2_total,farms,frac_farms,count_qualified,p_per_capita,kwh_loss,kwh_loss_pct,yearly_sunlight_kwh_total
count,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0
mean,414063.5,18277240.0,1416840.0,594.989744,0.007315,85427.020513,47.773604,274696600.0,11.950284,2315222000.0
std,715747.7,23868960.0,1850307.0,749.633491,0.006574,92188.753309,37.378277,353416400.0,8.604361,2456594000.0
min,142049.0,57879.55,4486.787,2.0,0.000183,430.0,0.407106,919859.1,0.156549,27708170.0
25%,179987.5,4844255.0,375523.6,192.0,0.003726,40162.0,20.480381,70200380.0,6.048634,1125807000.0
50%,231803.0,11520900.0,893092.7,386.0,0.006155,55577.0,41.490932,168027000.0,10.480953,1634277000.0
75%,389449.5,20674700.0,1602690.0,650.0,0.00922,81836.5,65.739166,314410300.0,16.175236,2419065000.0
max,8323340.0,207766400.0,16105920.0,6815.0,0.072093,647621.0,221.361256,3076044000.0,54.948122,18337010000.0
