In [60]:
import pandas as pd
IN_PATH = "~/Dropbox/CDS-2019-AlbanyHub/Processed-Data/"
OUT_PATH = "~/Dropbox/CDS-2019-AlbanyHub/Tables/"

## 1. Unnormalized Tables
#### Read in csv files

In [61]:
block = pd.read_csv("~/Dropbox/CDS-2019-AlbanyHub/ToDatabase/full_blockgroup.csv") #information on block groups from census
address_counts = pd.read_csv(IN_PATH+"address_count_per_blockgroup.csv") #number of addresses per block group
#number of housing projects per block group
address_count_block = pd.read_csv(IN_PATH+"address_count_per_blockgroup_housingproject.csv")
#Average kwh per block group for average albany home
avg_kwh = pd.read_csv(IN_PATH+"avgkwh_blockgroup_elec.csv")
#Average kwh per block group for housing projects
avg_kwh_block = pd.read_csv(IN_PATH+"avg_kwh_per_blockgroup.csv")

In [62]:
#Crop and rename to fit into other files
df = (block[["total_vacdata", "block_group", "tract"]]).rename(columns = {"tract":"Tract","block_group":"BlockGroup", "total_vacdata":"total_properties"})

In [None]:
#Merge datasets and fill nan
df_full = df.merge(address_counts, on = ["Tract", "BlockGroup"], how = "inner")
df_full = df_full.merge(address_count_block, on = ['Tract', "BlockGroup"], how = "left")
df_full = df_full.merge(avg_kwh, on = ['Tract', "BlockGroup"], how = "left")
df_full = df_full.merge(avg_kwh_block, on = ['Tract', "BlockGroup"], how = "left")

In [None]:
df_full = df_full.fillna(0)

In [None]:
df_full = df_full.rename(columns = {"address_count_x":"utilities_address_count", "address_count_y":"projects_address_count"})

In [None]:
#Get count of non project addresses
df_full["nonproj_address_count"] = df_full["utilities_address_count"] - df_full["projects_address_count"]

In [None]:
df_full["proj_pct"] = df_full["projects_address_count"] / df_full["utilities_address_count"] * 100

In [None]:
df_full["nonproj_pct"] = 100 - df_full["proj_pct"]

In [None]:
df_full = df_full.drop("address_count", axis = 1)

In [None]:
df_full = df_full.rename(columns = {"mean_kwh":"project_mean_kwh", "avg_kwh":"nonproj_mean_kwh"}).drop("numrecords", axis = 1)

In [None]:
cols = list(df_full)
cols.insert(0, cols.pop(cols.index('Tract')))
cols.insert(1, cols.pop(cols.index("BlockGroup")))
cols.insert(5, cols.pop(cols.index("nonproj_address_count")))
cols.insert(6, cols.pop(cols.index("proj_pct")))
cols.insert(7, cols.pop(cols.index("nonproj_pct")))
df_full = df_full.loc[:, cols]

In [None]:
sum_props = df_full["total_properties"].sum()
sum_utilities = df_full["utilities_address_count"].sum()
sum_projects = df_full["projects_address_count"].sum()
sum_nonproj = df_full["nonproj_address_count"].sum()

In [None]:
df_full = df_full.append({"total_properties" : sum_props, "utilities_address_count" : sum_utilities,
                         "projects_address_count" : sum_projects, "nonproj_address_count" : sum_nonproj}, ignore_index =  True)

In [None]:
df_full.loc[66, "Tract"] = "Totals"
df_full.loc[66, "proj_pct"] = sum_projects/sum_utilities*100
df_full.loc[66, "nonproj_pct"] = sum_nonproj/sum_utilities*100
df_full.loc[66, "project_mean_kwh"] = 798.2

In [None]:
df_full.to_csv('albany.csv', index =  False)

## 2. Same table, normalized by sqft

In [4]:
#Average consumption per square foot for housing projects per block group
per_sqft_proj = pd.read_csv(IN_PATH+"avgkwh_proj_persqft.csv") 
#Average consumption per square foot for average albany home per block group
per_sqft = pd.read_csv(IN_PATH+"avgkwh_persqft.csv")

In [10]:
#Merge datasets and fill nan
df_full = df.merge(address_counts, on = ["Tract", "BlockGroup"], how = "inner")
df_full = df_full.merge(address_count_block, on = ['Tract', "BlockGroup"], how = "left").rename(columns = {"address_count_x":"utilities_address_count", "address_count_y":"projects_address_count"})
df_full = df_full.merge(per_sqft_proj, on = ['Tract', "BlockGroup"], how = "left").rename(columns={'mean_kwh_persqft':'mean_kwh_persqft_proj', 'mean_charge_persqft':'mean_charge_persqft_proj', 'address_count':'projects_address_count_wsqft'})
df_full = df_full.merge(per_sqft, on = ['Tract', "BlockGroup"], how = "left").rename(columns={'address_count':'utilities_address_count_wsqft'})
df_full = df_full.fillna(0)
df_full.head()

Unnamed: 0,total_properties,BlockGroup,Tract,utilities_address_count,projects_address_count,projects_address_count_wsqft,mean_kwh_persqft_proj,mean_charge_persqft_proj,utilities_address_count_wsqft,mean_kwh_persqft,mean_charge_persqft
0,433,1,100,193,10.0,10.0,0.90072,0.107193,178,0.955091,0.113594
1,175,2,100,118,1.0,1.0,0.595525,0.075957,97,0.725172,0.086507
2,397,3,100,344,7.0,7.0,0.731609,0.087106,172,0.977478,0.117104
3,938,4,100,297,6.0,6.0,0.151687,0.017874,204,0.255851,0.03082
4,493,5,100,247,4.0,4.0,1.136605,0.128418,245,0.893225,0.108068


In [11]:
#Create new columns
df_full["nonproj_address_count"] = df_full["utilities_address_count"] - df_full["projects_address_count"]
df_full["proj_pct"] = df_full["projects_address_count"] / df_full["utilities_address_count"] * 100
df_full["nonproj_pct"] = 100 - df_full["proj_pct"]

In [18]:
#Share of projects per block group
df_full['project_share_pct'] = df_full['projects_address_count']/df_full['projects_address_count'].sum() * 100
#Percent with square footage
df_full['project_wsqft_pct'] = df_full['projects_address_count_wsqft']/df_full['projects_address_count'] * 100
df_full['utilities_wsqft_pct'] = df_full['utilities_address_count_wsqft']/df_full['utilities_address_count'] * 100

In [19]:
df_full.head()
df_full.columns

Index(['total_properties', 'BlockGroup', 'Tract', 'utilities_address_count',
       'projects_address_count', 'projects_address_count_wsqft',
       'mean_kwh_persqft_proj', 'mean_charge_persqft_proj',
       'utilities_address_count_wsqft', 'mean_kwh_persqft',
       'mean_charge_persqft', 'nonproj_address_count', 'proj_pct',
       'nonproj_pct', 'project_share_pct', 'project_wsqft_pct',
       'utilities_wsqft_pct'],
      dtype='object')

In [36]:
sum_props = df_full["total_properties"].sum()
sum_utilities = df_full["utilities_address_count"].sum()
sum_projects = df_full["projects_address_count"].sum()
sum_nonproj = df_full["nonproj_address_count"].sum()
sum_utilities_wsqft = df_full["utilities_address_count_wsqft"].sum()
sum_projects_wsqft = df_full["projects_address_count_wsqft"].sum()

In [40]:
df_utilities.tail()

Unnamed: 0,Tract,BlockGroup,total_properties,utilities_address_count_wsqft,projects_address_count_wsqft,mean_kwh_persqft,mean_charge_persqft,mean_kwh_persqft_proj,mean_charge_persqft_proj
61,11200,3,388,38,0.0,0.266445,0.031503,0.0,0.0
62,11300,1,1227,415,7.0,0.541738,0.065573,0.325484,0.039086
63,11400,1,427,225,5.0,0.843973,0.101841,0.357786,0.045885
64,11400,2,181,81,2.0,0.395809,0.049328,0.471495,0.05527
65,11600,1,649,64,0.0,0.81084,0.095616,0.0,0.0


In [53]:
#Reorder columns
df_demographic = df_full[['Tract', 'BlockGroup', 'total_properties', 'utilities_address_count','projects_address_count', 'nonproj_address_count', 'proj_pct', 'nonproj_pct','project_share_pct']]
df_squarefootage = df_full[['Tract', 'BlockGroup', 'total_properties', 'utilities_address_count', 'utilities_address_count_wsqft', 'projects_address_count', 'projects_address_count_wsqft', 'project_wsqft_pct','utilities_wsqft_pct']]
df_utilities = df_full[['Tract', 'BlockGroup', 'total_properties', 'utilities_address_count_wsqft','projects_address_count_wsqft', 'mean_kwh_persqft','mean_charge_persqft','mean_kwh_persqft_proj', 'mean_charge_persqft_proj']]
# Add totals row
df_demographic = df_demographic.append({"Tract":"Totals", "total_properties" : sum_props, "utilities_address_count" : sum_utilities,
                         "projects_address_count" : sum_projects, "nonproj_address_count" : sum_nonproj,
                        "proj_pct" :sum_projects/sum_utilities*100, "nonproj_pct":sum_nonproj/sum_utilities*100,
                        "project_share_pct":df_demographic['project_share_pct'].sum()}, ignore_index =  True)
df_demographic.tail()

Unnamed: 0,Tract,BlockGroup,total_properties,utilities_address_count,projects_address_count,nonproj_address_count,proj_pct,nonproj_pct,project_share_pct
62,11300,1.0,1227,462,7.0,455.0,1.515152,98.484848,1.525054
63,11400,1.0,427,322,10.0,312.0,3.10559,96.89441,2.178649
64,11400,2.0,181,126,2.0,124.0,1.587302,98.412698,0.43573
65,11600,1.0,649,64,0.0,64.0,0.0,100.0,0.0
66,Totals,,39548,21485,459.0,21026.0,2.136374,97.863626,100.0


In [54]:
df_utilities = df_utilities.append({"Tract":"Totals", 
                                    "total_properties" : sum_props, 
                                    "utilities_address_count_wsqft" : sum_utilities_wsqft,
                                    "projects_address_count_wsqft" : sum_projects_wsqft,
                                    "mean_kwh_persqft": 0.645961769794305,
                                    "mean_charge_persqft":0.0777166398738248,
                                    "mean_kwh_persqft_proj":0.112848709943183,
                                    "mean_charge_persqft_proj": 0.0136344901020302}, ignore_index=True)
df_utilities.tail()

Unnamed: 0,Tract,BlockGroup,total_properties,utilities_address_count_wsqft,projects_address_count_wsqft,mean_kwh_persqft,mean_charge_persqft,mean_kwh_persqft_proj,mean_charge_persqft_proj
62,11300,1.0,1227,415,7.0,0.541738,0.065573,0.325484,0.039086
63,11400,1.0,427,225,5.0,0.843973,0.101841,0.357786,0.045885
64,11400,2.0,181,81,2.0,0.395809,0.049328,0.471495,0.05527
65,11600,1.0,649,64,0.0,0.81084,0.095616,0.0,0.0
66,Totals,,39548,18475,399.0,0.645962,0.077717,0.112849,0.013634


In [55]:
df_squarefootage = df_squarefootage.append({
                                    "Tract":"Totals", 
                                    "total_properties" : sum_props,
                                    "utilities_address_count" : sum_utilities,
                                    "projects_address_count" : sum_projects,
                                    "utilities_address_count_wsqft" : sum_utilities_wsqft,
                                    "projects_address_count_wsqft" : sum_projects_wsqft,
                                    "project_wsqft_pct" : sum_projects_wsqft/sum_projects,
                                    "utilities_wsqft_pct":sum_utilities_wsqft/sum_utilities}, ignore_index=True)
df_squarefootage.tail()

Unnamed: 0,Tract,BlockGroup,total_properties,utilities_address_count,utilities_address_count_wsqft,projects_address_count,projects_address_count_wsqft,project_wsqft_pct,utilities_wsqft_pct
62,11300,1.0,1227,462,415,7.0,7.0,100.0,89.82684
63,11400,1.0,427,322,225,10.0,5.0,50.0,69.875776
64,11400,2.0,181,126,81,2.0,2.0,100.0,64.285714
65,11600,1.0,649,64,64,0.0,0.0,,100.0
66,Totals,,39548,21485,18475,459.0,399.0,0.869281,0.859902


In [58]:
df_utilities.to_csv("elec_normalized1.csv", index=False)
df_squarefootage.to_csv("elec_normalized2.csv", index=False)
df_demographic.to_csv("elec_normalized3.csv", index=False)

## Some odd values in here -> Try with only Single family residence/townhouse