In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import config

In [2]:
#Load google.cloud.bigquery
%load_ext google.cloud.bigquery

In [3]:
#Select path to credentials
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]=config.GOOGLE_APPLICATION_CREDENTIALS

In [4]:
%%bigquery --use_rest_api ZRI_MF
SELECT *
FROM `high-empire-220313.ZRI.Multi_Family`

In [5]:
year_columns = [x for x in ZRI_MF.columns if ('20' in x)]

In [6]:
ZRI_MF_long = ZRI_MF.melt(id_vars = ['RegionID','RegionName','City','State','Metro','CountyName','SizeRank'],
            value_vars = year_columns).rename({'value':'ZRI','variable':'Date'}, axis = 1)

In [7]:
ZRI_MF_long.loc[:,'Month'] = ZRI_MF_long['Date'].apply(lambda x: int(x[-2:]))
ZRI_MF_long.loc[:,'Year'] = ZRI_MF_long['Date'].apply(lambda x: int(x[1:5]))

In [8]:
ZRI_MF_long

Unnamed: 0,RegionID,RegionName,City,State,Metro,CountyName,SizeRank,Date,ZRI,Month,Year
0,91982,77494,Katy,TX,Houston-The Woodlands-Sugar Land,Harris County,4,_2010_09,,9,2010
1,61616,10002,New York,NY,New York-Newark-Jersey City,New York County,7,_2010_09,,9,2010
2,91733,77084,Houston,TX,Houston-The Woodlands-Sugar Land,Harris County,8,_2010_09,,9,2010
3,92593,78660,Pflugerville,TX,Austin-Round Rock,Travis County,13,_2010_09,,9,2010
4,97564,94109,San Francisco,CA,San Francisco-Oakland-Hayward,San Francisco County,14,_2010_09,,9,2010
...,...,...,...,...,...,...,...,...,...,...,...
210288,96040,90059,Los Angeles,CA,Los Angeles-Long Beach-Anaheim,Los Angeles County,1445,_2020_01,2185.0,1,2020
210289,82256,55406,Minneapolis,MN,Minneapolis-St. Paul-Bloomington,Hennepin County,781,_2020_01,1357.0,1,2020
210290,62476,12206,Albany,NY,Albany-Schenectady-Troy,Albany County,1724,_2020_01,,1,2020
210291,63351,13905,Binghamton,NY,Binghamton,Broome County,1295,_2020_01,844.0,1,2020


In [15]:
ZRI_MF_long.groupby('RegionName')[['ZRI']].std()

Unnamed: 0_level_0,ZRI
RegionName,Unnamed: 1_level_1
1013,80.327818
1020,90.116944
1040,65.796650
1085,78.389602
1089,69.664584
...,...
99501,39.632575
99504,54.458942
99508,51.731282
99654,51.172199


In [16]:
ZRI_MF_long.groupby('RegionName')[['ZRI']].mean()

Unnamed: 0_level_0,ZRI
RegionName,Unnamed: 1_level_1
1013,1014.318584
1020,1037.911504
1040,1016.410714
1085,1105.521739
1089,1093.813559
...,...
99501,1263.638889
99504,1460.407407
99508,1280.205882
99654,1281.414286


In [14]:
df = ZRI_MF_long.groupby('RegionName')[['ZRI', 'RegionID']].agg({'ZRI':'mean', 'ZRI':'std'})
df.columns = ['mean', 'std']
df

Unnamed: 0_level_0,mean,std
RegionName,Unnamed: 1_level_1,Unnamed: 2_level_1
1013,80.327818,0.0
1020,90.116944,0.0
1040,65.796650,0.0
1085,78.389602,0.0
1089,69.664584,0.0
...,...,...
99501,39.632575,0.0
99504,54.458942,0.0
99508,51.731282,0.0
99654,51.172199,0.0


In [33]:
%%bigquery --use_rest_api ZIPS
SELECT *
FROM `high-empire-220313.InfoByZipcode.USzips`

In [37]:
%%bigquery --use_rest_api ZIP_Area
SELECT *
FROM `high-empire-220313.InfoByZipcode.Zipcode_Water_Land_area_lat_long_county_zipcodegeom`

In [58]:
ZIP_Area.loc[:,'zipcode'] = ZIP_Area.zipcode.astype(int)

In [43]:
df = ZRI_MF_long.merge(ZIPS,how = 'left', left_on = 'RegionName',right_on = 'zip')

In [56]:
df.merge(ZIP_Area[['area_land_meters','area_water_meters','area_land_miles','area_water_miles','zipcode']],
         how = 'left',
         left_on = 'zip',
         right_on = 'zipcode')

Unnamed: 0,RegionID,RegionName,City,State,Metro,CountyName,SizeRank,Date,ZRI,Month,...,county_names_all,county_fips_all,imprecise,military,timezone,area_land_meters,area_water_meters,area_land_miles,area_water_miles,zipcode
0,91982,77494,Katy,TX,Houston-The Woodlands-Sugar Land,Harris County,4,_2010_09,,9,...,Fort Bend|Harris|Waller,48157|48201|48473,False,False,America/Chicago,105779044.0,637347.0,40.842,0.246,77494.0
1,61616,10002,New York,NY,New York-Newark-Jersey City,New York County,7,_2010_09,,9,...,New York,36061,False,False,America/New_York,2275984.0,0.0,0.879,0.000,10002.0
2,91733,77084,Houston,TX,Houston-The Woodlands-Sugar Land,Harris County,8,_2010_09,,9,...,Harris,48201,False,False,America/Chicago,79386698.0,315092.0,30.651,0.122,77084.0
3,92593,78660,Pflugerville,TX,Austin-Round Rock,Travis County,13,_2010_09,,9,...,Travis,48453,False,False,America/Chicago,117649320.0,60708.0,45.425,0.023,78660.0
4,97564,94109,San Francisco,CA,San Francisco-Oakland-Hayward,San Francisco County,14,_2010_09,,9,...,San Francisco,06075,False,False,America/Los_Angeles,3077742.0,295388.0,1.188,0.114,94109.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210288,96040,90059,Los Angeles,CA,Los Angeles-Long Beach-Anaheim,Los Angeles County,1445,_2020_01,2185.0,1,...,Los Angeles,06037,False,False,America/Los_Angeles,8581847.0,42455.0,3.313,0.016,90059.0
210289,82256,55406,Minneapolis,MN,Minneapolis-St. Paul-Bloomington,Hennepin County,781,_2020_01,1357.0,1,...,Hennepin,27053,False,False,America/Chicago,12954605.0,572948.0,5.002,0.221,55406.0
210290,62476,12206,Albany,NY,Albany-Schenectady-Troy,Albany County,1724,_2020_01,,1,...,Albany,36001,False,False,America/New_York,5503149.0,0.0,2.125,0.000,12206.0
210291,63351,13905,Binghamton,NY,Binghamton,Broome County,1295,_2020_01,844.0,1,...,Broome,36007,False,False,America/New_York,70053779.0,632017.0,27.048,0.244,13905.0
