In [1]:
import numpy as np
import pandas as pd

# Show floats with two fixed decimals instead of scientific notation.
pd.options.display.float_format = '{:.2f}'.format

# Read the Zillow 3-bedroom county data into a pandas DataFrame.
# The path is defined once and reused, so the variable and the file that is
# actually read cannot drift apart.
file = "County_Zhvi_3bedroom.csv"
zillow_df = pd.read_csv(file, encoding="latin-1")
zillow_df.head()

Unnamed: 0,RegionID,RegionName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,SizeRank,2008-01,2008-02,2008-03,...,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11
0,3101,Los Angeles County,CA,Los Angeles-Long Beach-Anaheim,6,37,1,,,,...,600754.33,599393.33,599503.33,600387.0,602727.33,600822.0,600501.67,601233.0,605862.0,609034.0
1,139,Cook County,IL,Chicago-Naperville-Elgin,17,31,2,,,,...,226143.33,227082.67,228167.0,228926.67,228869.67,228349.67,227408.0,226548.33,225984.67,225928.67
2,1090,Harris County,TX,Houston-The Woodlands-Sugar Land,48,201,3,124604.0,124348.5,124172.33,...,173528.0,173910.33,173899.0,173980.0,174518.0,175041.0,175728.0,176058.33,176914.0,177541.0
3,2402,Maricopa County,AZ,Phoenix-Mesa-Scottsdale,4,13,4,,,,...,260194.0,261705.0,262585.67,263689.0,264876.67,266422.33,268442.0,270188.67,272057.67,273468.67
4,2841,San Diego County,CA,San Diego-Carlsbad,6,73,5,415019.0,410783.0,406752.67,...,561999.67,561455.0,563480.33,564365.67,566461.0,568930.33,571215.67,571894.67,573083.0,575533.33


In [2]:
# Keep the full set of column names of the Zillow frame.
column_names = zillow_df.columns

# Keep only the rows for the counties under study:
# Shasta, Los Angeles, San Francisco and Fresno.
counties_of_interest = [
    "Los Angeles County",
    "Fresno County",
    "San Francisco County",
    "Shasta County",
]
zillow_df = zillow_df.loc[zillow_df["RegionName"].isin(counties_of_interest), :]

# .copy() yields an independent frame, so the column assignments below do not
# raise SettingWithCopyWarning.
counties_df = zillow_df[["RegionName"]].copy()

# Average monthly price for each calendar year from 2014 through 2018.
# mean(axis=1) divides by the number of months that actually hold a value.
# The earlier sum(axis=1)/12 skipped NaN months in the sum but still divided
# by 12, which understates the average whenever a month is missing.
for year in range(2014, 2019):
    # Label slices are inclusive on both ends: January through December.
    monthly = zillow_df.loc[:, "{}-01".format(year):"{}-12".format(year)]
    counties_df["{} Avg Price".format(year)] = monthly.mean(axis=1)

counties_df.head()


Unnamed: 0,RegionName,2014 Avg Price,2015 Avg Price,2016 Avg Price,2017 Avg Price,2018 Avg Price
0,Los Angeles County,456951.25,482374.33,518062.94,555362.75,596094.53
44,Fresno County,180313.22,191649.06,203774.64,220687.5,238799.28
64,San Francisco County,1194854.92,1292884.22,1388945.64,1471771.03,1620575.75
342,Shasta County,202065.47,216334.31,226238.28,241436.89,253039.0


In [5]:
# Add income data to counties_df.
# Source: https://www.bea.gov/system/files/2019-11/lapi1119.pdf
# There is no downloadable version, so the values are entered by hand.
# Each list is positional: entry i belongs to the i-th county in counties_df.

# NOTE: this rebinds `column_names` to the list of county names.
column_names = list(counties_df["RegionName"])

# NOTE(review): the numbers below look like placeholders rather than real
# income figures - TODO replace with the values from the source above.
income_df = pd.DataFrame({"RegionName": column_names,
                          "2014 Income": [0, 0, 0, 0],
                          "2015 Income": [1, 0, 0, 0],
                          "2016 Income": [2, 2, 2, 2],
                          "2017 Income": [3, 3, 3, 3],
                          "2018 Income": [4, 4, 4, 4]
                          })

# Merge the price and income frames on the county name.
# validate="one_to_one" makes pandas raise MergeError if a county appears more
# than once on either side, instead of silently multiplying rows.
merge_df = pd.merge(counties_df, income_df, on="RegionName", validate="one_to_one")

merge_df.head()

Unnamed: 0,RegionName,2014 Avg Price,2015 Avg Price,2016 Avg Price,2017 Avg Price,2018 Avg Price,2014 Income,2015 Income,2016 Income,2017 Income,2018 Income
0,Los Angeles County,456951.25,482374.33,518062.94,555362.75,596094.53,0,1,2,3,4
1,Fresno County,180313.22,191649.06,203774.64,220687.5,238799.28,0,0,2,3,4
2,San Francisco County,1194854.92,1292884.22,1388945.64,1471771.03,1620575.75,0,0,2,3,4
3,Shasta County,202065.47,216334.31,226238.28,241436.89,253039.0,0,0,2,3,4
