In [4]:
import pandas as pd
pd.options.display.float_format = '{:.2f}'.format #turn off scientific notation
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook

## Read Zillow Data

In [5]:
# Load the Zillow 3-bedroom data by county (downloaded from zillow.com/data).
# The table carries one column per month; for the analysis we decided to look only
# at 5 years (2014 - 2018) and 4 counties - LA, SF, Fresno and Shasta.
file = "resources/County_Zhvi_3bedroom.csv"
zillow_df = pd.read_csv(filepath_or_buffer=file, encoding="latin-1")
zillow_df.head()

Unnamed: 0,RegionID,RegionName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,SizeRank,2008-01,2008-02,2008-03,...,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11
0,3101,Los Angeles County,CA,Los Angeles-Long Beach-Anaheim,6,37,1,,,,...,600754.33,599393.33,599503.33,600387.0,602727.33,600822.0,600501.67,601233.0,605862.0,609034.0
1,139,Cook County,IL,Chicago-Naperville-Elgin,17,31,2,,,,...,226143.33,227082.67,228167.0,228926.67,228869.67,228349.67,227408.0,226548.33,225984.67,225928.67
2,1090,Harris County,TX,Houston-The Woodlands-Sugar Land,48,201,3,124604.0,124348.5,124172.33,...,173528.0,173910.33,173899.0,173980.0,174518.0,175041.0,175728.0,176058.33,176914.0,177541.0
3,2402,Maricopa County,AZ,Phoenix-Mesa-Scottsdale,4,13,4,,,,...,260194.0,261705.0,262585.67,263689.0,264876.67,266422.33,268442.0,270188.67,272057.67,273468.67
4,2841,San Diego County,CA,San Diego-Carlsbad,6,73,5,415019.0,410783.0,406752.67,...,561999.67,561455.0,563480.33,564365.67,566461.0,568930.33,571215.67,571894.67,573083.0,575533.33


## Filter Zillow Data for all California Counties, 2018 and save output

In [17]:
# Keep only the California rows of the Zillow table.
zillow_all_counties_df = zillow_df.loc[(zillow_df["State"] == "CA")]

# Build the cleaned table inside this cell so it runs on a fresh kernel: the
# definition used to be commented out, leaving the final line dependent on a
# variable left over from an earlier session.
# .copy() gives an independent frame and avoids SettingWithCopyWarning on the
# column assignment below.
zillow_clean_all_counties_df = zillow_all_counties_df[["RegionName"]].copy()

# Zillow data is broken down by month; average the 2018 monthly columns to get
# one annual figure per county.
zillow_clean_all_counties_df["2018 Avg Price"] = (
    zillow_all_counties_df.loc[:, "2018-01":"2018-12"].mean(axis=1)
)

# RegionName is kept as a regular column so it is still written with index=False.
zillow_clean_all_counties_df.to_csv("output files/zillow_clean_all_counties_df.csv", index=False)
zillow_clean_all_counties_df.shape



(53, 2)

## Filter Zillow Data for 4 counties and save output

In [None]:
# Restrict the Zillow table to the four study counties:
# Shasta County, Los Angeles County, San Francisco County and Fresno County.
zillow_df = zillow_df.loc[
    zillow_df["RegionName"].isin(
        ["Los Angeles County", "Fresno County", "San Francisco County", "Shasta County"]
    ),
    :,
]

# The source is monthly but we want annual trends: for each year 2014 - 2018,
# average that year's monthly columns into a "<year> Avg Price" column.
# assign() returns a new frame, so the filtered zillow_df itself is not modified.
zillow_clean_df = zillow_df[["RegionName"]].assign(
    **{
        "{} Avg Price".format(yr): zillow_df.loc[
            :, "{}-01".format(yr):"{}-12".format(yr)
        ].mean(axis=1)
        for yr in range(2014, 2019)
    }
)

zillow_clean_df.to_csv("output files/zillow_clean_df.csv", index=False)
zillow_clean_df.head()

In [None]:
# Index the table by county, then flip it so each row is a yearly average and
# each column is a county.
zillow_clean_df = zillow_clean_df.set_index("RegionName").T
# Written with the index this time, since the row labels now carry the year.
zillow_clean_df.to_csv("output files/zillow_clean_df.csv")
zillow_clean_df.head()

## Income Data from 2014 - 2018

In [None]:
#Data from https://www.bea.gov/system/files/2019-11/lapi1119.pdf. 
#Data from https://www.bea.gov/system/files/2018-02/lapi1116.pdf
#I'm having issues reading the excel version of data, so entering by hand  

#create dictionary of income by county and make it a dataframe
income_df = pd.DataFrame({"RegionName":["Los Angeles County", "Fresno County", "San Francisco County", "Shasta County"],
                         "2014 Income":[50730, 36448, 97498, 38410],
                         "2015 Income":[53521, 38323, 103529, 40882],
                         "2016 Income":[57127, 40327, 114697, 43412],
                         "2017 Income":[59058, 41137, 121778, 44480],
                         "2018 Income":[62224, 43084, 130696, 46582]
                         })

income_df.set_index("RegionName", inplace=True)
income_df = income_df.transpose()
income_df.to_csv("output files/income_df.csv")
income_df.head()

## Rental Data from 2014 - 2018

In [None]:
# Load the 3-bedroom median rental prices by county.
rentaldata_df = pd.read_excel('resources/County_MedianRentalPrice_3Bedroom.xlsx')
# The column labels are taken from the first data row of the sheet as read.
rentaldata_df.columns = rentaldata_df.iloc[0]
# Drop that label row so it does not linger in the table as a bogus record, then
# index the remaining rows by county name.
# NOTE(review): if the workbook always has exactly one row above the real header,
# pd.read_excel(..., header=1) would be a simpler way to get here - confirm against the file.
rentaldata_df = rentaldata_df.iloc[1:].set_index('RegionName')
rentaldata_df.head()

In [None]:
# Pull the four study counties out of the rental table.
selectcounties_df = rentaldata_df.loc[
    ['Los Angeles County', 'San Francisco County', 'Fresno County', 'Shasta County'], :
]

# The rental data is monthly: for each year 2014 - 2018, average that year's monthly
# columns into an "Average <year> Rentals" column. assign() builds a new frame, so
# rentaldata_df is never written to.
selectcounties_df = selectcounties_df.assign(
    **{
        "Average {} Rentals".format(yr): selectcounties_df.loc[
            :, "{}-01".format(yr):"{}-12".format(yr)
        ].mean(axis=1)
        for yr in range(2014, 2019)
    }
)

# Keep just the yearly averages and flip the table so rows are years and columns are counties.
selected_counties_years_df = selectcounties_df.loc[
    :,
    [
        "Average 2014 Rentals",
        "Average 2015 Rentals",
        "Average 2016 Rentals",
        "Average 2017 Rentals",
        "Average 2018 Rentals",
    ],
]
selected_counties_years_df_transposed = selected_counties_years_df.transpose()
selected_counties_years_df_transposed.to_csv("output files/rentals4counties.csv")

# Display last: only a cell's final expression is rendered, so the table has to come
# after the save for it to show up.
selected_counties_years_df_transposed

## Unemployment and Labor Force Data from 2014 - 2018

In [None]:
# Load the Local Area Unemployment Statistics (LAUS) annual-average CSV.
# Source: https://data.ca.gov/dataset/local-area-unemployment-statistics-laus-annual-average
unemp_df = pd.read_csv(
    "resources/Local_Area_Unemployment_Statistics__LAUS___Annual_Average copy.csv"
)
unemp_df

In [None]:
# List the column names of the unemployment table.
unemp_df.columns.tolist()

In [None]:
# unemp_df is already a DataFrame (it is the return value of pd.read_csv), so it does
# not need to be wrapped again; just look at the last rows.
unemp_df.tail()

In [None]:
# Keep only the rows belonging to the four study counties.
unemp_clean_df = unemp_df[
    unemp_df["Area Name"].isin(
        ["Los Angeles County", "Fresno County", "San Francisco County", "Shasta County"]
    )
]
unemp_clean_df.head()

In [None]:
# Keep the years 2013 - 2018.
# 2013 is presumably retained as the base year for the 2014 labor force change
# computed further down.
unemp_new_df = unemp_clean_df[
    unemp_clean_df["Year"].isin([2013, 2014, 2015, 2016, 2017, 2018])
]
unemp_new_df.head()

In [None]:
# Renumber the rows from 0; reset_index() also keeps the old row labels in a new
# "index" column.
unemp_newnew = unemp_new_df.reset_index()
# Discard that "index" column along with the fields the analysis does not use.
unemp_add = unemp_newnew.drop(
    columns=["index", 'Seasonally Adjusted (Y N)', "Status", "Unnamed: 10", "Area Type", "Period"]
)
unemp_add.head()

In [None]:
# Year-over-year % change in labor force, computed separately for each county.
# pct_change() compares every row with the previous row of its group, so the rows are
# sorted by Year first instead of trusting the file's row order. reindex() then puts
# the result back in unemp_add's row order, so it still lines up row-for-row with
# unemp_add (the next cell attaches it by position).
labor_f_chg = (
    unemp_add.sort_values("Year")
    .groupby("Area Name")["Labor Force"]
    .pct_change()
    .reindex(unemp_add.index)
    * 100
)
# One-column frame holding the change, named for what it contains.
labor_chg_df = labor_f_chg.to_frame(name="Labor Force Change")
labor_chg_df.head()

In [None]:
# Attach the labor force change to the unemployment table.
# The values are passed as a plain array, so they are matched to rows by position
# rather than by index label.
unemp_add = unemp_add.assign(
    **{"Labor Force Change": labor_chg_df["Labor Force Change"].values}
)
unemp_add.head()

In [None]:
# Narrow the unemployment table to the study period, 2014 - 2018.
labor_df = unemp_add[unemp_add["Year"].isin([2014, 2015, 2016, 2017, 2018])]
labor_df.head()

In [None]:
# Reshape to one row per year and one column per county, holding the unemployment rate,
# then save the table.
labor_reform = labor_df.pivot(
    index="Year",
    columns="Area Name",
    values="Unemployment Rate",
)
labor_reform.to_csv("output files/labor_reform.csv")
labor_reform

In [None]:
# Reshape to one row per year and one column per county, holding the labor force
# % change, then save the table.
labor_change = labor_df.pivot(
    index="Year",
    columns="Area Name",
    values="Labor Force Change",
)
labor_change.to_csv("output files/labor_change.csv")
labor_change