In [None]:
import pandas as pd

In [None]:
# All four inputs are per-ZIP-code CSV files read with the same text encoding.
zillow_encoding = "ISO-8859-1"

# ZHVI All Homes (SFR, Condo/Co-op) Time Series ($)
csv_home_values = "Resources/Zip_Zhvi_AllHomes.csv"
home_values = pd.read_csv(csv_home_values, encoding=zillow_encoding)

# Median Sale Price - Seasonally Adjusted ($)
csv_home_sales = "Resources/Sale_Prices_Zip.csv"
home_sales = pd.read_csv(csv_home_sales, encoding=zillow_encoding)

# Monthly Home Sales (Number, Raw)
csv_number_sales = "Resources/Sale_Counts_Zip.csv"
number_sales = pd.read_csv(csv_number_sales, encoding=zillow_encoding)

# Foreclosure Resales (%)
csv_foreclosure_resales = "Resources/SalesPrevForeclosed_Share_Zip.csv"
foreclosure_resales = pd.read_csv(csv_foreclosure_resales, encoding=zillow_encoding)

# Suffixes:
# ZHVI = Zillow Home Value Index ($)
# MSR = Median Sales Price - Seasonally Adjusted ($)
# NS = Number of Sales
# FR = Foreclosure Resales (%)



In [None]:
# Lookup table: full US state / territory name -> two-letter postal code.
# Source: rogerallen (https://gist.github.com/rogerallen/1583593)
us_state_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "District of Columbia": "DC",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Northern Mariana Islands": "MP",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Palau": "PW",
    "Pennsylvania": "PA",
    "Puerto Rico": "PR",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virgin Islands": "VI",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
}

# Give the four tables matching key column names ("ZIP Code", "State").
# The three sale-related tables share the same two renames.
sale_table_columns = {"RegionName": "ZIP Code", "StateName": "State"}

# home_values only needs its ZIP column renamed here; its first two monthly
# columns are additionally tagged with the ZHVI suffix by hand.
home_values_columns = {
    "RegionName": "ZIP Code",
    "2008-01": "2008-01_ZHVI",
    "2008-02": "2008-02_ZHVI",
}
# number_sales also carries a "seasAdj" column, tagged with the NS suffix.
number_sales_columns = dict(sale_table_columns, seasAdj="seasAdj_NS")

home_values = home_values.rename(columns=home_values_columns)
home_sales = home_sales.rename(columns=sale_table_columns)
number_sales = number_sales.rename(columns=number_sales_columns)
foreclosure_resales = foreclosure_resales.rename(columns=sale_table_columns)

# Translate states into two letter codes
def _to_state_codes(states):
    """Return `states` with full state names replaced by two-letter codes.

    Values that are already one of the codes in ``us_state_abbrev`` are kept
    unchanged, so running this cell a second time is a no-op instead of
    failing on the already-translated column.

    Args:
        states: pandas Series of state names and/or two-letter codes.

    Returns:
        A Series with the same index holding two-letter codes.

    Raises:
        KeyError: if any value (including a missing value) is neither a key
            nor a value of ``us_state_abbrev``.
    """
    known_codes = set(us_state_abbrev.values())
    recognized = states.isin(list(us_state_abbrev)) | states.isin(known_codes)
    if not recognized.all():
        # Fail on the first unknown value, as a plain dict lookup would.
        raise KeyError(states[~recognized].iloc[0])
    # map() yields NaN for entries that were already codes; restore those
    # from the original values.
    return states.map(us_state_abbrev).fillna(states)


# One vectorised pass per table instead of a row-by-row iterrows() loop.
for sales_frame in (home_sales, number_sales, foreclosure_resales):
    sales_frame["State"] = _to_state_codes(sales_frame["State"])

home_sales.head()

In [None]:
# Columns that identify a ZIP-code row in every table.
merge_keys = ["ZIP Code", "State", "RegionID"]

# Outer joins keep rows whose key appears in only one of the two inputs;
# the suffixes tag same-named columns with the table they came from.
merge1 = home_values.merge(
    home_sales,
    how="outer",
    on=merge_keys,
    suffixes=("_ZHVI", "_MSR"),
)
merge2 = number_sales.merge(
    foreclosure_resales,
    how="outer",
    on=merge_keys,
    suffixes=("_NS", "_FR"),
)
main_data = merge1.merge(merge2, how="outer", on=merge_keys)

main_data.head()

In [None]:
#main_data.to_csv("Output/main_data.csv", index=False, header=True)

In [226]:
# ZHVI and MSR yearly averages
def _add_yearly_averages(frame, suffix, years=range(2008, 2020)):
    """Add a "<year>-Avg_<suffix>" column to `frame` for each year in `years`.

    The monthly columns are selected by name ("<year>-<MM>_<suffix>") rather
    than by position. Positional slices previously reused the ZHVI column
    ranges for the MSR averages and gave 2014 a 13-column window, shifting
    every later year by one month.

    Args:
        frame: DataFrame holding the monthly columns; modified in place.
        suffix: Column suffix identifying the series, e.g. "ZHVI" or "MSR".
        years: Iterable of calendar years to average.

    NOTE(review): assumes every monthly column in the merged table carries
    its series suffix (e.g. "2014-06_ZHVI") - confirm against
    `main_data.columns`. A year with no matching columns yields an all-NaN
    average column.
    """
    for year in years:
        # Two-digit month only, so the "<year>-Avg_<suffix>" columns written
        # below are never picked up when the cell is re-run.
        monthly_pattern = r"^{}-\d{{2}}_{}$".format(year, suffix)
        monthly_columns = frame.filter(regex=monthly_pattern)
        frame["{}-Avg_{}".format(year, suffix)] = monthly_columns.mean(axis=1)


_add_yearly_averages(main_data, "ZHVI")
_add_yearly_averages(main_data, "MSR")


In [None]:

# Keep only the location columns and the twelve yearly-average columns.
# NOTE(review): `home_values_renamed` is not defined in any cell visible in
# this notebook, and the yearly averages computed earlier are named like
# "2008-Avg_ZHVI", not "2008 Avg." - presumably this cell depends on state
# from a removed or out-of-order cell; confirm it survives Restart & Run All.
data_organized = home_values_renamed[["ZIP Code","City","State","Metro","County","2008 Avg.","2009 Avg.",
                                      "2010 Avg.","2011 Avg.","2012 Avg.","2013 Avg.","2014 Avg.","2015 Avg.",
                                      "2016 Avg.","2017 Avg.","2018 Avg.","2019 Avg."]]

# Display the selected columns as the cell output.
data_organized