In [27]:
import pandas as pd
import numpy as np

### Zillow

In [28]:
# Load every raw Zillow ZIP-level extract into a dict keyed by a two-digit id.
# The same ids are used by the later cells to look the frames up again.
# NOTE(review): RegionName is parsed with pandas' default dtype inference; if it
# holds ZIP codes, leading zeros would be lost -- confirm whether that matters.
zillow_dfs = {
    key: pd.read_csv(csv_path, encoding='ISO-8859-1')
    for key, csv_path in [
        ("01", "~/real_estate/data/raw/Zip_MedianValuePerSqft_AllHomes.csv"),
        ("02", "~/real_estate/data/raw/Zip_PctOfHomesDecreasingInValues_AllHomes.csv"),
        ("03", "~/real_estate/data/raw/Zip_PctOfHomesIncreasingInValues_AllHomes.csv"),
        ("04", "~/real_estate/data/raw/Zip_Zhvi_1bedroom.csv"),
        ("05", "~/real_estate/data/raw/Zip_Zhvi_2bedroom.csv"),
        ("06", "~/real_estate/data/raw/Zip_Zhvi_3bedroom.csv"),
        ("07", "~/real_estate/data/raw/Zip_ZriPerSqft_AllHomes.csv"),
        # File name spelling ("Condominum") is kept exactly as in the original path.
        ("08", "~/real_estate/data/raw/Zip_Zhvi_Condominum.csv"),
        ("09", "~/real_estate/data/raw/Zip_Zhvi_SingleFamilyResidence.csv"),
    ]
}

In [29]:
# Output column name for each raw Zillow frame, keyed by the same two-digit
# ids used in `zillow_dfs`. Insertion order follows the ids.
zillow_variables = dict([
    ("01", "median_sqft_value"),
    ("02", "percent_decreasing"),
    ("03", "percent_increasing"),
    ("04", "zhvi_1bed"),
    ("05", "zhvi_2bed"),
    ("06", "zhvi_3bed"),
    ("07", "zri_sqft_value"),
    ("08", "zhvi_condo"),
    ("09", "zhvi_singlefam"),
])

In [30]:
# Reshape each raw Zillow frame from wide to long.
#
# For every key in `zillow_variables`, the matching frame in `zillow_dfs` has
# its descriptive columns dropped, and every remaining column other than
# "RegionName" is unpivoted into rows: the former column label lands in "date"
# and the cell value lands in the column named by `zillow_variables[key]`.
# (Presumably the unpivoted columns are one per month -- confirm against the
# raw files.)
#
# The long-format column names are passed straight to `melt` instead of
# renaming its default "variable"/"value" output afterwards, so the result does
# not depend on those defaults. "SizeRank" is listed once in the drop list (the
# original listed it twice, which was redundant).
dataframes_melt = {
    key: (
        zillow_dfs[key]
        .drop(columns=["RegionID", "SizeRank", "City", "State", "Metro", "CountyName"])
        .melt(id_vars=["RegionName"], var_name="date", value_name=column_name)
    )
    for key, column_name in zillow_variables.items()
}

In [31]:
# Join all zillow dfs
# Start from frame "01" and outer-join the remaining long-format frames one at
# a time on (RegionName, date), so a row is kept if any of the inputs has it.
merge_keys = ["RegionName", "date"]

df_zillow = dataframes_melt["01"]
for frame_id in ["02", "03", "04", "05", "06", "07", "08", "09"]:
    df_zillow = df_zillow.merge(dataframes_melt[frame_id], on=merge_keys, how="outer")

In [32]:
# Filter dates, remove NaNs
# The comparison is a plain string comparison against '2000'; it keeps labels
# that sort after that string (assumes "date" holds "YYYY-MM"-style text --
# TODO confirm against the raw column headers).
is_after_2000 = df_zillow["date"] > '2000'
df_zillow = df_zillow.loc[is_after_2000].dropna(subset=["zhvi_condo"])

# Date format + column names
# Characters [0:4] of "date" are parsed as the year and everything from index 5
# on as the month. The slicing is vectorized through `.str`, and the new
# columns are attached with `.assign`, which returns a new frame; assigning
# columns directly onto the filtered frame can raise SettingWithCopyWarning.
date_labels = df_zillow["date"]
df_zillow = (
    df_zillow
    .assign(
        month=date_labels.str[5:].astype("int64"),
        year=date_labels.str[:4].astype("int64"),
    )
    .drop(columns=["date"])
    .rename(columns={"RegionName": "postal_code"})
)

In [None]:
# Export
# Write the cleaned table to CSV without the DataFrame index column.
zillow_export_path = "~/real_estate/data/v4/zillow_zipcode_data.csv"
df_zillow.to_csv(zillow_export_path, index=False)

# Show the first rows as the cell output for a quick visual check.
df_zillow.head()