# Data Assembler

In [1]:
import pandas as pd
import numpy as np

In [2]:
cs = "Clean Data/casecleaned.csv"
fhfa = "Clean Data/FHFACleaned.csv"
fods = "Clean Data/FinancialCleaned.csv"
afford = "Clean Data/Housing_Affordability_Index_1981_2019.csv"
new_home = "Clean Data/NewHomeCleaned.csv"
unemployment = "Clean Data/UnemployCleaned.csv"

In [3]:
cs_data = pd.read_csv(cs)
fhfa_data = pd.read_csv(fhfa)
fods_data = pd.read_csv(fods)
afford_data = pd.read_csv(afford)
new_home_data = pd.read_csv(new_home, dtype={'Year': 'str'})
unemployment_data = pd.read_csv(unemployment)


For each dataframe, we'll create a "period" column that we'll use for the merge later.

In [4]:
cs_data["period"] = cs_data["Year"].map(str) + "-" + cs_data["Month"]
cs_data.head()

Unnamed: 0,Year,Month,CS_Index,period
0,2000,January,100.589697,2000-January
1,2000,February,101.692479,2000-February
2,2000,March,102.783829,2000-March
3,2000,April,103.996444,2000-April
4,2000,May,105.252818,2000-May


In [5]:
fhfa_data["period"] = fhfa_data["Year"].map(str) + "-" + fhfa_data["Month"]
fhfa_data.head()

Unnamed: 0,East North Central (SA),East South Central (SA),Middle Atlantic (SA),Mountain (SA),New England (SA),Pacific (SA),South Atlantic (SA),West North Central (SA),West South Central (SA),USA (SA),Year,Month,period
0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,1991,January,1991-January
1,101.07,100.41,100.17,98.63,102.55,100.52,100.41,100.7,99.72,100.46,1991,February,1991-February
2,101.0,100.6,99.74,100.54,101.44,100.13,100.6,100.14,100.55,100.5,1991,March,1991-March
3,101.04,100.58,99.07,100.35,100.94,100.06,100.48,100.46,100.09,100.33,1991,April,1991-April
4,101.43,100.77,99.12,100.39,99.81,100.11,100.57,100.24,100.35,100.41,1991,May,1991-May


In [6]:
afford_data["period"] = afford_data["Year"].map(str) + "-" + afford_data["Month"]
afford_data.head()

Unnamed: 0,Month,Year,Housing_Affordability_Index,period
0,March,2019,152.7,2019-March
1,February,2019,156.6,2019-February
2,January,2019,154.6,2019-January
3,December,2018,147.4,2018-December
4,November,2018,144.3,2018-November


In [7]:
new_home_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65528 entries, 0 to 65527
Data columns (total 3 columns):
Market Absorption Rate (%)    351 non-null float64
Year                          360 non-null object
Month                         360 non-null object
dtypes: float64(1), object(2)
memory usage: 1.5+ MB


In [9]:
new_home_data["period"] = new_home_data["Year"].map(str) + "-" + new_home_data["Month"]
new_home_data.head()

Unnamed: 0,Market Absorption Rate (%),Year,Month,period
0,45.0,1990,January,1990-January
1,50.0,1990,February,1990-February
2,58.0,1990,March,1990-March
3,52.0,1990,April,1990-April
4,50.0,1990,May,1990-May


In [11]:
unemployment_data["period"] = unemployment_data["Year"].map(str) + "-" + unemployment_data["Month"]
unemployment_data.head()

Unnamed: 0,Year,Month,Value,period
0,2000,January,4.0,2000-January
1,2000,February,4.1,2000-February
2,2000,March,4.0,2000-March
3,2000,April,3.8,2000-April
4,2000,May,4.0,2000-May


In [12]:
# Financial Obligations and Debt Service is in quarterly format. Need to interpolate monthly first.
fods_data.head()

Unnamed: 0,Period,"Financial obligations ratio, seasonally adjusted","Consumer debt service ratio, seasonally adjusted","Debt service ratio, seasonally adjusted","Mortgage debt service ratio, seasonally adjusted",Year,Quarter
0,1990Q1,16.900919,5.635607,11.64915,6.013543,1990,Q1
1,1990Q2,16.875752,5.54066,11.60066,6.06,1990,Q2
2,1990Q3,16.888249,5.481259,11.583131,6.101873,1990,Q3
3,1990Q4,16.960268,5.419602,11.604449,6.184847,1990,Q4
4,1991Q1,16.960137,5.355239,11.578032,6.222793,1991,Q1


0   1990-01-01
1   1990-04-01
2   1990-07-01
3   1990-10-01
4   1991-01-01
Name: Period, dtype: datetime64[ns]

<bound method Resampler.nearest of DatetimeIndexResampler [freq=<MonthEnd>, axis=0, closed=right, label=right, convention=start, base=0]>

Unnamed: 0,Period,"Financial obligations ratio, seasonally adjusted","Consumer debt service ratio, seasonally adjusted","Debt service ratio, seasonally adjusted","Mortgage debt service ratio, seasonally adjusted",Year,Quarter
0,1990-01-01,16.900919,5.635607,11.64915,6.013543,1990,Q1
1,1990-04-01,16.875752,5.54066,11.60066,6.06,1990,Q2
2,1990-07-01,16.888249,5.481259,11.583131,6.101873,1990,Q3
3,1990-10-01,16.960268,5.419602,11.604449,6.184847,1990,Q4
4,1991-01-01,16.960137,5.355239,11.578032,6.222793,1991,Q1


Unnamed: 0,Period,"Financial obligations ratio, seasonally adjusted","Consumer debt service ratio, seasonally adjusted","Debt service ratio, seasonally adjusted","Mortgage debt service ratio, seasonally adjusted",Year,Quarter,Month
0,1990-01-01,16.900919,5.635607,11.64915,6.013543,1990,Q1,1
1,1990-04-01,16.875752,5.54066,11.60066,6.06,1990,Q2,4
2,1990-07-01,16.888249,5.481259,11.583131,6.101873,1990,Q3,7
3,1990-10-01,16.960268,5.419602,11.604449,6.184847,1990,Q4,10
4,1991-01-01,16.960137,5.355239,11.578032,6.222793,1991,Q1,1


AttributeError: module 'pandas' has no attribute 'drop'

Unnamed: 0,Period,"Financial obligations ratio, seasonally adjusted","Consumer debt service ratio, seasonally adjusted","Debt service ratio, seasonally adjusted","Mortgage debt service ratio, seasonally adjusted",Year,Quarter,Month
0,1990-01-01,16.900919,5.635607,11.649150,6.013543,1990,Q1,
1,1990-04-01,16.875752,5.540660,11.600660,6.060000,1990,Q2,
2,1990-07-01,16.888249,5.481259,11.583131,6.101873,1990,Q3,
3,1990-10-01,16.960268,5.419602,11.604449,6.184847,1990,Q4,
4,1991-01-01,16.960137,5.355239,11.578032,6.222793,1991,Q1,
5,1991-04-01,16.818953,5.238545,11.434237,6.195692,1991,Q2,
6,1991-07-01,16.703078,5.137354,11.318681,6.181328,1991,Q3,
7,1991-10-01,16.479746,5.017251,11.117333,6.100082,1991,Q4,
8,1992-01-01,16.160733,4.867179,10.855581,5.988402,1992,Q1,
9,1992-04-01,15.971127,4.774439,10.680886,5.906447,1992,Q2,
