# Transformations and Wrangling

This workbook imports the combined household dataset from the American Housing Survey and readys it for machine learning with various transformations.

Steps:
1. Subset the household dataset for first-time homeowners only.
2. Remove weight and flag variales from the household dataset.
3. Separate variale into target, continuous, categorical, and binary variables.
4. Remove variables whose portion of missing values is above the threshhold level.
5. Remove all variables related to house "experience"
6. Impute the missing values for continuous, categorical, and binary variables.
7. Create a dummy variable dataset from categorical variables.
8. Bin the housing satisfaction target variale.
9. Log transform income variables.
10. Combine datasets together into regression and classification model-ready datasets.
11. Update the AWS database.

NOTE: Each of these steps needs to be run numerical order for final datasets to be created correctly.

In [1]:
import os
import pandas as pd
import numpy as np
from functools import reduce
from sklearn.impute import SimpleImputer

Instantiate Variables

In [2]:
threshold = 0.20
path = os.path.join(os.getcwd(), 'data', 'working')

In [3]:
df = pd.read_csv(os.path.join(path, 'AHS Household Combined.csv'))
varcon = pd.read_csv(os.path.join(os.getcwd(), 'data', 'concordance', 'variable_concordance.csv'))

1 - Subset the dataset to only first-time home buyers

In [4]:
df.shape

(136245, 989)

In [5]:
df_fh = df[df['FIRSTHOME']==1].copy()
df_fh.shape

(28804, 989)

In [6]:
df_fh['HINCP'].min()

-4920

Create lists of classifications for each variable

In [7]:
all_vars = varcon.iloc[:,0]
weights_and_flags = varcon[(varcon['Weight']==True) | (varcon['Flag']==True)]['Variable'].values
non_exp = varcon[varcon['Not Experience']==True]['Variable'].values
target_vars = list(varcon[varcon['Type']=='Target']['Variable'].values)
cont_vars = list(varcon[varcon['Type']=='Continuous']['Variable'].values)
cat_vars = list(varcon[varcon['Type']=='Categorical']['Variable'].values)
binary_vars = list(varcon[varcon['Type']=='Binary']['Variable'].values)

2 - Remove weight and flag variales from the dataset 

In [8]:
df_fh2 = df_fh[list(set(all_vars).difference(set(weights_and_flags)))].copy()
df_fh2.shape

(28804, 260)

3 - Seperate dataset into target variables and dependent variables

In [9]:
target = df_fh2[['CONTROL','YEAR','RATINGHS']].copy()
estimators = df_fh2.drop(['RATINGHS','RATINGNH'], axis=1).copy()
estimators_cont = estimators[['CONTROL','YEAR'] + cont_vars]
estimators_cat = estimators[['CONTROL','YEAR'] + cat_vars]
estimators_binary = estimators[['CONTROL','YEAR'] + binary_vars]

4 - Remove variables whose portion of missing values is above the threshhold

In [10]:
target.replace([-9], np.nan, inplace=True)

miss_percent = df_fh2[cont_vars].isin([-9,-6]).sum(axis=0) / df_fh2[cont_vars].count(axis=0)
miss_percent_lt_thresh = miss_percent[miss_percent.iloc[:] < threshold]
estimators_cont = df_fh2[['CONTROL','YEAR'] + list(miss_percent_lt_thresh.index)].copy()
estimators_cont.replace([-9,-6], np.nan, inplace=True)

miss_percent = df_fh2[cat_vars].isin([-9]).sum(axis=0) / df_fh2[cat_vars].count(axis=0)
miss_percent_lt_thresh = miss_percent[miss_percent.iloc[:] < threshold]
estimators_cat = df_fh2[['CONTROL','YEAR'] + list(miss_percent_lt_thresh.index)].copy()
estimators_cat.replace([-9], np.nan, inplace=True)

miss_percent = df_fh2[binary_vars].isin([-9,-6]).sum(axis=0) / df_fh2[binary_vars].count(axis=0)
miss_percent_lt_thresh = miss_percent[miss_percent.iloc[:] < threshold]
estimators_binary = df_fh2[['CONTROL','YEAR'] + list(miss_percent_lt_thresh.index)].copy()
estimators_binary.replace([-9,-6], np.nan, inplace=True)

05 - Keep only variables that capture "housing experience"

In [11]:
dfs = [target, estimators_cont, estimators_cat, estimators_binary]
df_fh_all_vars = reduce(lambda left, right: pd.merge(left, right, how='inner', 
                                                   on=['CONTROL','YEAR']), dfs).dropna(how='any')

In [12]:
df_fh_non_exp = df_fh_all_vars[set(df_fh_all_vars.columns) & set(non_exp)]

In [13]:
cont_vars = list(set(estimators_cont.columns) & set(non_exp))
cat_vars = list(set(estimators_cat.columns) & set(non_exp))
binary_vars = list(set(estimators_binary.columns) & set(non_exp))

In [14]:
estimators_cont = estimators_cont[['CONTROL','YEAR'] + list(set(estimators_cont.columns) & set(non_exp))]
estimators_cat = estimators_cat[['CONTROL','YEAR'] + list(set(estimators_cat.columns) & set(non_exp))]
estimators_binary = estimators_binary[['CONTROL','YEAR'] + list(set(estimators_binary.columns) & set(non_exp))]

In [15]:
estimators_cont = df_fh2[['CONTROL','YEAR'] + cont_vars]
estimators_cat = df_fh2[['CONTROL','YEAR'] + cat_vars]
estimators_binary = df_fh2[['CONTROL','YEAR'] + binary_vars]

06 - Impute missing values for all estimators

Divide the list of remaining variales into 4 groups: 1) target, 2) continuous, 3) categorical, and 4) binary

Target Variables

In [16]:
imputer_target = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
imputer_target.fit(target)
imputed_target = imputer_target.transform(target)
target = pd.DataFrame(imputed_target, columns=target.columns)

Continuous Variables

In [17]:
imputer_cont = SimpleImputer(missing_values=np.nan, strategy='median')
imputer_cont.fit(estimators_cont)
imputed_cont = imputer_cont.transform(estimators_cont)
estimators_cont = pd.DataFrame(imputed_cont, columns=estimators_cont.columns)

Categorical Variables

In [18]:
imputer_cat = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
imputer_cat.fit(estimators_cat)
imputed_cat = imputer_cat.transform(estimators_cat)
estimators_cat = pd.DataFrame(imputed_cat, columns=estimators_cat.columns)

Binary Variables

In [19]:
imputer_binary = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
imputer_binary.fit(estimators_binary)
imputed_binary = imputer_binary.transform(estimators_binary)
estimators_binary = pd.DataFrame(imputed_binary, columns=estimators_binary.columns)

07 - Create dummies from categorical variables

In [20]:
estimators_cat_dum = pd.get_dummies(estimators_cat, columns=cat_vars)
estimators_binary_dum = pd.get_dummies(estimators_binary, columns=binary_vars)

10 - Merge datasets with different variable types back into one dataset

In [21]:
dfs_reg = [target, estimators_cont, estimators_cat_dum, estimators_binary_dum]
dfs_class = [target, estimators_cont, estimators_cat, estimators_binary]
df_final_reg = reduce(lambda left, right: pd.merge(left, right, how='inner', on=['CONTROL','YEAR']), dfs_reg).dropna(how='any')
df_final_class = reduce(lambda left, right: pd.merge(left, right, how='inner', on=['CONTROL','YEAR']), dfs_class).dropna(how='any')

08 - Bin Housing Satisfaction Variables 

09 - Log transform both income variales and merge back into the dataframe

In [22]:
df_final_reg['LN_HINCP'] = np.where(df_final_reg['HINCP'] > 1, np.log(df_final_reg['HINCP']), 0)
df_final_reg['LN_FINCP'] = np.where(df_final_reg['FINCP'] > 1, np.log(df_final_reg['FINCP']), 0)
df_final_class['HINCP_BIN'] = np.where(df_final_class['HINCP']>100000, 100000, df_final_class['HINCP'])
df_final_class['FINCP_BIN'] = np.where(df_final_class['FINCP']>100000, 100000, df_final_class['FINCP'])
df_final_class['HINCP_BIN'] = np.where(df_final_class['HINCP']<=0, 0, df_final_class['HINCP'])
df_final_class['FINCP_BIN'] = np.where(df_final_class['FINCP']<=0, 0, df_final_class['FINCP'])
df_final_class['HINCP_BIN'] = pd.cut(df_final_class['HINCP'], bins=np.linspace(0,100000,11), include_lowest=True)
df_final_class['FINCP_BIN'] = pd.cut(df_final_class['FINCP'], bins=np.linspace(0,100000,11), include_lowest=True)

  """Entry point for launching an IPython kernel.
  """Entry point for launching an IPython kernel.
  
  


In [23]:
df_final_class['FINCP']

0         58700.0
1        126000.0
2        130000.0
3        100000.0
4         15000.0
5        100000.0
6         13200.0
7         71004.0
8         30200.0
9         29000.0
10        25000.0
11        31000.0
12            0.0
13       100100.0
14       150000.0
15       516000.0
16        24000.0
17       461000.0
18       311200.0
19       219000.0
20       120000.0
21       230000.0
22        10000.0
23        14300.0
24        45000.0
25        31200.0
26        80000.0
27        55100.0
28       106000.0
29        77400.0
           ...   
28774     32700.0
28775     46000.0
28776    121000.0
28777     41000.0
28778    327100.0
28779    111200.0
28780     77000.0
28781     40004.0
28782      4000.0
28783     93000.0
28784     13000.0
28785     14500.0
28786    195600.0
28787    150050.0
28788    171000.0
28789     52800.0
28790     72000.0
28791     61200.0
28792         0.0
28793     31000.0
28794     65530.0
28795      1300.0
28796    257000.0
28797     85000.0
28798     

Create CSV Files

In [25]:
#df_fh.to_csv(os.path.join(path, 'AHS Household First Home.csv'))
#df_fh_non_exp.to_csv(os.path.join(path, 'AHS Household nonexperience variables with missings.csv'))
df_final_reg.to_csv(os.path.join(path, 'AHS Household Reg.csv'))
df_final_class.to_csv(os.path.join(path, 'AHS Household Class.csv'))

11 - Update Database

Send intermediate tables to the database

In [24]:
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:Admin123@project.cgxhdwn5zb5t.us-east-1.rds.amazonaws.com:5432/postgres')
df_final_class.to_sql('ahs_household_class', engine, if_exists='replace')

ProgrammingError: (psycopg2.ProgrammingError) can't adapt type 'pandas._libs.interval.Interval'
[SQL: INSERT INTO ahs_household_class (index, "CONTROL", "YEAR", "RATINGHS", "NUMELDERS", "BATHROOMS", "NUMOLDKIDS", "HHAGE", "DINING", "UTILAMT", "NUMVETS", "NUMADULTS", "UFINROOMS", "UNITSIZE", "KITCHENS", "FINROOMS", "TOTROOMS", "HHADLTKIDS", "GASAMT", "WATERAMT", "NUMPEOPLE", "OTHERAMT", "HHYNGKIDS", "NUMSUBFAM", "LAUNDY", "MULTIGEN", "OILAMT", "HINCP", "LOTSIZE", "FINCP", "BEDROOMS", "NUMSECFAM", "NUMNONREL", "PARTNER", "STORIES", "PERPOVLVL", "HHMOVE", "TRASHAMT", "NUMYNGKIDS", "ELECAMT", "HHOLDKIDS", "MVG3COST", "NUMMEMRY", "DIVISION", "BLD", "HHMAR", "MILHH", "NUMERRND", "HHNATVTY", "MVG1COST", "COOKFUEL", "FIREPLACE", "HHRACE", "OMB13CBSA", "MVG2COST", "NUMCARE", "NUMWALK", "NUMHEAR", "NUMSEE", "HSHLDTYPE", "OWNLOT", "HHHEAR", "HHSEE", "WASHER", "KITCHSINK", "DISHWASH", "CONDO", "FRIDGE", "HHCARE", "HHSEX", "FIRSTHOME", "GARAGE", "HHMEMRY", "HHWALK", "WINBARS", "NOSTEP", "HHERRND", "HINCP_BIN", "FINCP_BIN") VALUES (%(index)s, %(CONTROL)s, %(YEAR)s, %(RATINGHS)s, %(NUMELDERS)s, %(BATHROOMS)s, %(NUMOLDKIDS)s, %(HHAGE)s, %(DINING)s, %(UTILAMT)s, %(NUMVETS)s, %(NUMADULTS)s, %(UFINROOMS)s, %(UNITSIZE)s, %(KITCHENS)s, %(FINROOMS)s, %(TOTROOMS)s, %(HHADLTKIDS)s, %(GASAMT)s, %(WATERAMT)s, %(NUMPEOPLE)s, %(OTHERAMT)s, %(HHYNGKIDS)s, %(NUMSUBFAM)s, %(LAUNDY)s, %(MULTIGEN)s, %(OILAMT)s, %(HINCP)s, %(LOTSIZE)s, %(FINCP)s, %(BEDROOMS)s, %(NUMSECFAM)s, %(NUMNONREL)s, %(PARTNER)s, %(STORIES)s, %(PERPOVLVL)s, %(HHMOVE)s, %(TRASHAMT)s, %(NUMYNGKIDS)s, %(ELECAMT)s, %(HHOLDKIDS)s, %(MVG3COST)s, %(NUMMEMRY)s, %(DIVISION)s, %(BLD)s, %(HHMAR)s, %(MILHH)s, %(NUMERRND)s, %(HHNATVTY)s, %(MVG1COST)s, %(COOKFUEL)s, %(FIREPLACE)s, %(HHRACE)s, %(OMB13CBSA)s, %(MVG2COST)s, %(NUMCARE)s, %(NUMWALK)s, %(NUMHEAR)s, %(NUMSEE)s, %(HSHLDTYPE)s, %(OWNLOT)s, %(HHHEAR)s, %(HHSEE)s, %(WASHER)s, %(KITCHSINK)s, %(DISHWASH)s, %(CONDO)s, %(FRIDGE)s, %(HHCARE)s, %(HHSEX)s, %(FIRSTHOME)s, %(GARAGE)s, %(HHMEMRY)s, %(HHWALK)s, %(WINBARS)s, %(NOSTEP)s, %(HHERRND)s, %(HINCP_BIN)s, %(FINCP_BIN)s)]
[parameters: ({'index': 0, 'CONTROL': 11000006.0, 'YEAR': 2017.0, 'RATINGHS': 10.0, 'NUMELDERS': 0.0, 'BATHROOMS': 3.0, 'NUMOLDKIDS': 0.0, 'HHAGE': 64.0, 'DINING': 0.0, 'UTILAMT': 220.0, 'NUMVETS': 0.0, 'NUMADULTS': 2.0, 'UFINROOMS': 0.0, 'UNITSIZE': 4.0, 'KITCHENS': 1.0, 'FINROOMS': 1.0, 'TOTROOMS': 5.0, 'HHADLTKIDS': 0.0, 'GASAMT': 30.0, 'WATERAMT': 2.0, 'NUMPEOPLE': 2.0, 'OTHERAMT': 0.0, 'HHYNGKIDS': 0.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 1.0, 'MULTIGEN': 2.0, 'OILAMT': 0.0, 'HINCP': 58700.0, 'LOTSIZE': 3.0, 'FINCP': 58700.0, 'BEDROOMS': 3.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 0.0, 'PARTNER': 0.0, 'STORIES': 1.0, 'PERPOVLVL': 361.0, 'HHMOVE': 1990.0, 'TRASHAMT': 70.0, 'NUMYNGKIDS': 0.0, 'ELECAMT': 120.0, 'HHOLDKIDS': 0.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 7, 'BLD': 2, 'HHMAR': 3, 'MILHH': 6, 'NUMERRND': 1, 'HHNATVTY': 57, 'MVG1COST': 3, 'COOKFUEL': 2, 'FIREPLACE': 2, 'HHRACE': 1, 'OMB13CBSA': 99998, 'MVG2COST': -6, 'NUMCARE': 1, 'NUMWALK': 1, 'NUMHEAR': 1, 'NUMSEE': 1, 'HSHLDTYPE': 3, 'OWNLOT': 1, 'HHHEAR': 2, 'HHSEE': 2, 'WASHER': 1, 'KITCHSINK': 1, 'DISHWASH': 1, 'CONDO': 2, 'FRIDGE': 1, 'HHCARE': 2, 'HHSEX': 2, 'FIRSTHOME': 1, 'GARAGE': 1, 'HHMEMRY': 2, 'HHWALK': 2, 'WINBARS': 2, 'NOSTEP': 1, 'HHERRND': 2, 'HINCP_BIN': Interval(50000.0, 60000.0, closed='right'), 'FINCP_BIN': Interval(50000.0, 60000.0, closed='right')}, {'index': 1, 'CONTROL': 11000016.0, 'YEAR': 2017.0, 'RATINGHS': 8.0, 'NUMELDERS': 0.0, 'BATHROOMS': 2.0, 'NUMOLDKIDS': 1.0, 'HHAGE': 38.0, 'DINING': 1.0, 'UTILAMT': 230.0, 'NUMVETS': 0.0, 'NUMADULTS': 2.0, 'UFINROOMS': 0.0, 'UNITSIZE': 4.0, 'KITCHENS': 1.0, 'FINROOMS': 1.0, 'TOTROOMS': 6.0, 'HHADLTKIDS': 0.0, 'GASAMT': 0.0, 'WATERAMT': 3.0, 'NUMPEOPLE': 3.0, 'OTHERAMT': 0.0, 'HHYNGKIDS': 0.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 0.0, 'MULTIGEN': 2.0, 'OILAMT': 30.0, 'HINCP': 126000.0, 'LOTSIZE': 5.0, 'FINCP': 126000.0, 'BEDROOMS': 3.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 0.0, 'PARTNER': 0.0, 'STORIES': 3.0, 'PERPOVLVL': 501.0, 'HHMOVE': 2015.0, 'TRASHAMT': 40.0, 'NUMYNGKIDS': 0.0, 'ELECAMT': 160.0, 'HHOLDKIDS': 1.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 5, 'BLD': 2, 'HHMAR': 1, 'MILHH': 6, 'NUMERRND': 1, 'HHNATVTY': 57, 'MVG1COST': -6, 'COOKFUEL': 1, 'FIREPLACE': 4, 'HHRACE': 1, 'OMB13CBSA': 37980, 'MVG2COST': -6, 'NUMCARE': 1, 'NUMWALK': 1, 'NUMHEAR': 1, 'NUMSEE': 1, 'HSHLDTYPE': 1, 'OWNLOT': 1, 'HHHEAR': 2, 'HHSEE': 2, 'WASHER': 1, 'KITCHSINK': 1, 'DISHWASH': 2, 'CONDO': 2, 'FRIDGE': 1, 'HHCARE': 2, 'HHSEX': 2, 'FIRSTHOME': 1, 'GARAGE': 1, 'HHMEMRY': 2, 'HHWALK': 2, 'WINBARS': 2, 'NOSTEP': 2, 'HHERRND': 2, 'HINCP_BIN': None, 'FINCP_BIN': None}, {'index': 2, 'CONTROL': 11000017.0, 'YEAR': 2017.0, 'RATINGHS': 9.0, 'NUMELDERS': 0.0, 'BATHROOMS': 4.0, 'NUMOLDKIDS': 0.0, 'HHAGE': 43.0, 'DINING': 1.0, 'UTILAMT': 220.0, 'NUMVETS': 0.0, 'NUMADULTS': 2.0, 'UFINROOMS': 0.0, 'UNITSIZE': 6.0, 'KITCHENS': 1.0, 'FINROOMS': 2.0, 'TOTROOMS': 7.0, 'HHADLTKIDS': 0.0, 'GASAMT': 90.0, 'WATERAMT': 3.0, 'NUMPEOPLE': 2.0, 'OTHERAMT': 0.0, 'HHYNGKIDS': 0.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 0.0, 'MULTIGEN': 1.0, 'OILAMT': 0.0, 'HINCP': 133000.0, 'LOTSIZE': 2.0, 'FINCP': 130000.0, 'BEDROOMS': 3.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 1.0, 'PARTNER': 3.0, 'STORIES': 3.0, 'PERPOVLVL': 501.0, 'HHMOVE': 2016.0, 'TRASHAMT': 40.0, 'NUMYNGKIDS': 0.0, 'ELECAMT': 90.0, 'HHOLDKIDS': 0.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 5, 'BLD': 2, 'HHMAR': 6, 'MILHH': 6, 'NUMERRND': 1, 'HHNATVTY': 57, 'MVG1COST': 1, 'COOKFUEL': 1, 'FIREPLACE': 4, 'HHRACE': 1, 'OMB13CBSA': 37980, 'MVG2COST': -6, 'NUMCARE': 1, 'NUMWALK': 1, 'NUMHEAR': 1, 'NUMSEE': 1, 'HSHLDTYPE': 7, 'OWNLOT': 1, 'HHHEAR': 2, 'HHSEE': 2, 'WASHER': 1, 'KITCHSINK': 1, 'DISHWASH': 1, 'CONDO': 2, 'FRIDGE': 1, 'HHCARE': 2, 'HHSEX': 2, 'FIRSTHOME': 1, 'GARAGE': 1, 'HHMEMRY': 2, 'HHWALK': 2, 'WINBARS': 2, 'NOSTEP': 2, 'HHERRND': 2, 'HINCP_BIN': None, 'FINCP_BIN': None}, {'index': 3, 'CONTROL': 11000023.0, 'YEAR': 2017.0, 'RATINGHS': 8.0, 'NUMELDERS': 0.0, 'BATHROOMS': 2.0, 'NUMOLDKIDS': 0.0, 'HHAGE': 52.0, 'DINING': 1.0, 'UTILAMT': 640.0, 'NUMVETS': 0.0, 'NUMADULTS': 2.0, 'UFINROOMS': 0.0, 'UNITSIZE': 4.0, 'KITCHENS': 1.0, 'FINROOMS': 2.0, 'TOTROOMS': 7.0, 'HHADLTKIDS': 0.0, 'GASAMT': 60.0, 'WATERAMT': 110.0, 'NUMPEOPLE': 2.0, 'OTHERAMT': 0.0, 'HHYNGKIDS': 0.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 0.0, 'MULTIGEN': 1.0, 'OILAMT': 0.0, 'HINCP': 100000.0, 'LOTSIZE': 2.0, 'FINCP': 100000.0, 'BEDROOMS': 3.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 0.0, 'PARTNER': 0.0, 'STORIES': 3.0, 'PERPOVLVL': 501.0, 'HHMOVE': 2004.0, 'TRASHAMT': 20.0, 'NUMYNGKIDS': 0.0, 'ELECAMT': 450.0, 'HHOLDKIDS': 0.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 5, 'BLD': 2, 'HHMAR': 1, 'MILHH': 6, 'NUMERRND': 1, 'HHNATVTY': 57, 'MVG1COST': -6, 'COOKFUEL': 1, 'FIREPLACE': 2, 'HHRACE': 1, 'OMB13CBSA': 37980, 'MVG2COST': -6, 'NUMCARE': 1, 'NUMWALK': 1, 'NUMHEAR': 1, 'NUMSEE': 1, 'HSHLDTYPE': 1, 'OWNLOT': 1, 'HHHEAR': 2, 'HHSEE': 2, 'WASHER': 1, 'KITCHSINK': 1, 'DISHWASH': 1, 'CONDO': 2, 'FRIDGE': 1, 'HHCARE': 2, 'HHSEX': 1, 'FIRSTHOME': 1, 'GARAGE': 1, 'HHMEMRY': 2, 'HHWALK': 2, 'WINBARS': 2, 'NOSTEP': 1, 'HHERRND': 2, 'HINCP_BIN': Interval(90000.0, 100000.0, closed='right'), 'FINCP_BIN': Interval(90000.0, 100000.0, closed='right')}, {'index': 4, 'CONTROL': 11000046.0, 'YEAR': 2017.0, 'RATINGHS': 10.0, 'NUMELDERS': 0.0, 'BATHROOMS': 2.0, 'NUMOLDKIDS': 3.0, 'HHAGE': 37.0, 'DINING': 0.0, 'UTILAMT': 210.0, 'NUMVETS': 0.0, 'NUMADULTS': 2.0, 'UFINROOMS': 0.0, 'UNITSIZE': 2.0, 'KITCHENS': 1.0, 'FINROOMS': 1.0, 'TOTROOMS': 5.0, 'HHADLTKIDS': 0.0, 'GASAMT': 30.0, 'WATERAMT': 2.0, 'NUMPEOPLE': 5.0, 'OTHERAMT': 50.0, 'HHYNGKIDS': 0.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 0.0, 'MULTIGEN': 2.0, 'OILAMT': 0.0, 'HINCP': 15000.0, 'LOTSIZE': 2.0, 'FINCP': 15000.0, 'BEDROOMS': 3.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 0.0, 'PARTNER': 0.0, 'STORIES': 1.0, 'PERPOVLVL': 52.0, 'HHMOVE': 2000.0, 'TRASHAMT': 2.0, 'NUMYNGKIDS': 0.0, 'ELECAMT': 130.0, 'HHOLDKIDS': 3.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 5, 'BLD': 1, 'HHMAR': 1, 'MILHH': 6, 'NUMERRND': 1, 'HHNATVTY': 313, 'MVG1COST': -6, 'COOKFUEL': 3, 'FIREPLACE': 4, 'HHRACE': 1, 'OMB13CBSA': 99998, 'MVG2COST': -6, 'NUMCARE': 1, 'NUMWALK': 1, 'NUMHEAR': 1, 'NUMSEE': 1, 'HSHLDTYPE': 1, 'OWNLOT': 2, 'HHHEAR': 2, 'HHSEE': 2, 'WASHER': 1, 'KITCHSINK': 1, 'DISHWASH': 2, 'CONDO': 2, 'FRIDGE': 1, 'HHCARE': 2, 'HHSEX': 1, 'FIRSTHOME': 1, 'GARAGE': 1, 'HHMEMRY': 2, 'HHWALK': 2, 'WINBARS': 2, 'NOSTEP': 2, 'HHERRND': 2, 'HINCP_BIN': Interval(10000.0, 20000.0, closed='right'), 'FINCP_BIN': Interval(10000.0, 20000.0, closed='right')}, {'index': 5, 'CONTROL': 11000048.0, 'YEAR': 2017.0, 'RATINGHS': 9.0, 'NUMELDERS': 0.0, 'BATHROOMS': 4.0, 'NUMOLDKIDS': 2.0, 'HHAGE': 47.0, 'DINING': 1.0, 'UTILAMT': 290.0, 'NUMVETS': 0.0, 'NUMADULTS': 2.0, 'UFINROOMS': 0.0, 'UNITSIZE': 6.0, 'KITCHENS': 1.0, 'FINROOMS': 1.0, 'TOTROOMS': 7.0, 'HHADLTKIDS': 0.0, 'GASAMT': 70.0, 'WATERAMT': 70.0, 'NUMPEOPLE': 4.0, 'OTHERAMT': 0.0, 'HHYNGKIDS': 0.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 1.0, 'MULTIGEN': 2.0, 'OILAMT': 0.0, 'HINCP': 100000.0, 'LOTSIZE': 3.0, 'FINCP': 100000.0, 'BEDROOMS': 4.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 0.0, 'PARTNER': 0.0, 'STORIES': 3.0, 'PERPOVLVL': 406.0, 'HHMOVE': 2008.0, 'TRASHAMT': 30.0, 'NUMYNGKIDS': 0.0, 'ELECAMT': 120.0, 'HHOLDKIDS': 2.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 5, 'BLD': 2, 'HHMAR': 1, 'MILHH': 6, 'NUMERRND': 1, 'HHNATVTY': 233, 'MVG1COST': -6, 'COOKFUEL': 1, 'FIREPLACE': 2, 'HHRACE': 4, 'OMB13CBSA': 37980, 'MVG2COST': -6, 'NUMCARE': 1, 'NUMWALK': 1, 'NUMHEAR': 1, 'NUMSEE': 1, 'HSHLDTYPE': 1, 'OWNLOT': 1, 'HHHEAR': 2, 'HHSEE': 2, 'WASHER': 1, 'KITCHSINK': 1, 'DISHWASH': 1, 'CONDO': 2, 'FRIDGE': 1, 'HHCARE': 2, 'HHSEX': 2, 'FIRSTHOME': 1, 'GARAGE': 1, 'HHMEMRY': 2, 'HHWALK': 2, 'WINBARS': 2, 'NOSTEP': 2, 'HHERRND': 2, 'HINCP_BIN': Interval(90000.0, 100000.0, closed='right'), 'FINCP_BIN': Interval(90000.0, 100000.0, closed='right')}, {'index': 6, 'CONTROL': 11000052.0, 'YEAR': 2017.0, 'RATINGHS': 5.0, 'NUMELDERS': 1.0, 'BATHROOMS': 1.0, 'NUMOLDKIDS': 0.0, 'HHAGE': 65.0, 'DINING': 0.0, 'UTILAMT': 240.0, 'NUMVETS': 0.0, 'NUMADULTS': 1.0, 'UFINROOMS': 0.0, 'UNITSIZE': 3.0, 'KITCHENS': 1.0, 'FINROOMS': 1.0, 'TOTROOMS': 6.0, 'HHADLTKIDS': 0.0, 'GASAMT': 50.0, 'WATERAMT': 2.0, 'NUMPEOPLE': 1.0, 'OTHERAMT': 0.0, 'HHYNGKIDS': 0.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 0.0, 'MULTIGEN': 1.0, 'OILAMT': 0.0, 'HINCP': 13200.0, 'LOTSIZE': 4.0, 'FINCP': 13200.0, 'BEDROOMS': 4.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 0.0, 'PARTNER': 0.0, 'STORIES': 2.0, 'PERPOVLVL': 113.0, 'HHMOVE': 1965.0, 'TRASHAMT': 50.0, 'NUMYNGKIDS': 0.0, 'ELECAMT': 140.0, 'HHOLDKIDS': 0.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 4, 'BLD': 2, 'HHMAR': 6, 'MILHH': 6, 'NUMERRND': 1, 'HHNATVTY': 57, 'MVG1COST': -6, 'COOKFUEL': 2, 'FIREPLACE': 4, 'HHRACE': 1, 'OMB13CBSA': 99998, 'MVG2COST': -6, 'NUMCARE': 1, 'NUMWALK': 1, 'NUMHEAR': 1, 'NUMSEE': 1, 'HSHLDTYPE': 4, 'OWNLOT': 1, 'HHHEAR': 2, 'HHSEE': 2, 'WASHER': 2, 'KITCHSINK': 1, 'DISHWASH': 1, 'CONDO': 2, 'FRIDGE': 1, 'HHCARE': 2, 'HHSEX': 1, 'FIRSTHOME': 1, 'GARAGE': 2, 'HHMEMRY': 2, 'HHWALK': 2, 'WINBARS': 2, 'NOSTEP': 2, 'HHERRND': 2, 'HINCP_BIN': Interval(10000.0, 20000.0, closed='right'), 'FINCP_BIN': Interval(10000.0, 20000.0, closed='right')}, {'index': 7, 'CONTROL': 11000054.0, 'YEAR': 2017.0, 'RATINGHS': 8.0, 'NUMELDERS': 0.0, 'BATHROOMS': 1.0, 'NUMOLDKIDS': 0.0, 'HHAGE': 49.0, 'DINING': 0.0, 'UTILAMT': 100.0, 'NUMVETS': 0.0, 'NUMADULTS': 1.0, 'UFINROOMS': 0.0, 'UNITSIZE': 3.0, 'KITCHENS': 1.0, 'FINROOMS': 1.0, 'TOTROOMS': 4.0, 'HHADLTKIDS': 0.0, 'GASAMT': 0.0, 'WATERAMT': 2.0, 'NUMPEOPLE': 1.0, 'OTHERAMT': 0.0, 'HHYNGKIDS': 0.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 0.0, 'MULTIGEN': 1.0, 'OILAMT': 0.0, 'HINCP': 71004.0, 'LOTSIZE': -6.0, 'FINCP': 71004.0, 'BEDROOMS': 2.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 0.0, 'PARTNER': 0.0, 'STORIES': 7.0, 'PERPOVLVL': 501.0, 'HHMOVE': 2000.0, 'TRASHAMT': 2.0, 'NUMYNGKIDS': 0.0, 'ELECAMT': 100.0, 'HHOLDKIDS': 0.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 5, 'BLD': 9, 'HHMAR': 6, 'MILHH': 6, 'NUMERRND': 1, 'HHNATVTY': 57, 'MVG1COST': -6, 'COOKFUEL': 1, 'FIREPLACE': 4, 'HHRACE': 1, 'OMB13CBSA': 37980, 'MVG2COST': -6, 'NUMCARE': 1, 'NUMWALK': 1, 'NUMHEAR': 1, 'NUMSEE': 1, 'HSHLDTYPE': 4, 'OWNLOT': -6, 'HHHEAR': 2, 'HHSEE': 2, 'WASHER': 1, 'KITCHSINK': 1, 'DISHWASH': 1, 'CONDO': 1, 'FRIDGE': 1, 'HHCARE': 2, 'HHSEX': 1, 'FIRSTHOME': 1, 'GARAGE': 2, 'HHMEMRY': 2, 'HHWALK': 2, 'WINBARS': -6, 'NOSTEP': 2, 'HHERRND': 2, 'HINCP_BIN': Interval(70000.0, 80000.0, closed='right'), 'FINCP_BIN': Interval(70000.0, 80000.0, closed='right')}  ... displaying 10 of 28804 total bound parameter sets ...  {'index': 28802, 'CONTROL': 11085283.0, 'YEAR': 2015.0, 'RATINGHS': 8.0, 'NUMELDERS': 1.0, 'BATHROOMS': 1.0, 'NUMOLDKIDS': 0.0, 'HHAGE': 85.0, 'DINING': 0.0, 'UTILAMT': 80.0, 'NUMVETS': 1.0, 'NUMADULTS': 1.0, 'UFINROOMS': 0.0, 'UNITSIZE': 4.0, 'KITCHENS': 1.0, 'FINROOMS': 1.0, 'TOTROOMS': 4.0, 'HHADLTKIDS': 0.0, 'GASAMT': 0.0, 'WATERAMT': 3.0, 'NUMPEOPLE': 1.0, 'OTHERAMT': 0.0, 'HHYNGKIDS': 0.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 0.0, 'MULTIGEN': 1.0, 'OILAMT': 0.0, 'HINCP': 2000.0, 'LOTSIZE': 5.0, 'FINCP': 2000.0, 'BEDROOMS': 2.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 0.0, 'PARTNER': 0.0, 'STORIES': 1.0, 'PERPOVLVL': 18.0, 'HHMOVE': 1975.0, 'TRASHAMT': 0.0, 'NUMYNGKIDS': 0.0, 'ELECAMT': 80.0, 'HHOLDKIDS': 0.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 4, 'BLD': 2, 'HHMAR': 4, 'MILHH': 2, 'NUMERRND': 2, 'HHNATVTY': 57, 'MVG1COST': -6, 'COOKFUEL': 1, 'FIREPLACE': 2, 'HHRACE': 1, 'OMB13CBSA': 99998, 'MVG2COST': -6, 'NUMCARE': 2, 'NUMWALK': 2, 'NUMHEAR': 1, 'NUMSEE': 2, 'HSHLDTYPE': 4, 'OWNLOT': 1, 'HHHEAR': 2, 'HHSEE': 1, 'WASHER': 1, 'KITCHSINK': 1, 'DISHWASH': 2, 'CONDO': 2, 'FRIDGE': 1, 'HHCARE': 1, 'HHSEX': 1, 'FIRSTHOME': 1, 'GARAGE': 1, 'HHMEMRY': 2, 'HHWALK': 1, 'WINBARS': 2, 'NOSTEP': 1, 'HHERRND': 1, 'HINCP_BIN': Interval(-0.001, 10000.0, closed='right'), 'FINCP_BIN': Interval(-0.001, 10000.0, closed='right')}, {'index': 28803, 'CONTROL': 11085286.0, 'YEAR': 2015.0, 'RATINGHS': 9.0, 'NUMELDERS': 0.0, 'BATHROOMS': 3.0, 'NUMOLDKIDS': 0.0, 'HHAGE': 41.0, 'DINING': 1.0, 'UTILAMT': 260.0, 'NUMVETS': 0.0, 'NUMADULTS': 3.0, 'UFINROOMS': 0.0, 'UNITSIZE': -9.0, 'KITCHENS': 1.0, 'FINROOMS': 1.0, 'TOTROOMS': 5.0, 'HHADLTKIDS': 1.0, 'GASAMT': 80.0, 'WATERAMT': 100.0, 'NUMPEOPLE': 4.0, 'OTHERAMT': 0.0, 'HHYNGKIDS': 1.0, 'NUMSUBFAM': 0.0, 'LAUNDY': 0.0, 'MULTIGEN': 2.0, 'OILAMT': 0.0, 'HINCP': 18000.0, 'LOTSIZE': 1.0, 'FINCP': 18000.0, 'BEDROOMS': 2.0, 'NUMSECFAM': 0.0, 'NUMNONREL': 0.0, 'PARTNER': 0.0, 'STORIES': 2.0, 'PERPOVLVL': 73.0, 'HHMOVE': 2007.0, 'TRASHAMT': 3.0, 'NUMYNGKIDS': 1.0, 'ELECAMT': 80.0, 'HHOLDKIDS': 0.0, 'MVG3COST': -6, 'NUMMEMRY': 1, 'DIVISION': 4, 'BLD': 2, 'HHMAR': 1, 'MILHH': 6, 'NUMERRND': 1, 'HHNATVTY': 303, 'MVG1COST': -6, 'COOKFUEL': 2, 'FIREPLACE': 2, 'HHRACE': 3, 'OMB13CBSA': 99998, 'MVG2COST': -6, 'NUMCARE': 1, 'NUMWALK': 1, 'NUMHEAR': 1, 'NUMSEE': 1, 'HSHLDTYPE': 1, 'OWNLOT': 1, 'HHHEAR': 2, 'HHSEE': 2, 'WASHER': 1, 'KITCHSINK': 1, 'DISHWASH': 1, 'CONDO': 2, 'FRIDGE': 1, 'HHCARE': 2, 'HHSEX': 1, 'FIRSTHOME': 1, 'GARAGE': 2, 'HHMEMRY': 2, 'HHWALK': 2, 'WINBARS': 2, 'NOSTEP': 1, 'HHERRND': 2, 'HINCP_BIN': Interval(10000.0, 20000.0, closed='right'), 'FINCP_BIN': Interval(10000.0, 20000.0, closed='right')})]
(Background on this error at: http://sqlalche.me/e/f405)

In [None]:
from sqlalchemy import create_engine

df_tables = {'ahs_household_step_1':df_fh, 
             'ahs_household_step_4':df_fh4, 
             'ahs_household_class':df_final_class,
             'ahs_household_reg':df_final_reg}
engine = create_engine('postgresql://postgres:Admin123@project.cgxhdwn5zb5t.us-east-1.rds.amazonaws.com:5432/postgres')

for name, df in df_tables.items():
    df.to_sql('{}'.format(name), engine)