In [17]:
import os
import json
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, validation_curve
from sklearn.preprocessing import PolynomialFeatures, StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, Ridge
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import explained_variance_score, r2_score, mean_squared_error

In [128]:
df_reg = pd.read_csv(os.path.join(os.getcwd(), 'data', 'working', 'AHS Household Reg.csv'))
df_class =pd.read_csv(os.path.join(os.getcwd(), 'data', 'working', 'AHS Household Class.csv'))

In [129]:
def createXY(df):
    y = df['RATINGHS'].copy().values
    X = df.drop(['RATINGHS','CONTROL','YEAR'], axis=1).copy().values
    return X, y

### Linear Regression

In [153]:
X, y = createXY(df_reg)

In [69]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, train_size=0.66)

In [74]:
pd.set_option('max_columns', 500)
df_reg.head()

Unnamed: 0.1,Unnamed: 0,CONTROL,YEAR,RATINGHS,EXPSUM,LN_HINCP,LN_FINCP,TOTROOMS,PERPOVLVL,DINING,LAUNDY,STORIES,HHAGE,HHMOVE,PARTNER,NUMELDERS,NUMADULTS,NUMNONREL,HHYNGKIDS,HHOLDKIDS,NUMVETS,NUMYNGKIDS,NUMOLDKIDS,NUMSUBFAM,NUMSECFAM,NUMPEOPLE,HHADLTKIDS,MULTIGEN,UFINROOMS,LOTSIZE,FINROOMS,UNITSIZE,BEDROOMS,KITCHENS,BATHROOMS,DIVISION_1.0,DIVISION_2.0,DIVISION_3.0,DIVISION_4.0,DIVISION_5.0,DIVISION_6.0,DIVISION_7.0,DIVISION_8.0,DIVISION_9.0,OMB13CBSA_12060.0,OMB13CBSA_14460.0,OMB13CBSA_16980.0,OMB13CBSA_19100.0,OMB13CBSA_19820.0,OMB13CBSA_26420.0,OMB13CBSA_31080.0,OMB13CBSA_33100.0,OMB13CBSA_35620.0,OMB13CBSA_37980.0,OMB13CBSA_38060.0,OMB13CBSA_40140.0,OMB13CBSA_41860.0,OMB13CBSA_42660.0,OMB13CBSA_47900.0,OMB13CBSA_99998.0,OMB13CBSA_99999.0,BLD_1.0,BLD_2.0,BLD_3.0,BLD_4.0,BLD_5.0,BLD_6.0,BLD_7.0,BLD_8.0,BLD_9.0,BLD_10.0,HHMAR_1.0,HHMAR_2.0,HHMAR_3.0,HHMAR_4.0,HHMAR_5.0,HHMAR_6.0,MILHH_1.0,MILHH_2.0,MILHH_3.0,MILHH_4.0,MILHH_5.0,MILHH_6.0,HHRACE_1.0,HHRACE_2.0,HHRACE_3.0,HHRACE_4.0,HHRACE_5.0,HHRACE_6.0,HHRACE_7.0,HHRACE_8.0,HHRACE_9.0,HHRACE_10.0,HHRACE_11.0,HHRACE_12.0,HHRACE_13.0,HHRACE_14.0,HHRACE_15.0,HHRACE_16.0,HHRACE_18.0,HHRACE_20.0,HHNATVTY_57.0,HHNATVTY_66.0,HHNATVTY_69.0,HHNATVTY_73.0,HHNATVTY_78.0,HHNATVTY_100.0,HHNATVTY_102.0,HHNATVTY_103.0,HHNATVTY_104.0,HHNATVTY_105.0,HHNATVTY_106.0,HHNATVTY_108.0,HHNATVTY_109.0,HHNATVTY_110.0,HHNATVTY_116.0,HHNATVTY_117.0,HHNATVTY_119.0,HHNATVTY_120.0,HHNATVTY_126.0,HHNATVTY_127.0,HHNATVTY_128.0,HHNATVTY_129.0,HHNATVTY_132.0,HHNATVTY_134.0,HHNATVTY_136.0,HHNATVTY_137.0,HHNATVTY_138.0,HHNATVTY_139.0,HHNATVTY_140.0,HHNATVTY_142.0,HHNATVTY_147.0,HHNATVTY_148.0,HHNATVTY_149.0,HHNATVTY_150.0,HHNATVTY_151.0,HHNATVTY_152.0,HHNATVTY_154.0,HHNATVTY_157.0,HHNATVTY_158.0,HHNATVTY_159.0,HHNATVTY_160.0,HHNATVTY_161.0,HHNATVTY_162.0,HHNATVTY_163.0,HHNATVTY_164.0,HHNATVTY_165.0,HHNATVTY_166.0,HHNATVTY_168.0,HHNATVTY_200.0,HHNATVTY_202.0,HHNATVTY_205.0,HHNATVTY_206.0,HHNATVTY_207.0,HHNATVTY_209.0,HHNATVTY_210.0,HHNATVTY_211.0,HHNATVTY_212.0,HHNATVTY_213.0,HHNATVTY_214.0,HHNATVTY_215.0,HHNATVTY_216.0,HHNATVTY_217.0,HHNATVTY_220.0,HHNATVTY_222.0,HHNATVTY_223.0,HHNATVTY_224.0,HHNATVTY_226.0,HHNATVTY_228.0,HHNATVTY_229.0,HHNATVTY_231.0,HHNATVTY_233.0,HHNATVTY_235.0,HHNATVTY_238.0,HHNATVTY_239.0,HHNATVTY_240.0,HHNATVTY_242.0,HHNATVTY_243.0,HHNATVTY_245.0,HHNATVTY_246.0,HHNATVTY_247.0,HHNATVTY_248.0,HHNATVTY_249.0,HHNATVTY_300.0,HHNATVTY_301.0,HHNATVTY_303.0,HHNATVTY_310.0,HHNATVTY_311.0,HHNATVTY_312.0,HHNATVTY_313.0,HHNATVTY_314.0,HHNATVTY_315.0,HHNATVTY_316.0,HHNATVTY_321.0,HHNATVTY_323.0,HHNATVTY_324.0,HHNATVTY_327.0,HHNATVTY_328.0,HHNATVTY_329.0,HHNATVTY_330.0,HHNATVTY_332.0,HHNATVTY_333.0,HHNATVTY_338.0,HHNATVTY_339.0,HHNATVTY_340.0,HHNATVTY_341.0,HHNATVTY_343.0,HHNATVTY_360.0,HHNATVTY_361.0,HHNATVTY_362.0,HHNATVTY_363.0,HHNATVTY_364.0,HHNATVTY_365.0,HHNATVTY_368.0,HHNATVTY_369.0,HHNATVTY_370.0,HHNATVTY_373.0,HHNATVTY_374.0,HHNATVTY_399.0,HHNATVTY_400.0,HHNATVTY_407.0,HHNATVTY_408.0,HHNATVTY_414.0,HHNATVTY_416.0,HHNATVTY_417.0,HHNATVTY_421.0,HHNATVTY_423.0,HHNATVTY_425.0,HHNATVTY_427.0,HHNATVTY_429.0,HHNATVTY_436.0,HHNATVTY_440.0,HHNATVTY_444.0,HHNATVTY_447.0,HHNATVTY_448.0,HHNATVTY_449.0,HHNATVTY_451.0,HHNATVTY_453.0,HHNATVTY_454.0,HHNATVTY_457.0,HHNATVTY_461.0,HHNATVTY_462.0,HHNATVTY_501.0,HHNATVTY_508.0,HHNATVTY_515.0,HHNATVTY_523.0,HHNATVTY_527.0,HHNATVTY_555.0,HSHLDTYPE_1.0,HSHLDTYPE_2.0,HSHLDTYPE_3.0,HSHLDTYPE_4.0,HSHLDTYPE_5.0,HSHLDTYPE_6.0,HSHLDTYPE_7.0,COOKFUEL_1.0,COOKFUEL_2.0,COOKFUEL_3.0,COOKFUEL_4.0,COOKFUEL_5.0,FIREPLACE_1.0,FIREPLACE_2.0,FIREPLACE_3.0,FIREPLACE_4.0,NUMHEAR_1.0,NUMHEAR_2.0,NUMHEAR_3.0,NUMSEE_1.0,NUMSEE_2.0,NUMSEE_3.0,NUMMEMRY_1.0,NUMMEMRY_2.0,NUMMEMRY_3.0,NUMWALK_1.0,NUMWALK_2.0,NUMWALK_3.0,NUMCARE_1.0,NUMCARE_2.0,NUMCARE_3.0,NUMERRND_1.0,NUMERRND_2.0,NUMERRND_3.0,MVG1COST_-6.0,MVG1COST_1.0,MVG1COST_2.0,MVG1COST_3.0,MVG2COST_-6.0,MVG2COST_1.0,MVG2COST_2.0,MVG2COST_3.0,MVG3COST_-6.0,MVG3COST_1.0,MVG3COST_2.0,MVG3COST_3.0,BATHEXCLU_-6.0,BATHEXCLU_1.0,BATHEXCLU_2.0,NOSTEP_1.0,NOSTEP_2.0,CONDO_1.0,CONDO_2.0,SEARCHFAM_-6.0,SEARCHFAM_1.0,SEARCHFAM_2.0,SEARCHLIST_-6.0,SEARCHLIST_1.0,SEARCHLIST_2.0,SEARCHNET_-6.0,SEARCHNET_1.0,SEARCHNET_2.0,SEARCHOTH_-6.0,SEARCHOTH_1.0,SEARCHOTH_2.0,SEARCHPUB_-6.0,SEARCHPUB_1.0,SEARCHPUB_2.0,SEARCHREA_-6.0,SEARCHREA_1.0,SEARCHREA_2.0,SEARCHSIGN_-6.0,SEARCHSIGN_1.0,SEARCHSIGN_2.0,RMCHANGE_-6.0,RMCHANGE_1.0,RMCHANGE_2.0,RMCOMMUTE_-6.0,RMCOMMUTE_1.0,RMCOMMUTE_2.0,RMCOSTS_-6.0,RMCOSTS_1.0,RMCOSTS_2.0,RMFAMILY_-6.0,RMFAMILY_1.0,RMFAMILY_2.0,RMHOME_-6.0,RMHOME_1.0,RMHOME_2.0,RMHOOD_-6.0,RMHOOD_1.0,RMHOOD_2.0,RMJOB_-6.0,RMJOB_1.0,RMJOB_2.0,RMOWNHH_-6.0,RMOWNHH_1.0,RMOWNHH_2.0,RMOTHER_-6.0,RMOTHER_1.0,RMOTHER_2.0,OWNLOT_-6.0,OWNLOT_1.0,OWNLOT_2.0,HHSEX_1.0,HHSEX_2.0,KITEXCLU_-6.0,KITEXCLU_1.0,KITEXCLU_2.0,FRIDGE_1.0,FRIDGE_2.0,KITCHSINK_1.0,KITCHSINK_2.0,WASHER_1.0,WASHER_2.0,WINBARS_-6.0,WINBARS_1.0,WINBARS_2.0,HHHEAR_1.0,HHHEAR_2.0,HHSEE_1.0,HHSEE_2.0,HHMEMRY_1.0,HHMEMRY_2.0,HHWALK_1.0,HHWALK_2.0,HHCARE_1.0,HHCARE_2.0,HHERRND_1.0,HHERRND_2.0,FIRSTHOME_1.0
0,0,11000006,2017,10.0,0.206244,0.0,0.0,5.0,361.0,0.0,1.0,1.0,64.0,1990.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,3.0,1.0,4.0,3.0,1.0,3.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1
1,1,11000016,2017,8.0,0.216178,0.0,0.0,6.0,501.0,1.0,0.0,3.0,38.0,2015.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,2.0,0.0,5.0,1.0,4.0,3.0,1.0,2.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1
2,2,11000017,2017,9.0,0.206717,0.0,0.0,7.0,501.0,1.0,0.0,3.0,43.0,2016.0,3.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,2.0,2.0,6.0,3.0,1.0,4.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1
3,3,11000023,2017,8.0,0.602649,0.0,0.0,7.0,501.0,1.0,0.0,3.0,52.0,2004.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,2.0,2.0,4.0,3.0,1.0,2.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1
4,4,11000046,2017,10.0,0.197729,0.0,0.0,5.0,52.0,0.0,0.0,1.0,37.0,2000.0,0.0,0.0,2.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,5.0,0.0,2.0,0.0,2.0,1.0,2.0,3.0,1.0,2.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1


In [154]:
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

In [155]:
classifiers = [LinearRegression(),
              Lasso(),
              Ridge()]

In [167]:
scores_list = []
for classifier in classifiers:
    pipe = Pipeline(steps=[('classifier', classifier)])
    pipe.fit(X_train, y_train)
    print(classifier)
    print("model score: %.3f" % pipe.score(X_test, y_test))
    scores_list.append([classifier, pipe.score(X_test, y_test)])
    print('\n')

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)
model score: 0.022


Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)
model score: 0.012


Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)
model score: 0.022




In [168]:
scores_list

[[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
           normalize=False), 0.022055588479851118],
 [Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
     normalize=False, positive=False, precompute=False, random_state=None,
     selection='cyclic', tol=0.0001, warm_start=False), 0.012346060663770307],
 [Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
     normalize=False, random_state=None, solver='auto', tol=0.001),
  0.022088164339460592]]

Trying to make polynomials work

In [157]:
def PolynomialRegression(degree=2, **kwargs):
    return make_pipeline(PolynomialFeatures(degree), 
                         LinearRegression(**kwargs))

In [162]:
poly_model = make_pipeline(PolynomialFeatures(2), LinearRegression())

In [163]:
poly_model.fit(X_train, y_train)

MemoryError: 

In [None]:
y_model = poly_model.predict(X_test[:, np.newaxis], y)

In [110]:
poly = PolynomialFeatures(3, include_bias=False)
poly.fit_transform(X_train)

MemoryError: 

Just do the models...

In [173]:
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
print('R-Squared: {}'.format(r2_score(y_test, y_pred)))

R-Squared: 0.11593292920423126


# Classification Models!

In [100]:
from sklearn import metrics

from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression, SGDClassifier
from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn import tree
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier, ExtraTreesClassifier, RandomForestClassifier

In [130]:
df_class =pd.read_csv(os.path.join(os.getcwd(), 'data', 'working', 'AHS Household Class.csv'))

In [131]:
y = df_class['RATINGHS_BIN'].copy().values
X = df_class.drop(['RATINGHS_BIN','CONTROL','YEAR'], axis=1).copy().values

In [132]:
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)

In [150]:
def score_model(X, y, estimator, **kwargs):
    """
    Test various estimators.
    """ 
    #NOTE: for capstone add X_test, X_train, Y_test, Y_train for capstone code.  
    #Bake into model to see if it does cross validation, if not there do CV.
   
    scores = {'precision':[], 'recall':[], 'accuracy':[], 'f1':[]}
    
    # Instantiate the classification model and visualizer
    model.fit(X, y, **kwargs)  
    
    expected  = y
    predicted = model.predict(X)
        
    # Append our scores to the tracker
    scores['precision'].append(metrics.precision_score(expected, predicted, average=None))
    scores['recall'].append(metrics.recall_score(expected, predicted, average=None))
    scores['accuracy'].append(metrics.accuracy_score(expected, predicted))
    scores['f1'].append(metrics.f1_score(expected, predicted, average=None))
        
    # Compute and return F1 (harmonic mean of precision and recall), Precision, Recall, Accuracy   
    print("{}".format(estimator.__class__.__name__))    
    print("Validation scores are as follows:\n")
    print(pd.DataFrame(scores).mean())

In [134]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, train_size=0.66)



In [149]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

scores = {'precision':[], 'recall':[], 'accuracy':[], 'f1':[]}
scores['precision'].append(metrics.precision_score(y_test, y_pred, average=None))
scores['recall'].append(metrics.recall_score(y_test, y_pred, average=None))
scores['accuracy'].append(metrics.accuracy_score(y_test, y_pred))
scores['f1'].append(metrics.f1_score(y_test, y_pred, average=None))

print("{}".format(GaussianNB().__class__.__name__))    
print("Validation scores are as follows:\n")
print(pd.DataFrame(scores).mean())

GaussianNB
Validation scores are as follows:

precision    0.386680
recall       0.687198
accuracy     0.356170
f1           0.494890
Name: 0, dtype: float64


In [144]:
metrics.precision_score(y_test, y_pred, average=None)

array([0.38668026, 0.33272974, 0.30555556, 0.19144144])

In [151]:
models = [
    GaussianNB(), 
    MultinomialNB(),
    BernoulliNB(),
    tree.DecisionTreeClassifier(),
    LinearDiscriminantAnalysis(),
    LogisticRegression(solver='lbfgs', max_iter=6000), 
    LogisticRegressionCV(cv=3, max_iter=6000), 
    BaggingClassifier(), 
    ExtraTreesClassifier(n_estimators=100), 
    RandomForestClassifier(n_estimators=100)
]
for model in models:
    score_model(X, y, model)

GaussianNB
Validation scores are as follows:

precision    0.388247
recall       0.677084
accuracy     0.358867
f1           0.493510
Name: 0, dtype: float64


ValueError: Input X must be non-negative