In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from math import exp

from sklearn.linear_model import Ridge
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


# Notebook-wide pandas display settings: allow long/wide frames to be shown
# without truncation and print floats with five decimal places.
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 300,
    'display.width', 1000,
    'display.float_format', lambda x: '%.5f' % x,
)

In [2]:
from jj_imputer import HousingImpute
from jj_dummification import *

In [3]:
# Wrap the test set in the project's HousingImpute helper. As the output below
# shows, construction prints the missing-value count of each affected column and
# lists the columns for which the helper has no impute method.
test_housing = HousingImpute('test.csv')

MSZoning 4
LotFrontage 227
Alley 1352
Utilities 2
Exterior1st 1
Exterior2nd 1
MasVnrType 16
MasVnrArea 15
BsmtQual 44
BsmtCond 45
BsmtExposure 44
BsmtFinType1 42
BsmtFinSF1 1
BsmtFinType2 42
BsmtFinSF2 1
BsmtUnfSF 1
TotalBsmtSF 1
BsmtFullBath 2
BsmtHalfBath 2
KitchenQual 1
Functional 2
FireplaceQu 730
GarageType 76
GarageYrBlt 78
GarageFinish 78
GarageCars 1
GarageArea 1
GarageQual 78
GarageCond 78
PoolQC 1456
Fence 1169
MiscFeature 1408
SaleType 1
MSZoning does not have a current impute method
Utilities does not have a current impute method
Exterior2nd does not have a current impute method
BsmtFinSF1 does not have a current impute method
BsmtFinSF2 does not have a current impute method
TotalBsmtSF does not have a current impute method
BsmtHalfBath does not have a current impute method
Functional does not have a current impute method
GarageCars does not have a current impute method
SaleType does not have a current impute method


In [4]:
# Apply the imputers the helper provides; it reports the columns it has no
# imputer for and the ones handled inside another column's imputer.
test_housing.run_imputers()

no imputer for Exterior1st
MasVnrArea imputer embedded in another imputer
BsmtCond imputer embedded in another imputer
BsmtExposure imputer embedded in another imputer
BsmtFinType1 imputer embedded in another imputer
BsmtFinType2 imputer embedded in another imputer
no imputer for BsmtUnfSF
no imputer for BsmtFullBath
no imputer for KitchenQual
GarageYrBlt imputer embedded in another imputer
GarageFinish imputer embedded in another imputer
no imputer for GarageArea
GarageQual imputer embedded in another imputer
GarageCond imputer embedded in another imputer


In [5]:
# List what the automated pass left behind: per column, the number of missing
# values and the Ids of the affected rows. These are filled by hand below.
test_housing.left_to_impute()

MSZoning 4
--------------------
Id
1916    NaN
2217    NaN
2251    NaN
2905    NaN
Name: MSZoning, dtype: object
--------------------
Utilities 2
--------------------
Id
1916    NaN
1946    NaN
Name: Utilities, dtype: object
--------------------
Exterior1st 1
--------------------
Id
2152    NaN
Name: Exterior1st, dtype: object
--------------------
Exterior2nd 1
--------------------
Id
2152    NaN
Name: Exterior2nd, dtype: object
--------------------
BsmtQual 2
--------------------
Id
2218    NaN
2219    NaN
Name: BsmtQual, dtype: object
--------------------
BsmtCond 3
--------------------
Id
2041    NaN
2186    NaN
2525    NaN
Name: BsmtCond, dtype: object
--------------------
BsmtExposure 2
--------------------
Id
1488    NaN
2349    NaN
Name: BsmtExposure, dtype: object
--------------------
BsmtFinSF1 1
--------------------
Id
2121   nan
Name: BsmtFinSF1, dtype: float64
--------------------
BsmtFinSF2 1
--------------------
Id
2121   nan
Name: BsmtFinSF2, dtype: float64
-------------

In [6]:
# Frequency of each zoning class, used to pick a fill value for the MSZoning gaps.
test_housing.df.MSZoning.value_counts()

RL         1114
RM          242
FV           74
C (all)      15
RH           10
Name: MSZoning, dtype: int64

In [7]:
# MSZoning has no automated imputer; fill the four remaining gaps with 'RL',
# the most frequent zoning class in the test set.
test_housing.df.loc[[1916, 2217, 2251, 2905], 'MSZoning'] = 'RL'

# Re-list the outstanding gaps to confirm MSZoning is done.
test_housing.left_to_impute()



Utilities 2
--------------------
Id
1916    NaN
1946    NaN
Name: Utilities, dtype: object
--------------------
Exterior1st 1
--------------------
Id
2152    NaN
Name: Exterior1st, dtype: object
--------------------
Exterior2nd 1
--------------------
Id
2152    NaN
Name: Exterior2nd, dtype: object
--------------------
BsmtQual 2
--------------------
Id
2218    NaN
2219    NaN
Name: BsmtQual, dtype: object
--------------------
BsmtCond 3
--------------------
Id
2041    NaN
2186    NaN
2525    NaN
Name: BsmtCond, dtype: object
--------------------
BsmtExposure 2
--------------------
Id
1488    NaN
2349    NaN
Name: BsmtExposure, dtype: object
--------------------
BsmtFinSF1 1
--------------------
Id
2121   nan
Name: BsmtFinSF1, dtype: float64
--------------------
BsmtFinSF2 1
--------------------
Id
2121   nan
Name: BsmtFinSF2, dtype: float64
--------------------
BsmtUnfSF 1
--------------------
Id
2121   nan
Name: BsmtUnfSF, dtype: float64
--------------------
TotalBsmtSF 1
------------

In [8]:
# Check which Utilities levels occur, to choose a fill value for the two gaps.
test_housing.df.Utilities.value_counts()

AllPub    1457
Name: Utilities, dtype: int64

In [9]:
# 'AllPub' is the only Utilities level recorded in the test set, so both
# missing entries receive it.
test_housing.df.loc[[1916, 1946], 'Utilities'] = 'AllPub'

In [10]:
# Re-list the outstanding gaps; Utilities should no longer appear.
test_housing.left_to_impute()

Exterior1st 1
--------------------
Id
2152    NaN
Name: Exterior1st, dtype: object
--------------------
Exterior2nd 1
--------------------
Id
2152    NaN
Name: Exterior2nd, dtype: object
--------------------
BsmtQual 2
--------------------
Id
2218    NaN
2219    NaN
Name: BsmtQual, dtype: object
--------------------
BsmtCond 3
--------------------
Id
2041    NaN
2186    NaN
2525    NaN
Name: BsmtCond, dtype: object
--------------------
BsmtExposure 2
--------------------
Id
1488    NaN
2349    NaN
Name: BsmtExposure, dtype: object
--------------------
BsmtFinSF1 1
--------------------
Id
2121   nan
Name: BsmtFinSF1, dtype: float64
--------------------
BsmtFinSF2 1
--------------------
Id
2121   nan
Name: BsmtFinSF2, dtype: float64
--------------------
BsmtUnfSF 1
--------------------
Id
2121   nan
Name: BsmtUnfSF, dtype: float64
--------------------
TotalBsmtSF 1
--------------------
Id
2121   nan
Name: TotalBsmtSF, dtype: float64
--------------------
BsmtFullBath 2
-------------------

In [11]:
# Frequency of each primary exterior material, used to fill house 2152.
test_housing.df.Exterior1st.value_counts()

VinylSd    510
MetalSd    230
HdBoard    220
Wd Sdng    205
Plywood    113
CemntBd     65
BrkFace     37
WdShing     30
AsbShng     24
Stucco      18
BrkComm      4
AsphShn      1
CBlock       1
Name: Exterior1st, dtype: int64

In [12]:
# House 2152 lacks both exterior materials. Exterior1st gets 'VinylSd', the most
# frequent primary material.
# NOTE(review): Exterior2nd is set to 'MetalSd', yet its own distribution is never
# inspected in this notebook -- confirm this value is intended.
test_housing.df.loc[2152, ['Exterior1st', 'Exterior2nd']] = ['VinylSd', 'MetalSd']

In [13]:
# Re-list the outstanding gaps; the basement columns are next.
test_housing.left_to_impute()

BsmtQual 2
--------------------
Id
2218    NaN
2219    NaN
Name: BsmtQual, dtype: object
--------------------
BsmtCond 3
--------------------
Id
2041    NaN
2186    NaN
2525    NaN
Name: BsmtCond, dtype: object
--------------------
BsmtExposure 2
--------------------
Id
1488    NaN
2349    NaN
Name: BsmtExposure, dtype: object
--------------------
BsmtFinSF1 1
--------------------
Id
2121   nan
Name: BsmtFinSF1, dtype: float64
--------------------
BsmtFinSF2 1
--------------------
Id
2121   nan
Name: BsmtFinSF2, dtype: float64
--------------------
BsmtUnfSF 1
--------------------
Id
2121   nan
Name: BsmtUnfSF, dtype: float64
--------------------
TotalBsmtSF 1
--------------------
Id
2121   nan
Name: TotalBsmtSF, dtype: float64
--------------------
BsmtFullBath 2
--------------------
Id
2121   nan
2189   nan
Name: BsmtFullBath, dtype: float64
--------------------
BsmtHalfBath 2
--------------------
Id
2121   nan
2189   nan
Name: BsmtHalfBath, dtype: float64
--------------------
KitchenQ

In [14]:
# Level frequencies of the two basement ratings that still have gaps.
# display() is called explicitly so that both tables render even when the kernel
# only echoes the last expression of a cell (IPython's default setting).
display(test_housing.df.BsmtQual.value_counts())
display(test_housing.df.BsmtCond.value_counts())


TA         634
Gd         591
Ex         137
Fa          53
No_Bsmt     42
Name: BsmtQual, dtype: int64

TA         1295
Fa           59
Gd           57
No_Bsmt      42
Po            3
Name: BsmtCond, dtype: int64

In [15]:
# Fill the leftover basement quality/condition gaps with 'TA', the most
# frequent level of both columns.
test_housing.df.loc[[2218, 2219], 'BsmtQual'] = 'TA'
test_housing.df.loc[[2041, 2186, 2525], 'BsmtCond'] = 'TA'

# Re-list the outstanding gaps.
test_housing.left_to_impute()


BsmtExposure 2
--------------------
Id
1488    NaN
2349    NaN
Name: BsmtExposure, dtype: object
--------------------
BsmtFinSF1 1
--------------------
Id
2121   nan
Name: BsmtFinSF1, dtype: float64
--------------------
BsmtFinSF2 1
--------------------
Id
2121   nan
Name: BsmtFinSF2, dtype: float64
--------------------
BsmtUnfSF 1
--------------------
Id
2121   nan
Name: BsmtUnfSF, dtype: float64
--------------------
TotalBsmtSF 1
--------------------
Id
2121   nan
Name: TotalBsmtSF, dtype: float64
--------------------
BsmtFullBath 2
--------------------
Id
2121   nan
2189   nan
Name: BsmtFullBath, dtype: float64
--------------------
BsmtHalfBath 2
--------------------
Id
2121   nan
2189   nan
Name: BsmtHalfBath, dtype: float64
--------------------
KitchenQual 1
--------------------
Id
1556    NaN
Name: KitchenQual, dtype: object
--------------------
Functional 2
--------------------
Id
2217    NaN
2474    NaN
Name: Functional, dtype: object
--------------------
GarageYrBlt 2
--------

In [16]:
# Frequency of each basement exposure level, used to fill the two remaining gaps.
test_housing.df.BsmtExposure.value_counts()

No         951
Av         197
Gd         142
Mn         125
No_Bsmt     42
Name: BsmtExposure, dtype: int64

In [17]:
# Inspect the two houses whose BsmtExposure is still missing. Both do have a
# basement, so the gap will be imputed as 'No' exposure rather than 'No_Bsmt'.
test_housing.df.loc[[1488, 2349], :]

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1
1488,20,RL,73.0,8987,Pave,No_Alley,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,226.0,Gd,TA,PConc,Gd,TA,,Unf,0.0,Unf,0.0,1595.0,1595.0,GasA,Ex,Y,SBrkr,1595,0,0,1595,0.0,0.0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2005.0,RFn,3.0,880.0,TA,TA,Y,144,0,0,0,0,0,No_Pool,No_Fence,No_MF,0,5,2010,WD,Normal
2349,60,FV,81.0,10411,Pave,No_Alley,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,1Fam,2Story,5,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,Gd,TA,CBlock,Gd,TA,,Unf,0.0,Unf,0.0,725.0,725.0,GasA,Ex,Y,SBrkr,725,863,0,1588,0.0,0.0,3,0,3,1,Gd,8,Typ,0,No_FP,Attchd,2007.0,Unf,2.0,561.0,TA,TA,Y,0,0,0,0,0,0,No_Pool,No_Fence,No_MF,0,7,2007,New,Partial


In [18]:
# Both houses have a basement, so use 'No' (no exposure), which is also the
# most frequent BsmtExposure level.
test_housing.df.loc[[1488, 2349], 'BsmtExposure'] = 'No'

In [19]:
# Re-list the outstanding gaps; the numeric basement columns come next.
test_housing.left_to_impute()

BsmtFinSF1 1
--------------------
Id
2121   nan
Name: BsmtFinSF1, dtype: float64
--------------------
BsmtFinSF2 1
--------------------
Id
2121   nan
Name: BsmtFinSF2, dtype: float64
--------------------
BsmtUnfSF 1
--------------------
Id
2121   nan
Name: BsmtUnfSF, dtype: float64
--------------------
TotalBsmtSF 1
--------------------
Id
2121   nan
Name: TotalBsmtSF, dtype: float64
--------------------
BsmtFullBath 2
--------------------
Id
2121   nan
2189   nan
Name: BsmtFullBath, dtype: float64
--------------------
BsmtHalfBath 2
--------------------
Id
2121   nan
2189   nan
Name: BsmtHalfBath, dtype: float64
--------------------
KitchenQual 1
--------------------
Id
1556    NaN
Name: KitchenQual, dtype: object
--------------------
Functional 2
--------------------
Id
2217    NaN
2474    NaN
Name: Functional, dtype: object
--------------------
GarageYrBlt 2
--------------------
Id
2127   nan
2577   nan
Name: GarageYrBlt, dtype: float64
--------------------
GarageFinish 2
----------

In [20]:
# House 2121 is missing every numeric basement measurement; set them all to zero.
# NOTE(review): presumably this house has no basement -- confirm against its
# categorical Bsmt* fields.
test_housing.df.loc[
    2121,
    [
        'BsmtFinSF1',
        'BsmtFinSF2',
        'BsmtUnfSF',
        'TotalBsmtSF',
        'BsmtFullBath',
        'BsmtHalfBath',
    ],
] = 0.0

In [21]:
# Re-list the outstanding gaps; only house 2189 should still lack basement bath counts.
test_housing.left_to_impute()

BsmtFullBath 1
--------------------
Id
2189   nan
Name: BsmtFullBath, dtype: float64
--------------------
BsmtHalfBath 1
--------------------
Id
2189   nan
Name: BsmtHalfBath, dtype: float64
--------------------
KitchenQual 1
--------------------
Id
1556    NaN
Name: KitchenQual, dtype: object
--------------------
Functional 2
--------------------
Id
2217    NaN
2474    NaN
Name: Functional, dtype: object
--------------------
GarageYrBlt 2
--------------------
Id
2127   nan
2577   nan
Name: GarageYrBlt, dtype: float64
--------------------
GarageFinish 2
--------------------
Id
2127    NaN
2577    NaN
Name: GarageFinish, dtype: object
--------------------
GarageCars 1
--------------------
Id
2577   nan
Name: GarageCars, dtype: float64
--------------------
GarageArea 1
--------------------
Id
2577   nan
Name: GarageArea, dtype: float64
--------------------
GarageQual 2
--------------------
Id
2127    NaN
2577    NaN
Name: GarageQual, dtype: object
--------------------
GarageCond 2
------

In [22]:
# House 2189 only lacks the two basement bathroom counts; set both to zero.
# NOTE(review): assumes the house has no basement bathrooms -- confirm against
# its other Bsmt* fields.
test_housing.df.loc[2189,['BsmtFullBath','BsmtHalfBath']] = 0.0

In [23]:
# Kitchen quality frequencies, followed by the full record of the one house
# (Id 1556) whose KitchenQual is missing.
# display() is called explicitly so that both outputs render even when the kernel
# only echoes the last expression of a cell (IPython's default setting).
display(test_housing.df.KitchenQual.value_counts())
display(test_housing.df.loc[1556])

TA    757
Gd    565
Ex    105
Fa     31
Name: KitchenQual, dtype: int64

MSSubClass              50
MSZoning                RL
LotFrontage       72.00000
LotArea              10632
Street                Pave
Alley             No_Alley
LotShape               IR1
LandContour            Lvl
Utilities           AllPub
LotConfig           Inside
LandSlope              Gtl
Neighborhood       ClearCr
Condition1            Norm
Condition2            Norm
BldgType              1Fam
HouseStyle          1.5Fin
OverallQual              5
OverallCond              3
YearBuilt             1917
YearRemodAdd          1950
RoofStyle            Gable
RoofMatl           CompShg
Exterior1st        Wd Sdng
Exterior2nd        Wd Sdng
MasVnrType            None
MasVnrArea         0.00000
ExterQual               TA
ExterCond               TA
Foundation          BrkTil
BsmtQual                Gd
BsmtCond                Fa
BsmtExposure            No
BsmtFinType1           Unf
BsmtFinSF1         0.00000
BsmtFinType2           Unf
BsmtFinSF2         0.00000
BsmtUnfSF        689.00000
T

In [24]:
# 'TA' is the most frequent kitchen quality in the test set; use it for house 1556.
test_housing.df.loc[1556, 'KitchenQual'] = 'TA'

In [25]:
# Re-list the outstanding gaps; Functional and the garage columns remain.
test_housing.left_to_impute()

Functional 2
--------------------
Id
2217    NaN
2474    NaN
Name: Functional, dtype: object
--------------------
GarageYrBlt 2
--------------------
Id
2127   nan
2577   nan
Name: GarageYrBlt, dtype: float64
--------------------
GarageFinish 2
--------------------
Id
2127    NaN
2577    NaN
Name: GarageFinish, dtype: object
--------------------
GarageCars 1
--------------------
Id
2577   nan
Name: GarageCars, dtype: float64
--------------------
GarageArea 1
--------------------
Id
2577   nan
Name: GarageArea, dtype: float64
--------------------
GarageQual 2
--------------------
Id
2127    NaN
2577    NaN
Name: GarageQual, dtype: object
--------------------
GarageCond 2
--------------------
Id
2127    NaN
2577    NaN
Name: GarageCond, dtype: object
--------------------
PoolQC 3
--------------------
Id
2421    NaN
2504    NaN
2600    NaN
Name: PoolQC, dtype: object
--------------------
SaleType 1
--------------------
Id
2490    NaN
Name: SaleType, dtype: object
--------------------


In [26]:
# Show the Functional level frequencies (before filling), then give the two
# missing entries the dominant level 'Typ'.
# display() is required here: the cell ends with an assignment, so under
# IPython's default settings a bare expression above it would not be rendered.
display(test_housing.df.Functional.value_counts())
test_housing.df.loc[[2217, 2474], 'Functional'] = 'Typ'

Typ     1357
Min2      36
Min1      34
Mod       20
Maj1       5
Maj2       4
Sev        1
Name: Functional, dtype: int64

In [27]:
# Re-list the outstanding gaps; the garage columns are next.
test_housing.left_to_impute()

GarageYrBlt 2
--------------------
Id
2127   nan
2577   nan
Name: GarageYrBlt, dtype: float64
--------------------
GarageFinish 2
--------------------
Id
2127    NaN
2577    NaN
Name: GarageFinish, dtype: object
--------------------
GarageCars 1
--------------------
Id
2577   nan
Name: GarageCars, dtype: float64
--------------------
GarageArea 1
--------------------
Id
2577   nan
Name: GarageArea, dtype: float64
--------------------
GarageQual 2
--------------------
Id
2127    NaN
2577    NaN
Name: GarageQual, dtype: object
--------------------
GarageCond 2
--------------------
Id
2127    NaN
2577    NaN
Name: GarageCond, dtype: object
--------------------
PoolQC 3
--------------------
Id
2421    NaN
2504    NaN
2600    NaN
Name: PoolQC, dtype: object
--------------------
SaleType 1
--------------------
Id
2490    NaN
Name: SaleType, dtype: object
--------------------


In [28]:
# Houses 2127 and 2577 are recorded with a detached garage but lack its details.
# Show the rows first (explicit display(): the cell ends with assignments, so a
# bare expression would not render under IPython's default settings).
display(test_housing.df.loc[[2127, 2577]])

# Use the year the house was built as the garage's construction year.
test_housing.df.loc[[2127, 2577], 'GarageYrBlt'] = test_housing.df.loc[[2127, 2577], 'YearBuilt']

# Manually chosen fill values for the garage finish, quality and condition.
# NOTE(review): house 2577 also lacks GarageCars and GarageArea, so it may have
# no garage at all -- confirm that treating it as having one is intended.
test_housing.df.loc[[2127, 2577], 'GarageFinish'] = 'Unf'
test_housing.df.loc[[2127, 2577], 'GarageQual'] = 'TA'
test_housing.df.loc[[2127, 2577], 'GarageCond'] = 'TA'

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1
2127,60,RM,57.0,8094,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2.5Unf,6,8,1910,1983,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,TA,TA,PConc,TA,TA,Mn,Rec,196.0,Unf,0.0,1046.0,1242.0,GasA,Gd,Y,SBrkr,1242,742,0,1984,0.0,0.0,2,0,5,1,TA,8,Typ,0,No_FP,Detchd,,,1.0,360.0,,,Y,64,0,180,0,0,0,No_Pool,MnPrv,Shed,1000,9,2008,WD,Normal
2577,70,RM,50.0,9060,Pave,No_Alley,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,2Story,5,6,1923,1999,Gable,CompShg,Wd Sdng,Plywood,,0.0,TA,TA,BrkTil,Gd,TA,No,ALQ,548.0,Unf,0.0,311.0,859.0,GasA,Ex,Y,SBrkr,942,886,0,1828,0.0,0.0,2,0,3,1,Gd,6,Typ,0,No_FP,Detchd,,,,,,,Y,174,0,212,0,0,0,No_Pool,MnPrv,No_MF,0,3,2007,WD,Alloca


In [29]:
# Reference statistics from the training set for houses with a detached garage:
# mean capacity/area and the most common capacity. They are used to fill house
# 2577's missing garage size.
arby = pd.read_csv('train.csv', index_col=0)
is_detached = arby['GarageType'] == 'Detchd'

# display() is called explicitly so that both results render even when the kernel
# only echoes the last expression of a cell (IPython's default setting).
display(arby.loc[is_detached, ['GarageCars', 'GarageArea']].mean())
display(arby.loc[is_detached, ['GarageCars']].mode())

GarageCars     1.57623
GarageArea   426.85788
dtype: float64

Unnamed: 0,GarageCars
0,2


In [30]:
# Fill house 2577's garage size from the detached-garage statistics of the
# training set: the modal capacity (2 cars) and the mean area rounded to 427.
test_housing.df.loc[2577, ['GarageCars', 'GarageArea']] = [2, 427]

# Re-list the outstanding gaps.
test_housing.left_to_impute()

PoolQC 3
--------------------
Id
2421    NaN
2504    NaN
2600    NaN
Name: PoolQC, dtype: object
--------------------
SaleType 1
--------------------
Id
2490    NaN
Name: SaleType, dtype: object
--------------------


In [31]:
# The three houses whose PoolQC is missing, followed by the PoolQC level frequencies.
# display() is called explicitly so that both outputs render even when the kernel
# only echoes the last expression of a cell (IPython's default setting).
display(test_housing.df.loc[[2421, 2504, 2600]])
display(test_housing.df.PoolQC.value_counts())

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1
2421,20,RL,75.0,9532,Pave,No_Alley,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1953,1953,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,CBlock,TA,TA,No,BLQ,595.0,Rec,354.0,156.0,1105.0,GasA,Gd,Y,SBrkr,1647,0,0,1647,1.0,0.0,1,0,3,1,TA,6,Min1,1,Fa,Attchd,1953.0,Fin,1.0,280.0,TA,TA,Y,225,0,0,0,0,368,,GdPrv,No_MF,0,2,2007,WD,Normal
2504,50,RL,104.0,23920,Pave,No_Alley,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Feedr,Norm,1Fam,1.5Fin,6,5,1984,1984,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,TA,TA,CBlock,TA,TA,No,Unf,0.0,Unf,0.0,1105.0,1105.0,GasA,Ex,Y,SBrkr,1105,717,0,1822,0.0,0.0,2,0,4,1,Gd,7,Min2,1,Po,Attchd,1984.0,Unf,2.0,515.0,TA,TA,P,0,195,1012,0,0,444,,No_Fence,No_MF,0,4,2007,WD,Normal
2600,20,RL,200.0,43500,Pave,No_Alley,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Artery,Norm,1Fam,1Story,3,5,1953,1953,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,TA,CBlock,No_Bsmt,No_Bsmt,No_Bsmt,No_Bsmt,0.0,No_Bsmt,0.0,0.0,0.0,GasA,Ex,Y,SBrkr,2034,0,0,2034,0.0,0.0,1,0,2,1,TA,9,Min1,0,No_FP,2Types,1953.0,RFn,4.0,1041.0,TA,TA,N,483,266,0,0,0,561,,GdPrv,No_MF,0,6,2007,WD,Normal


No_Pool    1453
Ex            2
Gd            1
Name: PoolQC, dtype: int64

In [32]:
# These three houses report a non-zero PoolArea but no PoolQC, so they get a
# real quality level instead of 'No_Pool'.
# NOTE(review): the level assigned to each house is a manual choice.
test_housing.df.loc[[2421, 2504, 2600], 'PoolQC'] = ['Ex', 'Ex', 'Gd']

# Re-list the outstanding gaps.
test_housing.left_to_impute()

SaleType 1
--------------------
Id
2490    NaN
Name: SaleType, dtype: object
--------------------


In [33]:
# Frequencies of SaleType (one gap left) and of the related SaleCondition column.
# display() is called explicitly so that both tables render even when the kernel
# only echoes the last expression of a cell (IPython's default setting).
display(test_housing.df.SaleType.value_counts())
display(test_housing.df.SaleCondition.value_counts())

WD       1258
New       117
COD        44
ConLD      17
CWD         8
ConLI       4
Oth         4
Con         3
ConLw       3
Name: SaleType, dtype: int64

Normal     1204
Partial     120
Abnorml      89
Family       26
Alloca       12
AdjLand       8
Name: SaleCondition, dtype: int64

In [34]:
# 'WD' is by far the most frequent SaleType; use it for house 2490.
test_housing.df.loc[2490,'SaleType'] = 'WD'

In [35]:
# Final check: this should print nothing once every reported gap has been filled.
test_housing.left_to_impute()

In [36]:
# Persist the imputed frame through the helper under the name 'imputed_test'.
# The file format and location are decided by HousingImpute.save_df (not visible here).
test_housing.save_df('imputed_test')

In [37]:
# Sanity check on the final frame's dimensions (1459 rows x 79 columns in this run).
test_housing.df.shape

(1459, 79)