# Preprocessing and Feature Engineering

In [266]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer

In [267]:
housing = pd.read_csv('./datasets/test_preproc.csv')

In [268]:
# set up display

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_info_rows', 500)
pd.set_option('display.max_rows', 2500)
# idea taken from Jeff Hale

## Missing Values

In [269]:
print(f'Number of rows: {housing.shape[0]}')

print(f'Rows with missing data: {housing.isna().sum().count()}')

Number of rows: 878
Rows with missing data: 80


In [270]:
# display columns with missing values

housing.isna().sum()

Id                   0
PID                  0
MS SubClass          0
MS Zoning            0
Lot Frontage       160
Lot Area             0
Street               0
Alley              820
Lot Shape            0
Land Contour         0
Utilities            0
Lot Config           0
Land Slope           0
Neighborhood         0
Condition 1          0
Condition 2          0
Bldg Type            0
House Style          0
Overall Qual         0
Overall Cond         0
Year Built           0
Year Remod/Add       0
Roof Style           0
Roof Matl            0
Exterior 1st         0
Exterior 2nd         0
Mas Vnr Type         1
Mas Vnr Area         1
Exter Qual           0
Exter Cond           0
Foundation           0
Bsmt Qual           25
Bsmt Cond           25
Bsmt Exposure       25
BsmtFin Type 1      25
BsmtFin SF 1         0
BsmtFin Type 2      25
BsmtFin SF 2         0
Bsmt Unf SF          0
Total Bsmt SF        0
Heating              0
Heating QC           0
Central Air          0
Electrical 

In [271]:
housing['Garage Cars'] = housing['Garage Cars'].astype(float)

In [272]:
housing['Full Bath'].head()

0    2
1    2
2    2
3    1
4    1
Name: Full Bath, dtype: int64

In [273]:
# function to replace missing values 
# it returns None - it is faster this way

def na_replacer(dataframe, columns, new_value):
    for column in columns:
        dataframe[column].fillna(new_value, inplace=True)

In [274]:
# replace NaN with 'No'

columns_to_change = ['Mas Vnr Type', 'Bsmt Qual', 'Bsmt Cond', 
                    'Bsmt Exposure', 'BsmtFin Type 1', 
                    'BsmtFin Type 2', 'Fireplace Qu', 
                    'Bsmt Full Bath', 'Bsmt Half Bath',
                    'Misc Feature', 'Fence', 'Pool QC',
                    'Garage Type', 'Garage Finish', 'Garage Qual', 
                    'Garage Cond', 'Alley', 'Garage Yr Blt']

na_replacer(housing, columns_to_change, 'No')

In [275]:
'Garage Cars' in housing.columns

True

In [276]:
# replace NaN with 0

replace_with_0 = ['BsmtFin SF 1', 'BsmtFin SF 2', 'Bsmt Unf SF', 'Total Bsmt SF', 
                  'Garage Area',
                  'Mas Vnr Area']

na_replacer(housing, replace_with_0, 0)

In [277]:
'Garage Cars' in housing.columns

True

In [278]:
# check if there are rows where the Garage Year Built is missing - 
# but other data indicates that there was a garage

housing[(housing['Garage Yr Blt'].isna()) & (housing['Garage Type'] != 'No Garage')]

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,Utilities,Lot Config,Land Slope,Neighborhood,Condition 1,Condition 2,Bldg Type,House Style,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Roof Style,Roof Matl,Exterior 1st,Exterior 2nd,Mas Vnr Type,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating,Heating QC,Central Air,Electrical,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Type,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type


In [279]:
'Garage Cars' in housing.columns

True

In [280]:
# garage year built - where there is no garage is NaN

housing['Garage Yr Blt'].isna().sum()

0

In [281]:
'Garage Cars' in housing.columns

True

In [282]:
# replace missing values in 'Lot Frontage' with the median linear feet value

median = housing['Lot Frontage'].median()
print(f"Median lot frontage: {median}")
na_replacer(housing, ['Lot Frontage'], median)

Median lot frontage: 68.0


In [283]:
'Garage Cars' in housing.columns

True

In [284]:
# check missing data for 'Electrical' - replace it with the most frequent value

mode = housing['Electrical'].mode()
print(f"Most frequent electrical system: {mode[0]}")
na_replacer(housing, ['Electrical'], mode[0])

Most frequent electrical system: SBrkr


In [285]:
'Garage Cars' in housing.columns

True

In [286]:
housing.isna().sum().sum()

0

# Create Dummy Variables

In [287]:
# create MS Zoning dummy = 'non-residential' ('Non Resid')
# 1 where zoning is Agricultural, Commercial, Industrial

non_resid = (housing['MS Zoning'] == 'A (agr)') | (housing['MS Zoning'] == 'I (all)') | (housing['MS Zoning'] == 'C (all)')
housing['Non Resid'] = non_resid.map({False: 0, True: 1})

In [288]:
'Garage Cars' in housing.columns

True

In [289]:
# create open porch binary variable = has open porch = 1, no open porch = 0

housing['Has Open Porch'] = [1 if i > 0 else 0 for i in housing['Open Porch SF']]
housing['Has Open Porch'].value_counts()

1    490
0    388
Name: Has Open Porch, dtype: int64

In [290]:
# get dummies function

def dummies(dataframe, columns):
    for column in columns:
        print(column)
        dataframe = pd.get_dummies(dataframe, columns=column, drop_first=True)
    return dataframe

In [291]:
columns = [housing.columns.drop(['Id', 'PID', 'Lot Frontage', 'Lot Area',
                      'Overall Qual', 'Overall Cond', 'Year Built',
                      'Year Remod/Add', 'Mas Vnr Area', 'BsmtFin SF 1', 
                      'BsmtFin SF 2', 'Bsmt Unf SF', 'Total Bsmt SF',
                      '1st Flr SF', '2nd Flr SF', 'Low Qual Fin SF', 
                      'Gr Liv Area', 'Garage Yr Blt', 'Garage Area', 
                      'Wood Deck SF', 'Open Porch SF', 'Enclosed Porch',
                      '3Ssn Porch', 'Screen Porch', 'Pool Area', 'Yr Sold', 
                      'Non Resid', 'Has Open Porch'])]

In [292]:
'Garage Cars' in housing.columns

True

In [293]:
housing = dummies(housing, columns)
housing.shape

Index(['MS SubClass', 'MS Zoning', 'Street', 'Alley', 'Lot Shape',
       'Land Contour', 'Utilities', 'Lot Config', 'Land Slope', 'Neighborhood',
       'Condition 1', 'Condition 2', 'Bldg Type', 'House Style', 'Roof Style',
       'Roof Matl', 'Exterior 1st', 'Exterior 2nd', 'Mas Vnr Type',
       'Exter Qual', 'Exter Cond', 'Foundation', 'Bsmt Qual', 'Bsmt Cond',
       'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2', 'Heating',
       'Heating QC', 'Central Air', 'Electrical', 'Bsmt Full Bath',
       'Bsmt Half Bath', 'Full Bath', 'Half Bath', 'Bedroom AbvGr',
       'Kitchen AbvGr', 'Kitchen Qual', 'TotRms AbvGrd', 'Functional',
       'Fireplaces', 'Fireplace Qu', 'Garage Type', 'Garage Finish',
       'Garage Cars', 'Garage Qual', 'Garage Cond', 'Paved Drive', 'Pool QC',
       'Fence', 'Misc Feature', 'Misc Val', 'Mo Sold', 'Sale Type'],
      dtype='object')


(878, 313)

In [294]:
'Full Bath_1' in housing.columns

True

In [295]:
'Garage Cars' in housing.columns

False

In [296]:
# create enclosed porch binary variable = has enclosed porch = 1, no enclosed porch = 0

housing['Has Enclosed Porch'] = [1 if i > 0 else 0 for i in housing['Enclosed Porch']]
housing['Has Enclosed Porch'].value_counts()

0    746
1    132
Name: Has Enclosed Porch, dtype: int64

In [297]:
housing['Has Wood Deck'] = [1 if i > 0 else 0 for i in housing['Wood Deck SF']]
housing['Has Wood Deck'].value_counts()

0    451
1    427
Name: Has Wood Deck, dtype: int64

In [298]:
# has three season porch

housing['Has 3S Porch'] = [1 if i > 0 else 0 for i in housing['3Ssn Porch']]
housing['Has 3S Porch'].value_counts()

0    867
1     11
Name: Has 3S Porch, dtype: int64

In [299]:
# has screened porch

housing['Has Screened Porch'] = [1 if i > 0 else 0 for i in housing['Screen Porch']]
housing['Has Screened Porch'].value_counts()

0    803
1     75
Name: Has Screened Porch, dtype: int64

In [300]:
# has pool

housing['Has Pool'] = [1 if i > 0 else 0 for i in housing['Pool Area']]
housing['Has Pool'].value_counts()

0    874
1      4
Name: Has Pool, dtype: int64

## Feature Engineering

In [301]:
# create variable 'Total SF' - 

housing['Total SF'] = (housing['Total Bsmt SF'] - housing['Bsmt Unf SF']) + \
                        housing['Gr Liv Area']
housing['Total SF'].head()

0    1928
1    1967
2    2050
3     968
4    2003
Name: Total SF, dtype: int64

In [302]:
housing = dummies(housing, [['Yr Sold']])

['Yr Sold']


In [303]:
housing.head()

Unnamed: 0,Id,PID,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Garage Yr Blt,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Non Resid,Has Open Porch,MS SubClass_30,MS SubClass_40,MS SubClass_45,MS SubClass_50,MS SubClass_60,MS SubClass_70,MS SubClass_75,MS SubClass_80,MS SubClass_85,MS SubClass_90,MS SubClass_120,MS SubClass_160,MS SubClass_180,MS SubClass_190,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Pave,Alley_No,Alley_Pave,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_NoSewr,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Land Slope_Mod,Land Slope_Sev,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_No,Mas Vnr Type_None,Mas Vnr Type_Stone,Exter Qual_Fa,Exter Qual_Gd,Exter Qual_TA,Exter Cond_Fa,Exter Cond_Gd,Exter Cond_Po,Exter Cond_TA,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,Bsmt Qual_Fa,Bsmt Qual_Gd,Bsmt Qual_No,Bsmt Qual_Po,Bsmt Qual_TA,Bsmt Cond_Gd,Bsmt Cond_No,Bsmt Cond_TA,Bsmt Exposure_Gd,Bsmt Exposure_Mn,Bsmt Exposure_No,BsmtFin Type 1_BLQ,BsmtFin Type 1_GLQ,BsmtFin Type 1_LwQ,BsmtFin Type 1_No,BsmtFin Type 1_Rec,BsmtFin Type 1_Unf,BsmtFin Type 2_BLQ,BsmtFin Type 2_GLQ,BsmtFin Type 2_LwQ,BsmtFin Type 2_No,BsmtFin Type 2_Rec,BsmtFin Type 2_Unf,Heating_GasA,Heating_GasW,Heating_Grav,Heating QC_Fa,Heating QC_Gd,Heating QC_TA,Central Air_Y,Electrical_FuseF,Electrical_FuseP,Electrical_SBrkr,Bsmt Full Bath_1,Bsmt Full Bath_2,Bsmt Half Bath_1,Full Bath_1,Full Bath_2,Full Bath_3,Full Bath_4,Half Bath_1,Half Bath_2,Bedroom AbvGr_1,Bedroom AbvGr_2,Bedroom AbvGr_3,Bedroom AbvGr_4,Bedroom AbvGr_5,Bedroom AbvGr_6,Kitchen AbvGr_1,Kitchen AbvGr_2,Kitchen AbvGr_3,Kitchen Qual_Fa,Kitchen Qual_Gd,Kitchen Qual_Po,Kitchen Qual_TA,TotRms AbvGrd_4,TotRms AbvGrd_5,TotRms AbvGrd_6,TotRms AbvGrd_7,TotRms AbvGrd_8,TotRms AbvGrd_9,TotRms AbvGrd_10,TotRms AbvGrd_11,TotRms AbvGrd_12,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Typ,Fireplaces_1,Fireplaces_2,Fireplaces_3,Fireplace Qu_Fa,Fireplace Qu_Gd,Fireplace Qu_No,Fireplace Qu_Po,Fireplace Qu_TA,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_No,Garage Finish_No,Garage Finish_RFn,Garage Finish_Unf,Garage Cars_1.0,Garage Cars_2.0,Garage Cars_3.0,Garage Cars_4.0,Garage Qual_Gd,Garage Qual_No,Garage Qual_Po,Garage Qual_TA,Garage Cond_Fa,Garage Cond_Gd,Garage Cond_No,Garage Cond_Po,Garage Cond_TA,Paved Drive_P,Paved Drive_Y,Pool QC_No,Pool QC_TA,Fence_GdWo,Fence_MnPrv,Fence_MnWw,Fence_No,Misc Feature_No,Misc Feature_Othr,Misc Feature_Shed,Misc Val_350,Misc Val_400,Misc Val_420,Misc Val_450,Misc Val_480,Misc Val_490,Misc Val_500,Misc Val_560,Misc Val_600,Misc Val_620,Misc Val_650,Misc Val_700,Misc Val_750,Misc Val_1000,Misc Val_1200,Misc Val_1400,Misc Val_1500,Misc Val_1512,Misc Val_2000,Misc Val_15500,Mo Sold_2,Mo Sold_3,Mo Sold_4,Mo Sold_5,Mo Sold_6,Mo Sold_7,Mo Sold_8,Mo Sold_9,Mo Sold_10,Mo Sold_11,Mo Sold_12,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Has Enclosed Porch,Has Wood Deck,Has 3S Porch,Has Screened Porch,Has Pool,Total SF,Yr Sold_2007,Yr Sold_2008,Yr Sold_2009,Yr Sold_2010
0,2658,902301120,69.0,9142,6,8,1910,1950,0.0,0,0,1020,1020,908,1020,0,1928,1910,440,0,60,112,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1928,0,0,0,0
1,2718,905108090,68.0,9662,5,4,1977,1977,0.0,0,0,1967,1967,1967,0,0,1967,1977,580,170,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1967,0,0,0,0
2,2414,528218130,58.0,17104,7,5,2006,2006,0.0,554,0,100,654,664,832,0,1496,2006,426,100,24,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,2050,0,0,0,0
3,1989,902207150,60.0,8520,5,6,1923,2006,0.0,0,0,968,968,968,0,0,968,1935,480,0,0,184,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,968,1,0,0,0
4,625,535105100,68.0,9500,6,5,1963,1963,247.0,609,0,785,1394,1394,0,0,1394,1963,514,0,76,0,0,185,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,2003,0,0,1,0


In [304]:
housing.to_csv('./datasets/modified_test.csv', index=False)