In [46]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [47]:
data=pd.read_csv("train.csv")
pd.pandas.set_option('display.max_columns',None)

In [48]:
feature_nan=[feature for feature in data.columns if data[feature].isnull().sum()>0 and data[feature].dtypes=='O']
feature_nan

['Alley',
 'MasVnrType',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Electrical',
 'FireplaceQu',
 'GarageType',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PoolQC',
 'Fence',
 'MiscFeature']

In [49]:
def replace_nan(data,feature_nan):
    dataset=data.copy()
    dataset[feature_nan]=dataset[feature_nan].fillna('Missing')
    return dataset
data=replace_nan(data,feature_nan)
data[feature_nan].isnull().sum()

Alley           0
MasVnrType      0
BsmtQual        0
BsmtCond        0
BsmtExposure    0
BsmtFinType1    0
BsmtFinType2    0
Electrical      0
FireplaceQu     0
GarageType      0
GarageFinish    0
GarageQual      0
GarageCond      0
PoolQC          0
Fence           0
MiscFeature     0
dtype: int64

In [50]:
numerical_nan=[feature for feature in data.columns if data[feature].isnull().sum()>0 and data[feature].dtype != 'O']
numerical_nan

['LotFrontage', 'MasVnrArea', 'GarageYrBlt']

In [51]:
for feature in numerical_nan:
    median_val=data[feature].median()
    data[feature+'nan']=np.where(data[feature].isnull(),1,0)
    data[feature].fillna(median_val,inplace=True)
data[numerical_nan].isnull().sum()

LotFrontage    0
MasVnrArea     0
GarageYrBlt    0
dtype: int64

In [52]:
for feature in ['YearBuilt','YearRemodAdd','GarageYrBlt']:
    data[feature]=data['YrSold']-data[feature]
data[['YearBuilt','YearRemodAdd','GarageYrBlt']].head()

Unnamed: 0,YearBuilt,YearRemodAdd,GarageYrBlt
0,5,5,5.0
1,31,31,31.0
2,7,6,7.0
3,91,36,8.0
4,8,8,8.0


In [53]:
replace_feature=['LotFrontage','LotArea','1stFlrSF','GrLivArea','SalePrice']
for feature in replace_feature:
    data[feature]=np.log(data[feature])
data[['LotFrontage','LotArea','1stFlrSF','GrLivArea','SalePrice']].head()

Unnamed: 0,LotFrontage,LotArea,1stFlrSF,GrLivArea,SalePrice
0,4.174387,9.041922,6.75227,7.444249,12.247694
1,4.382027,9.169518,7.140453,7.140453,12.109011
2,4.219508,9.328123,6.824374,7.487734,12.317167
3,4.094345,9.164296,6.867974,7.448334,11.849398
4,4.430817,9.565214,7.04316,7.695303,12.429216


In [54]:
categorical_var=[feature for feature in data.columns if data[feature].dtype=='O']
categorical_var

['MSZoning',
 'Street',
 'Alley',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood',
 'Condition1',
 'Condition2',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'Exterior2nd',
 'MasVnrType',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Heating',
 'HeatingQC',
 'CentralAir',
 'Electrical',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageType',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PavedDrive',
 'PoolQC',
 'Fence',
 'MiscFeature',
 'SaleType',
 'SaleCondition']

In [55]:
len(categorical_var)
data.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,LotFrontagenan,MasVnrAreanan,GarageYrBltnan
0,1,60,RL,4.174387,9.041922,Pave,Missing,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,5,5,Gable,CompShg,VinylSd,VinylSd,BrkFace,196.0,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,Ex,Y,SBrkr,6.75227,854,0,7.444249,1,0,2,1,3,1,Gd,8,Typ,0,Missing,Attchd,5.0,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,Missing,Missing,Missing,0,2,2008,WD,Normal,12.247694,0,0,0
1,2,20,RL,4.382027,9.169518,Pave,Missing,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,31,31,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,Ex,Y,SBrkr,7.140453,0,0,7.140453,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,31.0,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,Missing,Missing,Missing,0,5,2007,WD,Normal,12.109011,0,0,0
2,3,60,RL,4.219508,9.328123,Pave,Missing,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,7,6,Gable,CompShg,VinylSd,VinylSd,BrkFace,162.0,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,Ex,Y,SBrkr,6.824374,866,0,7.487734,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,7.0,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,Missing,Missing,Missing,0,9,2008,WD,Normal,12.317167,0,0,0
3,4,70,RL,4.094345,9.164296,Pave,Missing,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,91,36,Gable,CompShg,Wd Sdng,Wd Shng,,0.0,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,Gd,Y,SBrkr,6.867974,756,0,7.448334,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,8.0,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,Missing,Missing,Missing,0,2,2006,WD,Abnorml,11.849398,0,0,0
4,5,60,RL,4.430817,9.565214,Pave,Missing,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,8,8,Gable,CompShg,VinylSd,VinylSd,BrkFace,350.0,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,Ex,Y,SBrkr,7.04316,1053,0,7.695303,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,8.0,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,Missing,Missing,Missing,0,12,2008,WD,Normal,12.429216,0,0,0


In [56]:
for features in categorical_var:
    
    t=data.groupby(feature)['SalePrice'].count()/len(data)
t.head()


SalePrice
10.460242    0.000685
10.471950    0.000685
10.542706    0.000685
10.578980    0.000685
10.596635    0.000685
Name: SalePrice, dtype: float64

In [57]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
for feature in categorical_var:
    data[feature]=le.fit_transform(data[feature])

In [58]:
data

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,LotFrontagenan,MasVnrAreanan,GarageYrBltnan
0,1,60,3,4.174387,9.041922,1,1,3,3,0,4,0,5,2,2,0,5,7,5,5,5,1,1,12,13,1,196.0,2,4,2,2,4,4,2,706,6,0,150,856,1,0,1,5,6.752270,854,0,7.444249,1,0,2,1,3,1,2,8,6,0,3,1,5.0,2,2,548,5,5,2,0,61,0,0,0,0,3,2,1,0,2,2008,8,4,12.247694,0,0,0
1,2,20,3,4.382027,9.169518,1,1,3,3,0,2,0,24,1,2,0,2,6,8,31,31,1,1,8,8,3,0.0,3,4,1,2,4,1,0,978,6,0,284,1262,1,0,1,5,7.140453,0,0,7.140453,0,1,2,0,3,1,3,6,6,1,5,1,31.0,2,2,460,5,5,2,298,0,0,0,0,0,3,2,1,0,5,2007,8,4,12.109011,0,0,0
2,3,60,3,4.219508,9.328123,1,1,0,3,0,4,0,5,2,2,0,5,7,5,7,6,1,1,12,13,1,162.0,2,4,2,2,4,3,2,486,6,0,434,920,1,0,1,5,6.824374,866,0,7.487734,1,0,2,1,3,1,2,6,6,1,5,1,7.0,2,2,608,5,5,2,0,42,0,0,0,0,3,2,1,0,9,2008,8,4,12.317167,0,0,0
3,4,70,3,4.094345,9.164296,1,1,0,3,0,0,0,6,2,2,0,5,7,5,91,36,1,1,13,15,3,0.0,3,4,0,4,1,4,0,216,6,0,540,756,1,2,1,5,6.867974,756,0,7.448334,1,0,1,0,3,1,2,7,6,1,2,5,8.0,3,3,642,5,5,2,0,35,272,0,0,0,3,2,1,0,2,2006,8,0,11.849398,0,0,0
4,5,60,3,4.430817,9.565214,1,1,0,3,0,2,0,15,2,2,0,5,8,5,8,8,1,1,12,13,1,350.0,2,4,2,2,4,0,2,655,6,0,490,1145,1,0,1,5,7.043160,1053,0,7.695303,1,0,2,1,4,1,2,9,6,1,5,1,8.0,2,3,836,5,5,2,192,84,0,0,0,0,3,2,1,0,12,2008,8,4,12.429216,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,3,4.127134,8.976768,1,1,3,3,0,4,0,8,2,2,0,5,6,5,8,7,1,1,12,13,3,0.0,3,4,2,2,4,4,6,0,6,0,953,953,1,0,1,5,6.859615,694,0,7.406711,0,0,2,1,3,1,3,7,6,1,5,1,8.0,2,2,460,5,5,2,0,40,0,0,0,0,3,2,1,0,8,2007,8,4,12.072541,0,0,0
1456,1457,20,3,4.442651,9.486076,1,1,3,3,0,4,0,14,2,2,0,2,6,6,32,22,1,1,9,10,4,119.0,3,4,1,2,4,4,0,790,5,163,589,1542,1,4,1,5,7.636752,0,0,7.636752,1,0,2,0,3,1,3,7,2,2,5,1,32.0,3,2,500,5,5,2,349,0,0,0,0,0,3,3,1,0,2,2010,8,4,12.254863,0,0,0
1457,1458,70,3,4.189655,9.109636,1,1,3,3,0,4,0,6,2,2,0,5,7,9,69,4,1,1,5,5,3,0.0,0,2,4,4,1,4,2,275,6,0,877,1152,1,0,1,5,7.080026,1152,0,7.757906,0,0,2,0,4,1,2,9,6,2,2,1,69.0,2,1,252,5,5,2,0,60,0,0,0,0,3,0,3,2500,5,2010,8,4,12.493130,0,0,0
1458,1459,20,3,4.219508,9.181632,1,1,3,3,0,4,0,12,2,2,0,2,5,6,60,14,3,1,8,8,3,0.0,3,4,1,4,4,3,2,49,5,1029,0,1078,1,2,1,0,6.982863,0,0,6.982863,1,0,1,0,2,1,2,5,6,0,3,1,60.0,3,1,240,5,5,2,366,0,112,0,0,0,3,2,1,0,4,2010,8,4,11.864462,0,0,0


In [59]:
from sklearn.preprocessing import MinMaxScaler

In [60]:
feature_scale=[feature for feature in data.columns if feature not in ['Id','SalePrice']]


In [61]:
scaler=MinMaxScaler()
scaler.fit(data[feature_scale])
scaler.transform(data[feature_scale])

array([[0.23529412, 0.75      , 0.41820812, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.75      , 0.49506375, ..., 0.        , 0.        ,
        0.        ],
       [0.23529412, 0.75      , 0.434909  , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.29411765, 0.75      , 0.42385922, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.75      , 0.434909  , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.75      , 0.47117546, ..., 0.        , 0.        ,
        0.        ]])

In [63]:
new_var=data[['Id','SalePrice']]




In [43]:
dataset.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,0.235294,0.75,0.418208,0.366344,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.208333,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.036765,0.098361,0.2,0.142857,0.857143,0.866667,0.25,0.1225,0.666667,1.0,0.4,0.5,1.0,1.0,0.333333,0.125089,1.0,0.0,0.064212,0.140098,0.2,0.0,1.0,1.0,0.356155,0.413559,0.0,0.577712,0.333333,0.0,0.666667,0.5,0.375,0.333333,0.666667,0.5,1.0,0.0,0.6,0.166667,0.046729,0.666667,0.5,0.38646,1.0,1.0,1.0,0.0,0.111517,0.0,0.0,0.0,0.0,1.0,0.5,0.25,0.0,0.090909,0.5,1.0,0.8
1,0.0,0.75,0.495064,0.391317,1.0,0.5,1.0,1.0,0.0,0.5,0.0,1.0,0.125,0.285714,0.0,0.285714,0.555556,0.875,0.227941,0.52459,0.2,0.142857,0.571429,0.533333,0.75,0.0,1.0,1.0,0.2,0.5,1.0,0.25,0.0,0.173281,1.0,0.0,0.121575,0.206547,0.2,0.0,1.0,1.0,0.503056,0.0,0.0,0.470245,0.0,0.5,0.666667,0.0,0.375,0.333333,1.0,0.333333,1.0,0.333333,1.0,0.166667,0.28972,0.666667,0.5,0.324401,1.0,1.0,1.0,0.347725,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.25,0.0,0.363636,0.25,1.0,0.8
2,0.235294,0.75,0.434909,0.422359,1.0,0.5,0.0,1.0,0.0,1.0,0.0,0.208333,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.051471,0.114754,0.2,0.142857,0.857143,0.866667,0.25,0.10125,0.666667,1.0,0.4,0.5,1.0,0.75,0.333333,0.086109,1.0,0.0,0.185788,0.150573,0.2,0.0,1.0,1.0,0.383441,0.41937,0.0,0.593095,0.333333,0.0,0.666667,0.5,0.375,0.333333,0.666667,0.333333,1.0,0.333333,1.0,0.166667,0.065421,0.666667,0.5,0.428773,1.0,1.0,1.0,0.0,0.076782,0.0,0.0,0.0,0.0,1.0,0.5,0.25,0.0,0.727273,0.5,1.0,0.8
3,0.294118,0.75,0.388581,0.390295,1.0,0.5,0.0,1.0,0.0,0.0,0.0,0.25,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.669118,0.606557,0.2,0.142857,0.928571,1.0,0.75,0.0,1.0,1.0,0.0,1.0,0.25,1.0,0.0,0.038271,1.0,0.0,0.231164,0.123732,0.2,0.5,1.0,1.0,0.399941,0.366102,0.0,0.579157,0.333333,0.0,0.333333,0.0,0.375,0.333333,0.666667,0.416667,1.0,0.333333,0.4,0.833333,0.074766,1.0,0.75,0.45275,1.0,1.0,1.0,0.0,0.063985,0.492754,0.0,0.0,0.0,1.0,0.5,0.25,0.0,0.090909,0.0,1.0,0.0
4,0.235294,0.75,0.513123,0.468761,1.0,0.5,0.0,1.0,0.0,0.5,0.0,0.625,0.25,0.285714,0.0,0.714286,0.777778,0.5,0.058824,0.147541,0.2,0.142857,0.857143,0.866667,0.25,0.21875,0.666667,1.0,0.4,0.5,1.0,0.0,0.333333,0.116052,1.0,0.0,0.20976,0.187398,0.2,0.0,1.0,1.0,0.466237,0.509927,0.0,0.666523,0.333333,0.0,0.666667,0.5,0.5,0.333333,0.666667,0.583333,1.0,0.333333,1.0,0.166667,0.074766,0.666667,0.75,0.589563,1.0,1.0,1.0,0.224037,0.153565,0.0,0.0,0.0,0.0,1.0,0.5,0.25,0.0,1.0,0.5,1.0,0.8


In [64]:
dataset=pd.DataFrame(scaler.transform(data[feature_scale]),columns=feature_scale)
new_dataset=pd.concat([(data[['Id','SalePrice']].reset_index(drop=True)),dataset],axis=1)

In [65]:
new_dataset

Unnamed: 0,Id,SalePrice,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,LotFrontagenan,MasVnrAreanan,GarageYrBltnan
0,1,12.247694,0.235294,0.75,0.418208,0.366344,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.208333,0.250,0.285714,0.0,0.714286,0.666667,0.500,0.036765,0.098361,0.2,0.142857,0.857143,0.866667,0.25,0.122500,0.666667,1.0,0.4,0.5,1.00,1.00,0.333333,0.125089,1.000000,0.000000,0.064212,0.140098,0.2,0.0,1.0,1.0,0.356155,0.413559,0.0,0.577712,0.333333,0.0,0.666667,0.5,0.375,0.333333,0.666667,0.500000,1.000000,0.000000,0.6,0.166667,0.046729,0.666667,0.50,0.386460,1.0,1.0,1.0,0.000000,0.111517,0.000000,0.0,0.0,0.0,1.0,0.50,0.25,0.00000,0.090909,0.50,1.0,0.8,0.0,0.0,0.0
1,2,12.109011,0.000000,0.75,0.495064,0.391317,1.0,0.5,1.0,1.0,0.0,0.5,0.0,1.000000,0.125,0.285714,0.0,0.285714,0.555556,0.875,0.227941,0.524590,0.2,0.142857,0.571429,0.533333,0.75,0.000000,1.000000,1.0,0.2,0.5,1.00,0.25,0.000000,0.173281,1.000000,0.000000,0.121575,0.206547,0.2,0.0,1.0,1.0,0.503056,0.000000,0.0,0.470245,0.000000,0.5,0.666667,0.0,0.375,0.333333,1.000000,0.333333,1.000000,0.333333,1.0,0.166667,0.289720,0.666667,0.50,0.324401,1.0,1.0,1.0,0.347725,0.000000,0.000000,0.0,0.0,0.0,1.0,0.50,0.25,0.00000,0.363636,0.25,1.0,0.8,0.0,0.0,0.0
2,3,12.317167,0.235294,0.75,0.434909,0.422359,1.0,0.5,0.0,1.0,0.0,1.0,0.0,0.208333,0.250,0.285714,0.0,0.714286,0.666667,0.500,0.051471,0.114754,0.2,0.142857,0.857143,0.866667,0.25,0.101250,0.666667,1.0,0.4,0.5,1.00,0.75,0.333333,0.086109,1.000000,0.000000,0.185788,0.150573,0.2,0.0,1.0,1.0,0.383441,0.419370,0.0,0.593095,0.333333,0.0,0.666667,0.5,0.375,0.333333,0.666667,0.333333,1.000000,0.333333,1.0,0.166667,0.065421,0.666667,0.50,0.428773,1.0,1.0,1.0,0.000000,0.076782,0.000000,0.0,0.0,0.0,1.0,0.50,0.25,0.00000,0.727273,0.50,1.0,0.8,0.0,0.0,0.0
3,4,11.849398,0.294118,0.75,0.388581,0.390295,1.0,0.5,0.0,1.0,0.0,0.0,0.0,0.250000,0.250,0.285714,0.0,0.714286,0.666667,0.500,0.669118,0.606557,0.2,0.142857,0.928571,1.000000,0.75,0.000000,1.000000,1.0,0.0,1.0,0.25,1.00,0.000000,0.038271,1.000000,0.000000,0.231164,0.123732,0.2,0.5,1.0,1.0,0.399941,0.366102,0.0,0.579157,0.333333,0.0,0.333333,0.0,0.375,0.333333,0.666667,0.416667,1.000000,0.333333,0.4,0.833333,0.074766,1.000000,0.75,0.452750,1.0,1.0,1.0,0.000000,0.063985,0.492754,0.0,0.0,0.0,1.0,0.50,0.25,0.00000,0.090909,0.00,1.0,0.0,0.0,0.0,0.0
4,5,12.429216,0.235294,0.75,0.513123,0.468761,1.0,0.5,0.0,1.0,0.0,0.5,0.0,0.625000,0.250,0.285714,0.0,0.714286,0.777778,0.500,0.058824,0.147541,0.2,0.142857,0.857143,0.866667,0.25,0.218750,0.666667,1.0,0.4,0.5,1.00,0.00,0.333333,0.116052,1.000000,0.000000,0.209760,0.187398,0.2,0.0,1.0,1.0,0.466237,0.509927,0.0,0.666523,0.333333,0.0,0.666667,0.5,0.500,0.333333,0.666667,0.583333,1.000000,0.333333,1.0,0.166667,0.074766,0.666667,0.75,0.589563,1.0,1.0,1.0,0.224037,0.153565,0.000000,0.0,0.0,0.0,1.0,0.50,0.25,0.00000,1.000000,0.50,1.0,0.8,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,12.072541,0.235294,0.75,0.400718,0.353592,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.333333,0.250,0.285714,0.0,0.714286,0.555556,0.500,0.058824,0.131148,0.2,0.142857,0.857143,0.866667,0.75,0.000000,1.000000,1.0,0.4,0.5,1.00,1.00,1.000000,0.000000,1.000000,0.000000,0.407962,0.155974,0.2,0.0,1.0,1.0,0.396777,0.336077,0.0,0.564433,0.000000,0.0,0.666667,0.5,0.375,0.333333,1.000000,0.416667,1.000000,0.333333,1.0,0.166667,0.074766,0.666667,0.50,0.324401,1.0,1.0,1.0,0.000000,0.073126,0.000000,0.0,0.0,0.0,1.0,0.50,0.25,0.00000,0.636364,0.25,1.0,0.8,0.0,0.0,0.0
1456,1457,12.254863,0.000000,0.75,0.517503,0.453273,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.583333,0.250,0.285714,0.0,0.285714,0.555556,0.625,0.235294,0.377049,0.2,0.142857,0.642857,0.666667,1.00,0.074375,1.000000,1.0,0.2,0.5,1.00,1.00,0.000000,0.139972,0.833333,0.110583,0.252140,0.252373,0.2,1.0,1.0,1.0,0.690872,0.000000,0.0,0.645810,0.333333,0.0,0.666667,0.0,0.375,0.333333,1.000000,0.416667,0.333333,0.666667,1.0,0.166667,0.299065,1.000000,0.50,0.352609,1.0,1.0,1.0,0.407235,0.000000,0.000000,0.0,0.0,0.0,1.0,0.75,0.25,0.00000,0.090909,1.00,1.0,0.8,0.0,0.0,0.0
1457,1458,12.493130,0.294118,0.75,0.423859,0.379597,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.250000,0.250,0.285714,0.0,0.714286,0.666667,1.000,0.507353,0.081967,0.2,0.142857,0.357143,0.333333,0.75,0.000000,0.000000,0.5,0.8,1.0,0.25,1.00,0.333333,0.048724,1.000000,0.000000,0.375428,0.188543,0.2,0.0,1.0,1.0,0.480189,0.557869,0.0,0.688669,0.000000,0.0,0.666667,0.0,0.500,0.333333,0.666667,0.583333,1.000000,0.666667,0.4,0.166667,0.644860,0.666667,0.25,0.177715,1.0,1.0,1.0,0.000000,0.109689,0.000000,0.0,0.0,0.0,1.0,0.00,0.75,0.16129,0.363636,1.00,1.0,0.8,0.0,0.0,0.0
1458,1459,11.864462,0.000000,0.75,0.434909,0.393688,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.500000,0.250,0.285714,0.0,0.285714,0.444444,0.625,0.441176,0.245902,0.6,0.142857,0.571429,0.533333,0.75,0.000000,1.000000,1.0,0.2,1.0,1.00,0.75,0.333333,0.008682,0.833333,0.698100,0.000000,0.176432,0.2,0.5,1.0,0.0,0.443419,0.000000,0.0,0.414497,0.333333,0.0,0.333333,0.0,0.250,0.333333,0.666667,0.250000,1.000000,0.000000,0.6,0.166667,0.560748,1.000000,0.25,0.169252,1.0,1.0,1.0,0.427071,0.000000,0.202899,0.0,0.0,0.0,1.0,0.50,0.25,0.00000,0.272727,1.00,1.0,0.8,0.0,0.0,0.0


In [67]:
new_dataset.to_csv('X_train_adv_house.csv',index=False)