# Feature engineering for our test set

- This is the same code as the one provided in FE01
- Its purpose is to process the raw test data and generate 'X_test.csv'
- Code comments and a more detailed explanation can be found in FE01

- Dataset downloaded from: [House Data](https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques/data)

In [1]:
import pandas as pd #type: ignore
import numpy as np #type: ignore
import matplotlib.pyplot as plt #type: ignore
%matplotlib inline

# Show every column when a DataFrame is displayed (None removes the column limit).
# Use the documented top-level `pd.set_option` rather than the `pd.pandas` self-reference.
pd.set_option('display.max_columns', None)

In [2]:
# Read the test split from disk into the working dataframe and preview it.
test_csv_path = 'Data/test.csv'
dataset = pd.read_csv(test_csv_path)

# Last expression of the cell: renders the first five rows.
dataset.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,6,1961,1961,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,CBlock,TA,TA,No,Rec,468.0,LwQ,144.0,270.0,882.0,GasA,TA,Y,SBrkr,896,0,0,896,0.0,0.0,1,0,2,1,TA,5,Typ,0,,Attchd,1961.0,Unf,1.0,730.0,TA,TA,Y,140,0,0,0,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,108.0,TA,TA,CBlock,TA,TA,No,ALQ,923.0,Unf,0.0,406.0,1329.0,GasA,TA,Y,SBrkr,1329,0,0,1329,0.0,0.0,1,1,3,1,Gd,6,Typ,0,,Attchd,1958.0,Unf,1.0,312.0,TA,TA,Y,393,36,0,0,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,PConc,Gd,TA,No,GLQ,791.0,Unf,0.0,137.0,928.0,GasA,Gd,Y,SBrkr,928,701,0,1629,0.0,0.0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1997.0,Fin,2.0,482.0,TA,TA,Y,212,34,0,0,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,6,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,20.0,TA,TA,PConc,TA,TA,No,GLQ,602.0,Unf,0.0,324.0,926.0,GasA,Ex,Y,SBrkr,926,678,0,1604,0.0,0.0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,1998.0,Fin,2.0,470.0,TA,TA,Y,360,36,0,0,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,,0.0,Gd,TA,PConc,Gd,TA,No,ALQ,263.0,Unf,0.0,1017.0,1280.0,GasA,Ex,Y,SBrkr,1280,0,0,1280,0.0,0.0,2,0,2,1,Gd,5,Typ,0,,Attchd,1992.0,RFn,2.0,506.0,TA,TA,Y,0,82,0,0,144,0,,,,0,1,2010,WD,Normal


### Handle missing categorical features

In [3]:
# Object-dtype columns that contain at least one null value.
features_nan_categorical = []
for column in dataset.columns:
    is_object_column = dataset[column].dtypes == 'O'
    if is_object_column and dataset[column].isnull().sum() > 0:
        features_nan_categorical.append(column)

# Report the share of nulls per column.
# NOTE: the printed number is the mean of the null mask, i.e. a fraction in
# [0, 1] rounded to 4 decimals, even though the label says '%'.
for column in features_nan_categorical:
    missing_share = np.round(dataset[column].isnull().mean(), 4)
    print(f'{column}: {missing_share} % missing values')

MSZoning: 0.0027 % missing values
Alley: 0.9267 % missing values
Utilities: 0.0014 % missing values
Exterior1st: 0.0007 % missing values
Exterior2nd: 0.0007 % missing values
MasVnrType: 0.6127 % missing values
BsmtQual: 0.0302 % missing values
BsmtCond: 0.0308 % missing values
BsmtExposure: 0.0302 % missing values
BsmtFinType1: 0.0288 % missing values
BsmtFinType2: 0.0288 % missing values
KitchenQual: 0.0007 % missing values
Functional: 0.0014 % missing values
FireplaceQu: 0.5003 % missing values
GarageType: 0.0521 % missing values
GarageFinish: 0.0535 % missing values
GarageQual: 0.0535 % missing values
GarageCond: 0.0535 % missing values
PoolQC: 0.9979 % missing values
Fence: 0.8012 % missing values
MiscFeature: 0.965 % missing values
SaleType: 0.0007 % missing values


In [4]:
def replace_categorical_features(dataset: pd.DataFrame, features_nan: list):
    """Return a copy of ``dataset`` with nulls in the given columns set to 'Missing'.

    Args:
        dataset: Frame to read from; it is copied and never modified.
        features_nan: Names of the columns whose null entries are replaced.

    Returns:
        A new DataFrame in which every null value of the listed columns has
        been replaced by the literal string 'Missing'. Other columns are
        carried over unchanged.
    """
    filled = dataset.copy()
    # Fill one column at a time; each column only depends on itself.
    for column in features_nan:
        filled[column] = filled[column].fillna('Missing')
    return filled

# Replace nulls in the categorical columns found above with 'Missing'.
dataset = replace_categorical_features(dataset, features_nan_categorical)

# Sanity check: per-column count of nulls still left in those columns.
remaining_categorical_nulls = dataset[features_nan_categorical].isnull().sum()
remaining_categorical_nulls

MSZoning        0
Alley           0
Utilities       0
Exterior1st     0
Exterior2nd     0
MasVnrType      0
BsmtQual        0
BsmtCond        0
BsmtExposure    0
BsmtFinType1    0
BsmtFinType2    0
KitchenQual     0
Functional      0
FireplaceQu     0
GarageType      0
GarageFinish    0
GarageQual      0
GarageCond      0
PoolQC          0
Fence           0
MiscFeature     0
SaleType        0
dtype: int64

### Handle missing numerical features

In [5]:
# Non-object columns that contain at least one null value.
features_nan_numerical = []
for column in dataset.columns:
    is_non_object_column = dataset[column].dtypes != 'O'
    if is_non_object_column and dataset[column].isnull().sum() > 0:
        features_nan_numerical.append(column)

# Report the share of nulls per column.
# NOTE: the printed number is the mean of the null mask, i.e. a fraction in
# [0, 1] rounded to 4 decimals, even though the label says '%'.
for column in features_nan_numerical:
    missing_share = np.round(dataset[column].isnull().mean(), 4)
    print(f'{column}: {missing_share} %missing values')

LotFrontage: 0.1556 %missing values
MasVnrArea: 0.0103 %missing values
BsmtFinSF1: 0.0007 %missing values
BsmtFinSF2: 0.0007 %missing values
BsmtUnfSF: 0.0007 %missing values
TotalBsmtSF: 0.0007 %missing values
BsmtFullBath: 0.0014 %missing values
BsmtHalfBath: 0.0014 %missing values
GarageYrBlt: 0.0535 %missing values
GarageCars: 0.0007 %missing values
GarageArea: 0.0007 %missing values


In [6]:
# Median-impute each numerical column and keep a 0/1 indicator of where the
# original value was null in a companion '<column>nan' column.
# NOTE(review): the median is computed from this dataframe itself; confirm
# whether the value used on the training set should be reused instead.
for column in features_nan_numerical:
    null_mask = dataset[column].isnull()
    column_median = dataset[column].median()
    dataset[column + 'nan'] = np.where(null_mask, 1, 0)
    dataset[column] = dataset[column].fillna(column_median)

# Sanity check: per-column count of nulls still left after imputation.
dataset[features_nan_numerical].isnull().sum()

LotFrontage     0
MasVnrArea      0
BsmtFinSF1      0
BsmtFinSF2      0
BsmtUnfSF       0
TotalBsmtSF     0
BsmtFullBath    0
BsmtHalfBath    0
GarageYrBlt     0
GarageCars      0
GarageArea      0
dtype: int64

### Handle temporal variables

In [7]:
# Year columns that are converted into "years elapsed at sale time".
features_temporal = ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt']

# 'YrSold' is not one of the converted columns, so it can be read once up front.
year_sold = dataset['YrSold']
for column in features_temporal:
    # Overwrite the year with the difference YrSold - year.
    dataset[column] = year_sold - dataset[column]

# Preview the converted columns.
dataset[features_temporal]

Unnamed: 0,YearBuilt,YearRemodAdd,GarageYrBlt
0,49,49,49.0
1,52,52,52.0
2,13,12,13.0
3,12,12,12.0
4,18,18,18.0
...,...,...,...
1454,36,36,27.0
1455,36,36,36.0
1456,46,10,46.0
1457,14,14,27.0


### Handle skewness of numerical features

- By applying a natural-log transformation to the continuous features

In [8]:
# Continuous columns that are replaced by their natural logarithm.
numerical_features_continuous = ['LotFrontage', 'LotArea', '1stFlrSF', 'GrLivArea']

# Compute all logs in one call, then write them back column by column.
log_values = np.log(dataset[numerical_features_continuous])
for column in numerical_features_continuous:
    dataset[column] = log_values[column]


### Handle rare categorical features

In [9]:
# All columns that are still of object dtype at this point.
categorical_features = [column for column in dataset.columns if dataset[column].dtypes == 'O']

# The number of rows does not change inside the loop, so read it once.
row_count = len(dataset)

for column in categorical_features:
    # Share of rows carried by each label of this column.
    label_share = dataset[column].value_counts() / row_count

    # Labels present in more than 1% of the rows are kept as they are.
    frequent_labels = label_share[label_share > 0.01].index

    # Every other value is collapsed into the single label 'Rare_var'.
    # NOTE(review): frequencies are measured on this dataframe; confirm this
    # matches the set of labels kept on the training set.
    keep_mask = dataset[column].isin(frequent_labels)
    dataset[column] = np.where(keep_mask, dataset[column], 'Rare_var')

### Encode categorical features

In [10]:
# Load the per-column label -> code mappings from disk.
# NOTE(review): presumably written by the training notebook (FE01) — confirm.
# Unpickling executes code from the file, so only load artifacts you trust.
encoding_maps = pd.read_pickle('pickles/encoding_maps.pickle')

for column in categorical_features:
    column_mapping = encoding_maps[column]
    encoded = dataset[column].map(column_mapping)
    # Labels that are absent from the mapping come back as NaN; mark them with -1.
    dataset[column] = encoded.fillna(-1)

# Preview the encoded categorical columns.
dataset[categorical_features]

Unnamed: 0,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
0,0.0,1,2,0,1,1,0,0,8,1.0,1,3,3,0,0,10,10.0,1.0,1.0,3,2,2,3,1,1,3.0,2,2,1,3,1.0,4,1,4.0,1,2,3,2,0,2,2,2.0,3
1,3.0,1,2,1,1,1,1,0,8,2.0,1,3,3,2,0,2,1.0,2.0,1.0,3,2,2,3,1,4,5.0,2,2,1,3,2.0,4,1,4.0,1,2,3,2,0,4,0,2.0,3
2,3.0,1,2,1,1,1,0,0,13,2.0,1,3,5,0,0,10,10.0,1.0,1.0,3,4,3,3,1,6,5.0,2,3,1,3,1.0,4,3,4.0,3,2,3,2,0,2,2,2.0,3
3,3.0,1,2,1,1,1,0,0,13,2.0,1,3,5,0,0,10,10.0,2.0,1.0,3,4,2,3,1,6,5.0,2,4,1,3,2.0,4,4,4.0,3,2,3,2,0,4,2,2.0,3
4,3.0,1,2,1,3,1,0,0,20,2.0,1,4,3,0,0,6,5.0,1.0,2.0,3,4,3,3,1,4,5.0,2,4,1,3,2.0,4,1,4.0,2,2,3,2,0,4,2,2.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,1.0,1,2,0,1,1,0,0,1,2.0,1,1,5,0,0,9,9.0,1.0,1.0,3,2,2,3,1,5,5.0,2,3,1,3,1.0,4,1,0.0,0,0,0,2,0,4,2,2.0,3
1455,1.0,1,2,0,1,1,0,0,1,2.0,1,4,5,0,0,9,9.0,1.0,1.0,3,2,2,3,1,1,5.0,2,2,1,3,1.0,4,1,1.0,1,2,3,2,0,4,2,2.0,0
1456,3.0,1,2,0,1,1,0,0,9,2.0,1,3,3,0,0,10,10.0,1.0,1.0,3,2,2,3,1,4,5.0,2,4,1,3,1.0,4,3,2.0,1,2,3,2,0,4,2,2.0,0
1457,3.0,1,2,0,1,1,0,0,9,2.0,1,3,0,0,0,6,4.0,1.0,1.0,3,4,3,3,3,6,5.0,2,2,1,3,1.0,4,1,0.0,0,0,0,2,0,2,1,2.0,3


In [11]:
# Preview the first five rows after imputation, transformation and encoding.
dataset.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,LotFrontagenan,MasVnrAreanan,BsmtFinSF1nan,BsmtFinSF2nan,BsmtUnfSFnan,TotalBsmtSFnan,BsmtFullBathnan,BsmtHalfBathnan,GarageYrBltnan,GarageCarsnan,GarageAreanan
0,1461,20,0.0,4.382027,9.360655,1,2,0,1,1,0,0,8,1.0,1,3,3,5,6,49,49,0,0,10,10.0,1.0,0.0,1.0,3,2,2,3,1,1,468.0,3.0,144.0,270.0,882.0,2,2,1,3,6.79794,0,0,6.79794,0.0,0.0,1,0,2,1,1.0,5,4,0,1,4.0,49.0,1,1.0,730.0,2,3,2,140,0,0,0,120,0,0,2,2,0,6,2010,2.0,3,0,0,0,0,0,0,0,0,0,0,0
1,1462,20,3.0,4.394449,9.565704,1,2,1,1,1,1,0,8,2.0,1,3,3,6,6,52,52,2,0,2,1.0,2.0,108.0,1.0,3,2,2,3,1,4,923.0,5.0,0.0,406.0,1329.0,2,2,1,3,7.192182,0,0,7.192182,0.0,0.0,1,1,3,1,2.0,6,4,0,1,4.0,52.0,1,1.0,312.0,2,3,2,393,36,0,0,0,0,0,4,0,12500,6,2010,2.0,3,0,0,0,0,0,0,0,0,0,0,0
2,1463,60,3.0,4.304065,9.534595,1,2,1,1,1,0,0,13,2.0,1,3,5,5,5,13,12,0,0,10,10.0,1.0,0.0,1.0,3,4,3,3,1,6,791.0,5.0,0.0,137.0,928.0,2,3,1,3,6.833032,701,0,7.395722,0.0,0.0,2,1,3,1,1.0,6,4,1,3,4.0,13.0,3,2.0,482.0,2,3,2,212,34,0,0,0,0,0,2,2,0,3,2010,2.0,3,0,0,0,0,0,0,0,0,0,0,0
3,1464,60,3.0,4.356709,9.208138,1,2,1,1,1,0,0,13,2.0,1,3,5,6,6,12,12,0,0,10,10.0,2.0,20.0,1.0,3,4,2,3,1,6,602.0,5.0,0.0,324.0,926.0,2,4,1,3,6.830874,678,0,7.380256,0.0,0.0,2,1,3,1,2.0,7,4,1,4,4.0,12.0,3,2.0,470.0,2,3,2,360,36,0,0,0,0,0,4,2,0,6,2010,2.0,3,0,0,0,0,0,0,0,0,0,0,0
4,1465,120,3.0,3.7612,8.518193,1,2,1,3,1,0,0,20,2.0,1,4,3,8,5,18,18,0,0,6,5.0,1.0,0.0,2.0,3,4,3,3,1,4,263.0,5.0,0.0,1017.0,1280.0,2,4,1,3,7.154615,0,0,7.154615,0.0,0.0,2,0,2,1,2.0,5,4,0,1,4.0,18.0,2,2.0,506.0,2,3,2,0,82,0,0,144,0,0,4,2,0,1,2010,2.0,3,0,0,0,0,0,0,0,0,0,0,0


### Apply feature scaling

In [12]:
# Every column except the identifier, in the dataframe's current column order.
scaled_features = [column for column in dataset.columns if column != 'Id']

# Reuse the scaler saved by the training-set feature engineering: here it is
# only loaded (and later used to transform), never re-fitted, which prevents
# data leakage.
from sklearn.preprocessing import MinMaxScaler
import pickle

with open('pickles/scaler.pickle', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# The column order saved by the earlier feature engineering is loaded as well,
# so the columns can be arranged in that order.
with open('pickles/scaler_features_order.pickle', 'rb') as order_file:
    feature_order = pickle.load(order_file)


# Keep the identifiers aside as a one-column frame, then restrict and reorder
# the working frame to the saved feature order.
ids = dataset[['Id']]
dataset = dataset[feature_order]

In [14]:
# Scale the features with the already-fitted scaler (transform only, no fit)
# and put the 'Id' column back in front.
# The array returned by `scaler.transform` has its columns in the order of the
# frame passed in, i.e. `feature_order`. The columns are therefore labelled
# with `feature_order`; labelling them with a list built from a different
# column order would attach the wrong names whenever the two orders differ.
scaled_values = scaler.transform(dataset[feature_order])
scaled_frame = pd.DataFrame(scaled_values, columns=feature_order)

# Both frames carry a fresh 0..n-1 index, so they line up row by row.
data = pd.concat([ids.reset_index(drop=True), scaled_frame], axis=1)

In [15]:
# Preview the first five rows of the final frame ('Id' plus the scaled features).
data.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,LotFrontagenan,MasVnrAreanan,BsmtFinSF1nan,BsmtFinSF2nan,BsmtUnfSFnan,TotalBsmtSFnan,BsmtFullBathnan,BsmtHalfBathnan,GarageYrBltnan,GarageCarsnan,GarageAreanan
0,1461,0.0,0.0,0.495064,0.428726,1.0,1.0,0.0,0.333333,1.0,0.0,0.0,0.363636,0.2,1.0,0.75,0.6,0.444444,0.625,0.360294,0.819672,0.0,0.0,1.0,1.0,0.333333,0.0,0.333333,1.0,0.5,0.5,0.75,0.25,0.166667,0.08292,0.5,0.097693,0.115582,0.144354,1.0,0.5,1.0,1.0,0.373438,0.0,0.0,0.349081,0.0,0.0,0.333333,0.0,0.25,0.333333,0.333333,0.25,1.0,0.0,0.2,0.8,0.457944,0.333333,0.25,0.51481,0.666667,1.0,1.0,0.163361,0.0,0.0,0.0,0.25,0.0,0.0,0.5,1.0,0.0,0.454545,1.0,0.666667,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1462,0.0,0.75,0.499662,0.468857,1.0,1.0,0.333333,0.333333,1.0,0.25,0.0,0.363636,0.4,1.0,0.75,0.6,0.555556,0.625,0.382353,0.868852,1.0,0.0,0.2,0.1,0.666667,0.0675,0.333333,1.0,0.5,0.5,0.75,0.25,0.666667,0.163536,0.833333,0.0,0.173801,0.217512,1.0,0.5,1.0,1.0,0.522632,0.0,0.0,0.488544,0.0,0.0,0.333333,0.5,0.375,0.333333,0.666667,0.333333,1.0,0.0,0.2,0.8,0.485981,0.333333,0.25,0.220028,0.666667,1.0,1.0,0.458576,0.065814,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.806452,0.454545,1.0,0.666667,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1463,0.235294,0.75,0.466207,0.462769,1.0,1.0,0.333333,0.333333,1.0,0.0,0.0,0.590909,0.4,1.0,0.75,1.0,0.444444,0.5,0.095588,0.213115,0.0,0.0,1.0,1.0,0.333333,0.0,0.333333,1.0,1.0,0.75,0.75,0.25,1.0,0.140149,0.833333,0.0,0.058647,0.151882,1.0,0.75,1.0,1.0,0.386718,0.339467,0.0,0.560546,0.0,0.0,0.666667,0.5,0.375,0.333333,0.333333,0.333333,1.0,0.333333,0.6,0.8,0.121495,1.0,0.5,0.339915,0.666667,1.0,1.0,0.247375,0.062157,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.0,0.181818,1.0,0.666667,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1464,0.235294,0.75,0.485693,0.398875,1.0,1.0,0.333333,0.333333,1.0,0.0,0.0,0.590909,0.4,1.0,0.75,1.0,0.555556,0.625,0.088235,0.213115,0.0,0.0,1.0,1.0,0.666667,0.0125,0.333333,1.0,1.0,0.5,0.75,0.25,1.0,0.106662,0.833333,0.0,0.138699,0.151555,1.0,1.0,1.0,1.0,0.385901,0.328329,0.0,0.555075,0.0,0.0,0.666667,0.5,0.375,0.333333,0.666667,0.416667,1.0,0.333333,0.8,0.8,0.11215,1.0,0.5,0.331453,0.666667,1.0,1.0,0.42007,0.065814,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.454545,1.0,0.666667,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1465,0.588235,0.75,0.265271,0.263841,1.0,1.0,0.333333,1.0,1.0,0.0,0.0,0.909091,0.4,1.0,1.0,0.6,0.777778,0.5,0.132353,0.311475,0.0,0.0,0.6,0.5,0.333333,0.0,0.666667,1.0,1.0,0.75,0.75,0.25,0.666667,0.046598,0.833333,0.0,0.43536,0.209493,1.0,1.0,1.0,1.0,0.508416,0.0,0.0,0.475254,0.0,0.0,0.666667,0.0,0.25,0.333333,0.666667,0.25,1.0,0.0,0.2,0.8,0.168224,0.666667,0.5,0.356841,0.666667,1.0,1.0,0.0,0.149909,0.0,0.0,0.3,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.666667,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Write the final frame to 'X_test.csv'; index=False leaves the row index out of the file.
data.to_csv('X_test.csv', index=False)