# A Notebook to Predict the final price of each home

## Import Dependencies

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

## Load the Data

In [2]:
df = pd.read_csv("Data/train.csv")
test_data = pd.read_csv("Data/test.csv")

In [3]:
print("Shape is: ", df.shape)
df.head()

Shape is:  (1460, 81)


Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [4]:
print("Shape is: ", test_data.shape)
test_data.head()

Shape is:  (1459, 80)


Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,...,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,...,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,...,144,0,,,,0,1,2010,WD,Normal


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

## Encode Categorical Data

In [6]:
df['MSZoning'].value_counts()

RL         1151
RM          218
FV           65
RH           16
C (all)      10
Name: MSZoning, dtype: int64

In [7]:
# Check Categorical Variables
cat = df.select_dtypes(include='O').keys()
cat

Index(['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities',
       'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
       'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st',
       'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation',
       'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
       'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual',
       'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual',
       'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature',
       'SaleType', 'SaleCondition'],
      dtype='object')

In [8]:
# Encode Categorical Variables
for column_name in df.select_dtypes(include='O').keys():
    print("Encoding Column: ", column_name)
    le = LabelEncoder()
    df[column_name] = le.fit_transform(df[column_name].astype(str))

# Encode Categorical Variables
for column_name in test_data.select_dtypes(include='O').keys():
    print("Encoding Column: ", column_name)
    le = LabelEncoder()
    test_data[column_name] = le.fit_transform(test_data[column_name].astype(str))

Encoding Column:  MSZoning
Encoding Column:  Street
Encoding Column:  Alley
Encoding Column:  LotShape
Encoding Column:  LandContour
Encoding Column:  Utilities
Encoding Column:  LotConfig
Encoding Column:  LandSlope
Encoding Column:  Neighborhood
Encoding Column:  Condition1
Encoding Column:  Condition2
Encoding Column:  BldgType
Encoding Column:  HouseStyle
Encoding Column:  RoofStyle
Encoding Column:  RoofMatl
Encoding Column:  Exterior1st
Encoding Column:  Exterior2nd
Encoding Column:  MasVnrType
Encoding Column:  ExterQual
Encoding Column:  ExterCond
Encoding Column:  Foundation
Encoding Column:  BsmtQual
Encoding Column:  BsmtCond
Encoding Column:  BsmtExposure
Encoding Column:  BsmtFinType1
Encoding Column:  BsmtFinType2
Encoding Column:  Heating
Encoding Column:  HeatingQC
Encoding Column:  CentralAir
Encoding Column:  Electrical
Encoding Column:  KitchenQual
Encoding Column:  Functional
Encoding Column:  FireplaceQu
Encoding Column:  GarageType
Encoding Column:  GarageFinish
E

In [9]:
del df['Id']

In [10]:
df.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,60,3,65.0,8450,1,2,3,3,0,4,...,0,3,4,4,0,2,2008,8,4,208500
1,20,3,80.0,9600,1,2,3,3,0,2,...,0,3,4,4,0,5,2007,8,4,181500
2,60,3,68.0,11250,1,2,0,3,0,4,...,0,3,4,4,0,9,2008,8,4,223500
3,70,3,60.0,9550,1,2,0,3,0,0,...,0,3,4,4,0,2,2006,8,0,140000
4,60,3,84.0,14260,1,2,0,3,0,2,...,0,3,4,4,0,12,2008,8,4,250000


In [11]:
test_data.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,2,80.0,11622,1,2,3,3,0,...,120,0,2,2,3,0,6,2010,8,4
1,1462,20,3,81.0,14267,1,2,0,3,0,...,0,0,2,4,0,12500,6,2010,8,4
2,1463,60,3,74.0,13830,1,2,0,3,0,...,0,0,2,2,3,0,3,2010,8,4
3,1464,60,3,78.0,9978,1,2,0,3,0,...,0,0,2,4,3,0,6,2010,8,4
4,1465,120,3,43.0,5005,1,2,0,1,0,...,144,0,2,4,3,0,1,2010,8,4


## Handle Missing Values

In [12]:
df.isnull().sum()

MSSubClass         0
MSZoning           0
LotFrontage      259
LotArea            0
Street             0
                ... 
MoSold             0
YrSold             0
SaleType           0
SaleCondition      0
SalePrice          0
Length: 80, dtype: int64

In [13]:
# Drop Features with High Missing Values
df = df.drop(['FireplaceQu','Fence','Alley','MiscFeature','PoolQC'], axis=1)
test_data = test_data.drop(['FireplaceQu','Fence','Alley','MiscFeature','PoolQC'], axis=1)

In [14]:
# Fill Nan Values of Categorical features with Mode, others with mean

for column_name in df.columns:
    print("Evaluating Column: ", column_name)
    if column_name in df.select_dtypes(include='O').keys():
        df[column_name] = df[column_name].fillna(df[column_name].mode())
    else:
        df[column_name] = df[column_name].fillna(df[column_name].mean())

for column_name in test_data.columns:
    print("Evaluating Column: ", column_name)
    if column_name in test_data.select_dtypes(include='O').keys():
        test_data[column_name] = test_data[column_name].fillna(test_data[column_name].mode())
    else:
        test_data[column_name] = test_data[column_name].fillna(test_data[column_name].mean())

Evaluating Column:  MSSubClass
Evaluating Column:  MSZoning
Evaluating Column:  LotFrontage
Evaluating Column:  LotArea
Evaluating Column:  Street
Evaluating Column:  LotShape
Evaluating Column:  LandContour
Evaluating Column:  Utilities
Evaluating Column:  LotConfig
Evaluating Column:  LandSlope
Evaluating Column:  Neighborhood
Evaluating Column:  Condition1
Evaluating Column:  Condition2
Evaluating Column:  BldgType
Evaluating Column:  HouseStyle
Evaluating Column:  OverallQual
Evaluating Column:  OverallCond
Evaluating Column:  YearBuilt
Evaluating Column:  YearRemodAdd
Evaluating Column:  RoofStyle
Evaluating Column:  RoofMatl
Evaluating Column:  Exterior1st
Evaluating Column:  Exterior2nd
Evaluating Column:  MasVnrType
Evaluating Column:  MasVnrArea
Evaluating Column:  ExterQual
Evaluating Column:  ExterCond
Evaluating Column:  Foundation
Evaluating Column:  BsmtQual
Evaluating Column:  BsmtCond
Evaluating Column:  BsmtExposure
Evaluating Column:  BsmtFinType1
Evaluating Column:  

In [15]:
df.isnull().sum()

MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
Street           0
                ..
MoSold           0
YrSold           0
SaleType         0
SaleCondition    0
SalePrice        0
Length: 75, dtype: int64

In [16]:
test_data.isnull().sum()

Id               0
MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
                ..
MiscVal          0
MoSold           0
YrSold           0
SaleType         0
SaleCondition    0
Length: 75, dtype: int64

In [17]:
print("Shape of Train Data: ", df.shape)
df.head()

Shape of Train Data:  (1460, 75)


Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,...,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,60,3,65.0,8450,1,3,3,0,4,0,...,0,0,0,0,0,2,2008,8,4,208500
1,20,3,80.0,9600,1,3,3,0,2,0,...,0,0,0,0,0,5,2007,8,4,181500
2,60,3,68.0,11250,1,0,3,0,4,0,...,0,0,0,0,0,9,2008,8,4,223500
3,70,3,60.0,9550,1,0,3,0,0,0,...,272,0,0,0,0,2,2006,8,0,140000
4,60,3,84.0,14260,1,0,3,0,2,0,...,0,0,0,0,0,12,2008,8,4,250000


In [18]:
print("Shape of Test Data: ", test_data.shape)
test_data.head()

Shape of Test Data:  (1459, 75)


Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,...,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,2,80.0,11622,1,3,3,0,4,...,0,0,0,120,0,0,6,2010,8,4
1,1462,20,3,81.0,14267,1,0,3,0,0,...,36,0,0,0,0,12500,6,2010,8,4
2,1463,60,3,74.0,13830,1,0,3,0,4,...,34,0,0,0,0,0,3,2010,8,4
3,1464,60,3,78.0,9978,1,0,3,0,4,...,36,0,0,0,0,0,6,2010,8,4
4,1465,120,3,43.0,5005,1,0,1,0,4,...,82,0,0,144,0,0,1,2010,8,4


## Train-Test Split

In [19]:
predict = "SalePrice"
df_train = df.drop(columns=[predict])
y = df[[predict]].values.ravel()

In [20]:
X_train, X_test, y_train, y_test = train_test_split(df_train, y, test_size=0.20, random_state=42)

In [21]:
X_train.shape

(1168, 74)

In [22]:
y_train.shape

(1168,)

## Build the Model

In [23]:
from xgboost import XGBRegressor
model_1 = XGBRegressor()
model_1.fit(X_train, y_train) 
print(model_1.score(X_test, y_test))

0.9006086552997412


In [24]:
from sklearn.ensemble import RandomForestRegressor
model_2 = RandomForestRegressor()
model_2.fit(X_train, y_train) 
print(model_2.score(X_test, y_test))

0.893830791393167


In [25]:
from sklearn.ensemble import GradientBoostingRegressor
model_3 = GradientBoostingRegressor()
model_3.fit(X_train, y_train) 
print(model_3.score(X_test, y_test))

0.8970433553478867


In [26]:
from lightgbm import LGBMRegressor
model_4 = LGBMRegressor()
model_4.fit(X_train, y_train) 
print(model_4.score(X_test, y_test))

0.894714009017131


In [27]:
from catboost import CatBoostRegressor
model_5 = CatBoostRegressor()
model_5.fit(X_train, y_train) 
print(model_5.score(X_test, y_test))

Learning rate set to 0.04196
0:	learn: 75398.8029653	total: 153ms	remaining: 2m 32s
1:	learn: 73613.9383250	total: 167ms	remaining: 1m 23s
2:	learn: 71596.7067499	total: 179ms	remaining: 59.5s
3:	learn: 69763.7043970	total: 187ms	remaining: 46.5s
4:	learn: 68013.1809773	total: 192ms	remaining: 38.2s
5:	learn: 66208.8848605	total: 196ms	remaining: 32.5s
6:	learn: 64648.9423789	total: 200ms	remaining: 28.4s
7:	learn: 63089.0906038	total: 204ms	remaining: 25.3s
8:	learn: 61492.7036705	total: 207ms	remaining: 22.8s
9:	learn: 60202.0134110	total: 210ms	remaining: 20.8s
10:	learn: 58812.3628269	total: 212ms	remaining: 19.1s
11:	learn: 57437.0319175	total: 216ms	remaining: 17.8s
12:	learn: 56190.7249039	total: 219ms	remaining: 16.6s
13:	learn: 54977.0342646	total: 221ms	remaining: 15.6s
14:	learn: 53811.5642861	total: 224ms	remaining: 14.7s
15:	learn: 52780.1097804	total: 227ms	remaining: 13.9s
16:	learn: 51683.3958203	total: 230ms	remaining: 13.3s
17:	learn: 50509.2434255	total: 232ms	remain

153:	learn: 18646.3984228	total: 681ms	remaining: 3.74s
154:	learn: 18595.3889421	total: 684ms	remaining: 3.73s
155:	learn: 18549.5925976	total: 687ms	remaining: 3.72s
156:	learn: 18494.1876784	total: 691ms	remaining: 3.71s
157:	learn: 18436.4779514	total: 694ms	remaining: 3.7s
158:	learn: 18385.3087168	total: 697ms	remaining: 3.69s
159:	learn: 18348.4557533	total: 700ms	remaining: 3.68s
160:	learn: 18316.2621099	total: 704ms	remaining: 3.67s
161:	learn: 18240.1221707	total: 707ms	remaining: 3.66s
162:	learn: 18200.7046050	total: 711ms	remaining: 3.65s
163:	learn: 18183.0791635	total: 714ms	remaining: 3.64s
164:	learn: 18141.5718626	total: 718ms	remaining: 3.63s
165:	learn: 18095.7721397	total: 721ms	remaining: 3.62s
166:	learn: 18061.8241419	total: 724ms	remaining: 3.61s
167:	learn: 18037.4781796	total: 727ms	remaining: 3.6s
168:	learn: 17990.4726593	total: 730ms	remaining: 3.59s
169:	learn: 17943.4516172	total: 733ms	remaining: 3.58s
170:	learn: 17912.4811112	total: 736ms	remaining: 

301:	learn: 13759.0918427	total: 1.31s	remaining: 3.03s
302:	learn: 13729.9161621	total: 1.32s	remaining: 3.03s
303:	learn: 13699.1968944	total: 1.32s	remaining: 3.03s
304:	learn: 13678.8820545	total: 1.32s	remaining: 3.02s
305:	learn: 13629.5382217	total: 1.33s	remaining: 3.02s
306:	learn: 13608.5103254	total: 1.33s	remaining: 3.01s
307:	learn: 13583.8529774	total: 1.34s	remaining: 3.01s
308:	learn: 13560.3431697	total: 1.34s	remaining: 3s
309:	learn: 13531.1803836	total: 1.35s	remaining: 3s
310:	learn: 13513.5296019	total: 1.35s	remaining: 3s
311:	learn: 13477.0094285	total: 1.36s	remaining: 2.99s
312:	learn: 13448.2169479	total: 1.36s	remaining: 2.99s
313:	learn: 13400.6463313	total: 1.37s	remaining: 3s
314:	learn: 13373.3662530	total: 1.38s	remaining: 3s
315:	learn: 13350.4701452	total: 1.39s	remaining: 3s
316:	learn: 13325.7250120	total: 1.4s	remaining: 3.02s
317:	learn: 13294.7663290	total: 1.41s	remaining: 3.02s
318:	learn: 13259.3459664	total: 1.42s	remaining: 3.02s
319:	learn:

473:	learn: 10384.0956966	total: 2.14s	remaining: 2.38s
474:	learn: 10369.8160952	total: 2.14s	remaining: 2.37s
475:	learn: 10347.9632024	total: 2.15s	remaining: 2.37s
476:	learn: 10344.9681851	total: 2.15s	remaining: 2.36s
477:	learn: 10330.9793259	total: 2.16s	remaining: 2.36s
478:	learn: 10319.5275335	total: 2.16s	remaining: 2.35s
479:	learn: 10308.6279256	total: 2.17s	remaining: 2.35s
480:	learn: 10295.6813228	total: 2.17s	remaining: 2.34s
481:	learn: 10293.9394298	total: 2.18s	remaining: 2.34s
482:	learn: 10269.4308877	total: 2.18s	remaining: 2.34s
483:	learn: 10260.4088417	total: 2.19s	remaining: 2.33s
484:	learn: 10233.4665149	total: 2.19s	remaining: 2.33s
485:	learn: 10232.2867857	total: 2.2s	remaining: 2.32s
486:	learn: 10221.8989815	total: 2.2s	remaining: 2.32s
487:	learn: 10204.8221156	total: 2.2s	remaining: 2.31s
488:	learn: 10190.2561256	total: 2.21s	remaining: 2.31s
489:	learn: 10175.6484031	total: 2.21s	remaining: 2.3s
490:	learn: 10165.4779525	total: 2.21s	remaining: 2.

632:	learn: 8397.6297519	total: 2.97s	remaining: 1.72s
633:	learn: 8392.8461348	total: 2.98s	remaining: 1.72s
634:	learn: 8375.6264123	total: 2.98s	remaining: 1.71s
635:	learn: 8368.8008429	total: 2.98s	remaining: 1.71s
636:	learn: 8366.1832589	total: 2.99s	remaining: 1.7s
637:	learn: 8350.3509010	total: 2.99s	remaining: 1.7s
638:	learn: 8337.3173521	total: 3s	remaining: 1.69s
639:	learn: 8323.1248435	total: 3s	remaining: 1.69s
640:	learn: 8312.8214505	total: 3s	remaining: 1.68s
641:	learn: 8305.7312874	total: 3.01s	remaining: 1.68s
642:	learn: 8301.2322481	total: 3.01s	remaining: 1.67s
643:	learn: 8299.6303728	total: 3.02s	remaining: 1.67s
644:	learn: 8298.4344815	total: 3.02s	remaining: 1.66s
645:	learn: 8297.3865222	total: 3.02s	remaining: 1.66s
646:	learn: 8286.2860143	total: 3.03s	remaining: 1.65s
647:	learn: 8277.2718399	total: 3.03s	remaining: 1.65s
648:	learn: 8259.0950898	total: 3.04s	remaining: 1.64s
649:	learn: 8249.0409393	total: 3.04s	remaining: 1.64s
650:	learn: 8232.0626

787:	learn: 7090.7841949	total: 3.65s	remaining: 982ms
788:	learn: 7085.0541983	total: 3.65s	remaining: 977ms
789:	learn: 7074.6213192	total: 3.66s	remaining: 972ms
790:	learn: 7061.2078165	total: 3.66s	remaining: 967ms
791:	learn: 7056.3217129	total: 3.66s	remaining: 962ms
792:	learn: 7053.3202177	total: 3.67s	remaining: 957ms
793:	learn: 7049.1833811	total: 3.67s	remaining: 952ms
794:	learn: 7048.4896304	total: 3.67s	remaining: 947ms
795:	learn: 7043.8456013	total: 3.67s	remaining: 942ms
796:	learn: 7040.3430813	total: 3.68s	remaining: 937ms
797:	learn: 7035.9617210	total: 3.68s	remaining: 932ms
798:	learn: 7027.4809024	total: 3.69s	remaining: 927ms
799:	learn: 7020.0594290	total: 3.69s	remaining: 922ms
800:	learn: 7008.1862298	total: 3.69s	remaining: 917ms
801:	learn: 7000.1041794	total: 3.69s	remaining: 912ms
802:	learn: 6985.9375802	total: 3.7s	remaining: 908ms
803:	learn: 6972.0343950	total: 3.7s	remaining: 903ms
804:	learn: 6965.6277249	total: 3.71s	remaining: 898ms
805:	learn: 

954:	learn: 5925.5329852	total: 4.48s	remaining: 211ms
955:	learn: 5916.9045663	total: 4.48s	remaining: 206ms
956:	learn: 5913.7109501	total: 4.48s	remaining: 201ms
957:	learn: 5905.7929053	total: 4.49s	remaining: 197ms
958:	learn: 5905.2776209	total: 4.49s	remaining: 192ms
959:	learn: 5899.5847639	total: 4.5s	remaining: 187ms
960:	learn: 5892.4167805	total: 4.5s	remaining: 183ms
961:	learn: 5886.2907949	total: 4.51s	remaining: 178ms
962:	learn: 5879.3536668	total: 4.51s	remaining: 173ms
963:	learn: 5874.7229362	total: 4.52s	remaining: 169ms
964:	learn: 5871.8817587	total: 4.52s	remaining: 164ms
965:	learn: 5868.6953762	total: 4.52s	remaining: 159ms
966:	learn: 5867.1039275	total: 4.53s	remaining: 155ms
967:	learn: 5863.7210723	total: 4.53s	remaining: 150ms
968:	learn: 5855.2256866	total: 4.54s	remaining: 145ms
969:	learn: 5843.7867335	total: 4.54s	remaining: 140ms
970:	learn: 5835.9578303	total: 4.54s	remaining: 136ms
971:	learn: 5833.9342942	total: 4.55s	remaining: 131ms
972:	learn: 

In [28]:
from sklearn.linear_model import LinearRegression
model_6 = LinearRegression()
model_6.fit(X_train, y_train) 
print(model_6.score(X_test, y_test))

0.849021949220566


## Test The Model

### Final Model is an Ensemble of all 6 ML models with Mean of Output of all Models taken

In [29]:
results_df = pd.DataFrame(columns = ["Id", "SalePrice"])
count = 0
for index, row in test_data.iterrows():
    print(count)
    count = count + 1
    row = row.to_frame()
    row = row.T
    id = row["Id"]
    row = row.drop(["Id"], axis = 1)
    prediction = (model_1.predict(row)[0] + model_2.predict(row)[0] + model_3.predict(row)[0] + model_4.predict(row)[0] + model_5.predict(row)[0] + model_6.predict(row)[0] ) /6
    to_append = [list(id)[0], prediction]
    a_series = pd.Series(to_append, index = results_df.columns)
    results_df = results_df.append(a_series, ignore_index=True)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [30]:
print("Shape of Train Data: ", results_df.shape)
results_df.head()

Shape of Train Data:  (1459, 2)


Unnamed: 0,Id,SalePrice
0,1461.0,121111.089357
1,1462.0,158180.279399
2,1463.0,176967.155557
3,1464.0,185440.782859
4,1465.0,197339.962261


In [31]:
results_df["Id"] = results_df["Id"].astype(int)

In [32]:
results_df.dtypes

Id             int32
SalePrice    float64
dtype: object

In [33]:
results_df.to_csv("Results.csv", index = False, encoding='utf-8')