<a href="https://colab.research.google.com/github/fordlotfian/Housing-Price/blob/master/Housing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [None]:
!pip install catboost

Collecting catboost
  Downloading catboost-0.26.1-cp37-none-manylinux1_x86_64.whl (67.4 MB)
[K     |████████████████████████████████| 67.4 MB 51 kB/s 
Installing collected packages: catboost
Successfully installed catboost-0.26.1


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from catboost import CatBoostRegressor
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score 
from scipy import stats

# Importing Datasets

In [None]:
# Both competition files sit in the same folder.
# NOTE(review): assumes Google Drive is already mounted at /content/drive — confirm.
DATA_DIR = '/content/drive/MyDrive/Data Science Magic/Projects/Housing Prices Competition'
train = pd.read_csv(f'{DATA_DIR}/train.csv')
test = pd.read_csv(f'{DATA_DIR}/test.csv')

In [None]:
# Sanity check: (rows, columns) of the raw train and test frames.
train.shape, test.shape

((1460, 81), (1459, 80))

# Dropping Outliers

In [None]:
# Rows with a very large living area but a low sale price are treated as outliers.
outlier_mask = (train['GrLivArea'] > 4000) & (train['SalePrice'] < 300000)
train = train.drop(index=train[outlier_mask].index)

In [None]:
# Confirm how many rows remain after the outlier drop.
train.shape

(1458, 81)

# Combining Datasets

In [None]:
# Stack train on top of test with a fresh 0..n-1 row index so that all
# cleaning steps are applied to both sets at once.
temp = pd.concat([train, test], ignore_index=True)

In [None]:
# Shape of the combined train + test frame.
temp.shape

(2917, 81)

# Capturing the target variable, as well as the ID columns (IDs won't be used as features)


In [None]:
# Set the target and the identifier columns aside before the combined frame
# is transformed.
y = train['SalePrice']
train_ID, test_ID = train['Id'], test['Id']

# Feature Engineering

In [None]:
# Total finished area = basement + first floor + second floor.
# TotalBsmtSF is only zero-filled in a later cell, so treat a missing basement
# as 0 here; otherwise the sum is NaN and is later replaced by the column
# median instead of the real above-ground area.
temp['Total_Sqft'] = temp['TotalBsmtSF'].fillna(0) + temp['1stFlrSF'] + temp['2ndFlrSF']

# Filling Nulls

In [None]:
# Columns whose missing values are replaced by the literal string 'None'.
none_fill_cols = [
    'PoolQC', 'MiscFeature', 'Alley', 'Fence', 'FireplaceQu',
    'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond',
    'MasVnrType', 'MSZoning', 'SaleType', 'Functional',
]
temp[none_fill_cols] = temp[none_fill_cols].fillna(value='None')


In [None]:
# Columns whose missing values are replaced by 0.
zero_fill_cols = [
    'GarageYrBlt', 'GarageArea', 'GarageCars',
    'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
    'BsmtFullBath', 'BsmtHalfBath', 'MasVnrArea',
]
temp[zero_fill_cols] = temp[zero_fill_cols].fillna(value=0)

# These features are stored as numbers but are really categorical, so they are cast to strings

In [None]:
# MSSubClass holds numeric codes; store it as text so it is treated as a category.
temp = temp.astype({'MSSubClass': str})

In [None]:
# OverallCond is cast to text so it is handled as a categorical feature.
temp = temp.astype({'OverallCond': str})

In [None]:
# YrSold is cast to text so it is handled as a categorical feature.
temp = temp.astype({'YrSold': str})

In [None]:
# MoSold is cast to text so it is handled as a categorical feature.
temp = temp.astype({'MoSold': str})

# Filling Remaining Numeric NAs

In [None]:
# Pull out the numeric columns (this still includes Id and SalePrice at this point).
numeric_dtypes = ['float64', 'int64']
nums = temp.select_dtypes(include=numeric_dtypes)

In [None]:
# Replace each remaining gap with the median of its own column.
column_medians = nums.median()
nums = nums.fillna(column_medians)

In [None]:
# Cast every numeric column to an integer dtype. Any value with a fractional
# part (for example a median ending in .5) loses that part here.
nums = nums.astype(int)

In [None]:
# Display the cleaned numeric frame.
nums

Unnamed: 0,Id,LotFrontage,LotArea,OverallQual,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,SalePrice,Total_Sqft
0,1,65,8450,7,2003,2003,196,706,0,150,856,856,854,0,1710,1,0,2,1,3,1,8,0,2003,2,548,0,61,0,0,0,0,0,208500,2566
1,2,80,9600,6,1976,1976,0,978,0,284,1262,1262,0,0,1262,0,1,2,0,3,1,6,1,1976,2,460,298,0,0,0,0,0,0,181500,2524
2,3,68,11250,7,2001,2002,162,486,0,434,920,920,866,0,1786,1,0,2,1,3,1,6,1,2001,2,608,0,42,0,0,0,0,0,223500,2706
3,4,60,9550,7,1915,1970,0,216,0,540,756,961,756,0,1717,1,0,1,0,3,1,7,1,1998,3,642,0,35,272,0,0,0,0,140000,2473
4,5,84,14260,8,2000,2000,350,655,0,490,1145,1145,1053,0,2198,1,0,2,1,4,1,9,1,2000,3,836,192,84,0,0,0,0,0,250000,3343
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2912,2915,21,1936,4,1970,1970,0,0,0,546,546,546,546,0,1092,0,0,1,1,3,1,5,0,0,0,0,0,0,0,0,0,0,0,163000,1638
2913,2916,21,1894,4,1970,1970,0,252,0,294,546,546,546,0,1092,0,0,1,1,3,1,6,0,1970,1,286,0,24,0,0,0,0,0,163000,1638
2914,2917,160,20000,5,1960,1996,0,1224,0,0,1224,1224,0,0,1224,1,0,1,0,4,1,7,1,1960,2,576,474,0,0,0,0,0,0,163000,2448
2915,2918,62,10441,5,1992,1992,0,337,0,575,912,970,0,0,970,0,1,1,0,3,1,6,0,0,0,0,80,32,0,0,0,0,700,163000,1882


# Filling Remaining Categorical NAs

In [None]:
# Take the object-typed (text) columns and convert them to pandas' category dtype.
object_cols = temp.select_dtypes(include=['object'])
cats = object_cols.astype('category')

In [None]:
# Fill each categorical column's missing values with its most frequent value.
# The result is assigned back to the frame: calling fillna(inplace=True) on
# `cats[column]` mutates a temporary selection, which pandas warns about and
# which does not update `cats` at all when Copy-on-Write is enabled.
for column in cats.columns:
    most_frequent = cats[column].mode()[0]
    cats[column] = cats[column].fillna(most_frequent)

In [None]:
# Display the cleaned categorical frame.
cats

Unnamed: 0,MSSubClass,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallCond,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,MoSold,YrSold,SaleType,SaleCondition
0,60,RL,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,5,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,No,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,,Attchd,RFn,TA,TA,Y,,,,2,2008,WD,Normal
1,20,RL,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,8,Gable,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,Gd,TA,Gd,ALQ,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,5,2007,WD,Normal
2,60,RL,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,5,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Mn,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,9,2008,WD,Normal
3,70,RL,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,5,Gable,CompShg,Wd Sdng,Wd Shng,,TA,TA,BrkTil,TA,Gd,No,ALQ,Unf,GasA,Gd,Y,SBrkr,Gd,Typ,Gd,Detchd,Unf,TA,TA,Y,,,,2,2006,WD,Abnorml
4,60,RL,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,5,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,12,2008,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2912,160,RM,Pave,,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,2Story,7,Gable,CompShg,CemntBd,CmentBd,,TA,TA,CBlock,TA,TA,No,Unf,Unf,GasA,Gd,Y,SBrkr,TA,Typ,,,,,,Y,,,,6,2006,WD,Normal
2913,160,RM,Pave,,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,5,Gable,CompShg,CemntBd,CmentBd,,TA,TA,CBlock,TA,TA,No,Rec,Unf,GasA,TA,Y,SBrkr,TA,Typ,,CarPort,Unf,TA,TA,Y,,,,4,2006,WD,Abnorml
2914,20,RL,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,7,Gable,CompShg,VinylSd,VinylSd,,TA,TA,CBlock,TA,TA,No,ALQ,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Detchd,Unf,TA,TA,Y,,,,9,2006,WD,Abnorml
2915,85,RL,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SFoyer,5,Gable,CompShg,HdBoard,Wd Shng,,TA,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,TA,Y,SBrkr,TA,Typ,,,,,,Y,,MnPrv,Shed,7,2006,WD,Normal


# Recombining Datasets where Nulls have been dealt with

In [None]:
# Join the cleaned numeric and categorical halves back together on the row index.
temp = nums.merge(cats, left_index=True, right_index=True)

In [None]:
# Remove the identifier, the target, and the Utilities column from the feature frame.
unused_cols = ['Id', 'SalePrice', 'Utilities']
temp = temp.drop(columns=unused_cols)

# Splitting Dataset back into original rows

In [None]:
# The training rows come first in the combined frame. Use the length of the
# target `y` (captured from `train` after the outlier drop) rather than a
# hard-coded row count, so the split stays correct if the outlier filter or
# the input data changes.
train = temp.iloc[:len(y)]
train

Unnamed: 0,LotFrontage,LotArea,OverallQual,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,Total_Sqft,MSSubClass,MSZoning,Street,Alley,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallCond,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,MoSold,YrSold,SaleType,SaleCondition
0,65,8450,7,2003,2003,196,706,0,150,856,856,854,0,1710,1,0,2,1,3,1,8,0,2003,2,548,0,61,0,0,0,0,0,2566,60,RL,Pave,,Reg,Lvl,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,5,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,No,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,,Attchd,RFn,TA,TA,Y,,,,2,2008,WD,Normal
1,80,9600,6,1976,1976,0,978,0,284,1262,1262,0,0,1262,0,1,2,0,3,1,6,1,1976,2,460,298,0,0,0,0,0,0,2524,20,RL,Pave,,Reg,Lvl,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,8,Gable,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,Gd,TA,Gd,ALQ,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,5,2007,WD,Normal
2,68,11250,7,2001,2002,162,486,0,434,920,920,866,0,1786,1,0,2,1,3,1,6,1,2001,2,608,0,42,0,0,0,0,0,2706,60,RL,Pave,,IR1,Lvl,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,5,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Mn,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,9,2008,WD,Normal
3,60,9550,7,1915,1970,0,216,0,540,756,961,756,0,1717,1,0,1,0,3,1,7,1,1998,3,642,0,35,272,0,0,0,0,2473,70,RL,Pave,,IR1,Lvl,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,5,Gable,CompShg,Wd Sdng,Wd Shng,,TA,TA,BrkTil,TA,Gd,No,ALQ,Unf,GasA,Gd,Y,SBrkr,Gd,Typ,Gd,Detchd,Unf,TA,TA,Y,,,,2,2006,WD,Abnorml
4,84,14260,8,2000,2000,350,655,0,490,1145,1145,1053,0,2198,1,0,2,1,4,1,9,1,2000,3,836,192,84,0,0,0,0,0,3343,60,RL,Pave,,IR1,Lvl,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,5,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,12,2008,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1453,62,7917,6,1999,2000,0,0,0,953,953,953,694,0,1647,0,0,2,1,3,1,7,1,1999,2,460,0,40,0,0,0,0,0,2600,60,RL,Pave,,Reg,Lvl,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,Gable,CompShg,VinylSd,VinylSd,,TA,TA,PConc,Gd,TA,No,Unf,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,8,2007,WD,Normal
1454,85,13175,6,1978,1988,119,790,163,589,1542,2073,0,0,2073,1,0,2,0,3,1,7,2,1978,2,500,349,0,0,0,0,0,0,3615,20,RL,Pave,,Reg,Lvl,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,Gable,CompShg,Plywood,Plywood,Stone,TA,TA,CBlock,Gd,TA,No,ALQ,Rec,GasA,TA,Y,SBrkr,TA,Min1,TA,Attchd,Unf,TA,TA,Y,,MnPrv,,2,2010,WD,Normal
1455,66,9042,7,1941,2006,0,275,0,877,1152,1188,1152,0,2340,0,0,2,0,4,1,9,2,1941,1,252,0,60,0,0,0,0,2500,3492,70,RL,Pave,,Reg,Lvl,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,9,Gable,CompShg,CemntBd,CmentBd,,Ex,Gd,Stone,TA,Gd,No,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,Gd,Attchd,RFn,TA,TA,Y,,GdPrv,Shed,5,2010,WD,Normal
1456,68,9717,5,1950,1996,0,49,1029,0,1078,1078,0,0,1078,1,0,1,0,2,1,5,0,1950,1,240,366,0,112,0,0,0,0,2156,20,RL,Pave,,Reg,Lvl,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,Hip,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,TA,TA,Mn,GLQ,Rec,GasA,Gd,Y,FuseA,Gd,Typ,,Attchd,Unf,TA,TA,Y,,,,4,2010,WD,Normal


In [None]:
# Everything after the training rows belongs to the test set. The boundary is
# the length of the target `y` (captured from `train` after the outlier drop)
# instead of a hard-coded row count.
test = temp.iloc[len(y):]
test

Unnamed: 0,LotFrontage,LotArea,OverallQual,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,Total_Sqft,MSSubClass,MSZoning,Street,Alley,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallCond,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,MoSold,YrSold,SaleType,SaleCondition
1458,80,11622,5,1961,1961,0,468,144,270,882,896,0,0,896,0,0,1,0,2,1,5,0,1961,1,730,140,0,0,0,120,0,0,1778,20,RH,Pave,,Reg,Lvl,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,6,Gable,CompShg,VinylSd,VinylSd,,TA,TA,CBlock,TA,TA,No,Rec,LwQ,GasA,TA,Y,SBrkr,TA,Typ,,Attchd,Unf,TA,TA,Y,,MnPrv,,6,2010,WD,Normal
1459,81,14267,6,1958,1958,108,923,0,406,1329,1329,0,0,1329,0,0,1,1,3,1,6,0,1958,1,312,393,36,0,0,0,0,12500,2658,20,RL,Pave,,IR1,Lvl,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,TA,TA,CBlock,TA,TA,No,ALQ,Unf,GasA,TA,Y,SBrkr,Gd,Typ,,Attchd,Unf,TA,TA,Y,,,Gar2,6,2010,WD,Normal
1460,74,13830,5,1997,1998,0,791,0,137,928,928,701,0,1629,0,0,2,1,3,1,6,1,1997,2,482,212,34,0,0,0,0,0,2557,60,RL,Pave,,IR1,Lvl,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,Gable,CompShg,VinylSd,VinylSd,,TA,TA,PConc,Gd,TA,No,GLQ,Unf,GasA,Gd,Y,SBrkr,TA,Typ,TA,Attchd,Fin,TA,TA,Y,,MnPrv,,3,2010,WD,Normal
1461,78,9978,6,1998,1998,20,602,0,324,926,926,678,0,1604,0,0,2,1,3,1,7,1,1998,2,470,360,36,0,0,0,0,0,2530,60,RL,Pave,,IR1,Lvl,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,Gable,CompShg,VinylSd,VinylSd,BrkFace,TA,TA,PConc,TA,TA,No,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,Gd,Attchd,Fin,TA,TA,Y,,,,6,2010,WD,Normal
1462,43,5005,8,1992,1992,0,263,0,1017,1280,1280,0,0,1280,0,0,2,0,2,1,5,0,1992,2,506,0,82,0,0,144,0,0,2560,120,RL,Pave,,IR1,HLS,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,5,Gable,CompShg,HdBoard,HdBoard,,Gd,TA,PConc,Gd,TA,No,ALQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,,Attchd,RFn,TA,TA,Y,,,,1,2010,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2912,21,1936,4,1970,1970,0,0,0,546,546,546,546,0,1092,0,0,1,1,3,1,5,0,0,0,0,0,0,0,0,0,0,0,1638,160,RM,Pave,,Reg,Lvl,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,2Story,7,Gable,CompShg,CemntBd,CmentBd,,TA,TA,CBlock,TA,TA,No,Unf,Unf,GasA,Gd,Y,SBrkr,TA,Typ,,,,,,Y,,,,6,2006,WD,Normal
2913,21,1894,4,1970,1970,0,252,0,294,546,546,546,0,1092,0,0,1,1,3,1,6,0,1970,1,286,0,24,0,0,0,0,0,1638,160,RM,Pave,,Reg,Lvl,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,5,Gable,CompShg,CemntBd,CmentBd,,TA,TA,CBlock,TA,TA,No,Rec,Unf,GasA,TA,Y,SBrkr,TA,Typ,,CarPort,Unf,TA,TA,Y,,,,4,2006,WD,Abnorml
2914,160,20000,5,1960,1996,0,1224,0,0,1224,1224,0,0,1224,1,0,1,0,4,1,7,1,1960,2,576,474,0,0,0,0,0,0,2448,20,RL,Pave,,Reg,Lvl,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,7,Gable,CompShg,VinylSd,VinylSd,,TA,TA,CBlock,TA,TA,No,ALQ,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Detchd,Unf,TA,TA,Y,,,,9,2006,WD,Abnorml
2915,62,10441,5,1992,1992,0,337,0,575,912,970,0,0,970,0,1,1,0,3,1,6,0,0,0,0,80,32,0,0,0,0,700,1882,85,RL,Pave,,Reg,Lvl,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SFoyer,5,Gable,CompShg,HdBoard,Wd Shng,,TA,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,TA,Y,SBrkr,TA,Typ,,,,,,Y,,MnPrv,Shed,7,2006,WD,Normal


# Train/Test Split

In [None]:
# Hold out 20% of the labelled rows for validation; the seed is fixed for reproducibility.
xtrain, xtest, ytrain, ytest = train_test_split(
    train,
    y,
    random_state=0,
    test_size=0.20,
)

# Defining Categoricals for Catboost

In [None]:
# Names of the columns CatBoost should treat as categorical features.
kitty = [
    'MSSubClass', 'MSZoning', 'Street', 'Alley',
    'LotShape', 'LandContour', 'LotConfig', 'LandSlope',
    'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle',
    'OverallCond', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd',
    'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation',
    'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
    'Heating', 'HeatingQC', 'CentralAir', 'Electrical',
    'KitchenQual', 'Functional', 'FireplaceQu',
    'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond',
    'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature',
    'MoSold', 'YrSold', 'SaleType', 'SaleCondition',
]

# Finding Best HyperParameters

In [None]:
# Candidate values for each hyperparameter explored by the random search.
params = {
    'depth': [3, 1, 2, 6, 4, 5, 7, 8, 9, 10],
    'iterations': [250, 100, 500, 1000],
    'learning_rate': [0.03, 0.001, 0.01, 0.1, 0.2, 0.3],
    'l2_leaf_reg': [3, 1, 5, 10, 100],
}

# Base estimator for the search; it is configured to train on the GPU.
model = CatBoostRegressor(cat_features=kitty, verbose=False, task_type="GPU")

In [None]:
# Try 20 random combinations from `params` on the training split.
rscv = model.randomized_search(
    params,
    xtrain,
    ytrain,
    n_iter=20,
    verbose=False,
)

In [None]:
# Show what the randomized search returned.
rscv

In [None]:
# In conclusion, original parameters are best

# Checking Over/Under fitting

In [None]:
def automation_rmse(model, x, y, xtest, ytest):
    """Fit ``model`` on ``(x, y)`` and return its RMSE on ``(xtest, ytest)``.

    The model is fitted in place. The returned value is the square root of
    the mean squared error between the predictions for ``xtest`` and ``ytest``.
    """
    model.fit(x, y)
    mse = mean_squared_error(model.predict(xtest), ytest)
    return np.sqrt(mse)

In [None]:
# Fit a default-parameter model on the training split and score it on the hold-out split.
holdout_model = CatBoostRegressor(cat_features=kitty, verbose=False, task_type="GPU")
automation_rmse(holdout_model, xtrain, ytrain, xtest, ytest)

24671.075019541364

In [None]:
# Same check with the roles swapped: fit on the hold-out split, score on the training split.
# NOTE(review): this trains a second model on the smaller split; it is not a
# train-vs-validation error comparison of a single fitted model.
swapped_model = CatBoostRegressor(cat_features=kitty, verbose=False, task_type='GPU')
automation_rmse(swapped_model, xtest, ytest, xtrain, ytrain)

30785.27216901967

# Actual Prediction, and upload to Kaggle

In [None]:
def automation_predict(model, x, y, x_pred=None):
    """Fit ``model`` on ``(x, y)`` and return its predictions as a DataFrame.

    Parameters
    ----------
    model : estimator exposing ``fit(x, y)`` and ``predict(data)``.
    x, y : training features and target.
    x_pred : features to predict on. When omitted, the notebook-level ``test``
        frame is used, which keeps existing three-argument calls working.

    Returns
    -------
    pandas.DataFrame with a single ``'SalePrice'`` column holding the predictions.
    """
    model.fit(x, y)
    if x_pred is None:
        # Backward-compatible fallback to the global test frame.
        x_pred = test
    return pd.DataFrame(model.predict(x_pred), columns=['SalePrice'])

In [None]:
# Final model: trained on all labelled rows with explicitly chosen hyperparameters.
final_model = CatBoostRegressor(
    cat_features=kitty,
    verbose=False,
    depth=5,
    l2_leaf_reg=1,
    learning_rate=0.1,
    iterations=500,
)
prediction = automation_predict(final_model, train, y)

In [None]:
# Attach the test-set Ids to the predictions by matching row index.
kaggle = prediction.merge(test_ID, left_index=True, right_index=True)
kaggle


In [None]:
# Write the submission file without the row index.
kaggle.to_csv(path_or_buf='Submission.csv', index=False)