In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
HouseDF = pd.read_csv('HousePricePreciction.csv')

In [5]:
#Finding No. of Categorical variables and its column names

objects = (HouseDF.dtypes=='object')

object_cols = list(objects[objects].index)

print("number of Categorical variables =",len(object_cols),"\n","Categorical Variables are:", object_cols)

number of Categorical variables = 4 
 Categorical Variables are: ['MSZoning', 'LotConfig', 'BldgType', 'Exterior1st']


In [6]:
#removing ID column from the dataset
HouseDF.drop(['Id'],axis=1,inplace=True)

In [7]:
#replacing SalePrice values eith its ,mean values to make the distribution symmetric

HouseDF['SalePrice'] = HouseDF['SalePrice'].fillna(HouseDF['SalePrice'].mean())

In [8]:
#Drop records with null values

new_HouseDF = HouseDF.dropna()

In [9]:
#To check wheather the empty values are still present in the dataset

new_HouseDF.isnull().sum()

MSSubClass      0
MSZoning        0
LotArea         0
LotConfig       0
BldgType        0
OverallCond     0
YearBuilt       0
YearRemodAdd    0
Exterior1st     0
BsmtFinSF2      0
TotalBsmtSF     0
SalePrice       0
dtype: int64

In [10]:
#Using OneHotEncoder to label categorical faetures

from sklearn.preprocessing import OneHotEncoder, StandardScaler

c = (new_HouseDF.dtypes=='object')
object_cols = list(c[c].index)
print("Categorical variables :",object_cols,"\n","No. of categorical features :",len(object_cols))

Categorical variables : ['MSZoning', 'LotConfig', 'BldgType', 'Exterior1st'] 
 No. of categorical features : 4


In [13]:
#Applying onehotencoder to the categorical features

OH_enc = OneHotEncoder(sparse=False)
OH_cols = pd.DataFrame(OH_enc.fit_transform(new_HouseDF[object_cols]))
OH_cols.index = new_HouseDF.index
OH_cols.columns = OH_enc.get_feature_names_out()
DF_final = new_HouseDF.drop(object_cols,axis=1)
DF_final = pd.concat([DF_final,OH_cols],axis=1)



In [14]:
# Splitting datset into train data and test data

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

X = DF_final.drop(['SalePrice'], axis=1)
Y = DF_final['SalePrice']

X_train, X_valid, Y_train, Y_valid = train_test_split(X,Y,train_size = 0.8, test_size = 0.2, random_state= 0)

In [15]:
# Model and Accuracy

#1.SVM (SUPPORT VECTOR MACHINE)

from sklearn import svm
from sklearn.svm import SVC
from sklearn.metrics import mean_absolute_percentage_error

model_SVR = svm.SVR()
model_SVR.fit(X_train,Y_train)

Y_predict = model_SVR.predict(X_valid)

print(mean_absolute_percentage_error(Y_valid,Y_predict))


0.1870512931870423


In [16]:
#2.Random Forest Regression

from sklearn.ensemble import RandomForestRegressor

model_RFR =RandomForestRegressor(n_estimators=10)
model_RFR.fit(X_train,Y_train)

Y_predict = model_RFR.predict(X_valid)

mean_absolute_percentage_error(Y_valid, Y_predict)

0.18564483129827689

In [17]:
#3. Linear Regression

from sklearn.linear_model import LinearRegression


model_LR= LinearRegression()
model_LR.fit(X_train,Y_train)

Y_predict = model_LR.predict(X_valid)

print(mean_absolute_percentage_error(Y_valid, Y_predict))

0.18741683841600051


In [18]:
#4.CatBoost Classifier

from catboost import CatBoostRegressor
from sklearn.metrics import r2_score

cb_MDL = CatBoostRegressor()
cb_MDL.fit(X_train, Y_train)
pred = cb_MDL.predict(X_valid)

cb_r2_score = r2_score(Y_valid,pred )
cb_r2_score

Learning rate set to 0.046797
0:	learn: 56473.5753002	total: 155ms	remaining: 2m 34s
1:	learn: 55780.1567575	total: 158ms	remaining: 1m 18s
2:	learn: 55060.9599505	total: 160ms	remaining: 53.3s
3:	learn: 54456.1126921	total: 163ms	remaining: 40.7s
4:	learn: 53901.1464265	total: 166ms	remaining: 33s
5:	learn: 53334.6062357	total: 169ms	remaining: 28s
6:	learn: 52824.4943646	total: 172ms	remaining: 24.3s
7:	learn: 52381.4267520	total: 175ms	remaining: 21.7s
8:	learn: 51841.0364316	total: 177ms	remaining: 19.5s
9:	learn: 51281.5852560	total: 180ms	remaining: 17.8s
10:	learn: 50825.3817438	total: 182ms	remaining: 16.4s
11:	learn: 50477.2609796	total: 184ms	remaining: 15.2s
12:	learn: 50081.4097641	total: 187ms	remaining: 14.2s
13:	learn: 49663.6380360	total: 190ms	remaining: 13.4s
14:	learn: 49334.5782662	total: 192ms	remaining: 12.6s
15:	learn: 48967.5221026	total: 196ms	remaining: 12s
16:	learn: 48676.4063222	total: 198ms	remaining: 11.5s
17:	learn: 48359.1169955	total: 217ms	remaining: 

0.38351169878113034