# House Price Predictions

### libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import pickle as pk

### dataset loading

In [2]:
housing = pd.read_csv('housing.csv')
housing.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


### viewing number of rows and columns

In [3]:
housing.shape

(545, 13)

### printing dataset statistical analysis

In [4]:
housing.describe()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
count,545.0,545.0,545.0,545.0,545.0,545.0
mean,4766729.0,5150.541284,2.965138,1.286239,1.805505,0.693578
std,1870440.0,2170.141023,0.738064,0.50247,0.867492,0.861586
min,1750000.0,1650.0,1.0,1.0,1.0,0.0
25%,3430000.0,3600.0,2.0,1.0,1.0,0.0
50%,4340000.0,4600.0,3.0,1.0,2.0,0.0
75%,5740000.0,6360.0,3.0,2.0,2.0,1.0
max,13300000.0,16200.0,6.0,4.0,4.0,3.0


### check for null values

In [5]:
housing.isnull().sum()

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64

### dealing with categorical data

In [6]:
le = LabelEncoder()

In [7]:
housing['mainroad'] = le.fit_transform(housing['mainroad'])
housing['guestroom'] = le.fit_transform(housing['guestroom'])
housing['basement'] = le.fit_transform(housing['basement'])
housing['hotwaterheating'] = le.fit_transform(housing['hotwaterheating'])
housing['airconditioning'] = le.fit_transform(housing['airconditioning'])
housing['prefarea'] = le.fit_transform(housing['prefarea'])
housing['furnishingstatus'] = le.fit_transform(housing['furnishingstatus'])

### dividing data into features and lables

In [8]:
X = housing.drop(['price'], axis=1)
y = housing['price']

### making train and test sets 

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30)

## The Models

### A: Linear Regression

In [10]:
lr = LinearRegression()

In [11]:
regressor = lr.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

In [12]:
print(r2_score(y_test,y_pred)*100)

60.69340685429513


### B: Random Forest

In [13]:
rf = RandomForestRegressor(n_estimators=500, random_state=42)

In [14]:
regressor2 = rf.fit(X_train, y_train)
y_pred2 = regressor2.predict(X_test)

In [15]:
print(r2_score(y_test,y_pred2)*100)

57.32106696948331


### saving best model

In [16]:
pk.dump(regressor,open('HouseModel.sav','wb'))

### loading the model

In [17]:
HouseModel = pk.load(open('HouseModel.sav','rb'))