#### LINEAR REGRESSION MODEL: HOUSE PRICE PREDICTION

In [3]:
import pandas as pd

In [5]:
area_list = [150,240,350,500,650,700,800,890,1200,1240]
price_list =[200000, 230000, 270000, 350000, 420000, 450000, 500000, 550000, 580000, 600000]

house_df = pd.DataFrame({'Area': area_list, 'Price':price_list })
house_df


Unnamed: 0,Area,Price
0,150,200000
1,240,230000
2,350,270000
3,500,350000
4,650,420000
5,700,450000
6,800,500000
7,890,550000
8,1200,580000
9,1240,600000


#### Splitting the data into training and testing

In [24]:
x = house_df[['Area']] # independent variable/ features/ predictors
y = house_df['Price']# dependent variable / target variable

In [None]:
# training - testing
# 80%  training data
# 20%  testing data

In [53]:
from sklearn.model_selection import train_test_split

In [54]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size = 0.2)

In [55]:
x_train

Unnamed: 0,Area
5,700
6,800
7,890
8,1200
0,150
4,650
1,240
9,1240


In [56]:
x_test

Unnamed: 0,Area
3,500
2,350


In [57]:
y_test

3    350000
2    270000
Name: Price, dtype: int64

#### MODEL DEVELOPMENT

In [58]:
# importing linear regression model from sklear

In [59]:
from sklearn.linear_model import LinearRegression

#### Object of the model

In [60]:
linear_regression_model = LinearRegression()

In [61]:
type(linear_regression_model)

sklearn.linear_model._base.LinearRegression

#### Model training

In [62]:
linear_regression_model.fit(x_train, y_train)

#### Model prediction - Test Data

In [63]:
y_pred = linear_regression_model.predict(x_test)
y_pred

array([353195.22912743, 296689.49380814])

In [64]:
y_test

3    350000
2    270000
Name: Price, dtype: int64

In [65]:
pred_df = pd.DataFrame({'y_pred':y_pred, 'y_act': y_test})
pred_df

Unnamed: 0,y_pred,y_act
3,353195.229127,350000
2,296689.493808,270000


In [66]:
pred_df['error'] = abs(pred_df['y_pred'] - pred_df['y_act'])
pred_df

Unnamed: 0,y_pred,y_act,error
3,353195.229127,350000,3195.229127
2,296689.493808,270000,26689.493808


In [67]:
pred_df['sq_error'] = pred_df['error']**2
pred_df

Unnamed: 0,y_pred,y_act,error,sq_error
3,353195.229127,350000,3195.229127,10209490.0
2,296689.493808,270000,26689.493808,712329100.0


In [68]:
mae = pred_df['error'].sum()/2
mae

14942.361467785202

In [69]:
mse = pred_df['sq_error'].sum()/2
mse

361269284.455712

In [70]:
rmse = mse**(1/2)
rmse

19007.085112023673

#### Model Evaluation

In [71]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


In [None]:
mean_absolute_error(y_test, y_pred)

In [73]:
mean_squared_error(y_test, y_pred)

361269284.455712

In [74]:
r2_score(y_test, y_pred)

0.77420669721518

In [75]:
Accuracy = r2_score(y_test, y_pred)*100
print('Acuracy of the model = ', round(Accuracy,2),'%')

Acuracy of the model =  77.42 %


#### Model to be used for prediction

In [76]:
linear_regression_model.predict([[1500]])



array([729900.13125606])

In [77]:
linear_regression_model.predict([[2000]])



array([918252.58232038])

In [78]:
area_list2 = [2100, 2300, 2400, 3000, 3500]
for area in area_list2:
    print(f'price of the house with {area} sq.ft. = ', linear_regression_model.predict([[area]])[0])

price of the house with 2100 sq.ft. =  955923.0725332419
price of the house with 2300 sq.ft. =  1031264.0529589682
price of the house with 2400 sq.ft. =  1068934.5431718314
price of the house with 3000 sq.ft. =  1294957.4844490099
price of the house with 3500 sq.ft. =  1483309.9355133253


