In [66]:
import pandas as pd
import numpy as np

In [67]:
#Importing Cleaned DataSet
data = pd.read_csv('../Csv/Cleaned_Data.csv')

In [68]:
#Droped the unwanted coloumn on DataSet
data.drop(columns=['Unnamed: 0'],inplace = True)

In [69]:
data.head()

Unnamed: 0,location,total_sqft,bath,price,bhk
0,1st Block Jayanagar,2850.0,4.0,428.0,4
1,1st Block Jayanagar,1630.0,3.0,194.0,3
2,1st Block Jayanagar,1875.0,2.0,235.0,3
3,1st Block Jayanagar,1200.0,2.0,130.0,3
4,1st Block Jayanagar,1235.0,2.0,148.0,2


In [70]:
data.describe()

Unnamed: 0,total_sqft,bath,price,bhk
count,7361.0,7361.0,7361.0,7361.0
mean,1496.942529,2.448173,99.093113,2.500611
std,865.78199,1.011515,93.336841,0.929312
min,300.0,1.0,10.0,1.0
25%,1096.0,2.0,50.0,2.0
50%,1260.0,2.0,73.2,2.0
75%,1680.0,3.0,113.0,3.0
max,30000.0,16.0,2200.0,16.0


# PREDICTOR

In [71]:
X = data.drop(columns=['price'])
y = data['price']

In [72]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

In [73]:
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.2, random_state=0)

In [74]:
print(X_train.shape)

(5888, 4)


In [75]:
print(X_test.shape)

(1473, 4)


## LINEAR REGRESSION

In [76]:
column_trans = make_column_transformer((OneHotEncoder(sparse=False),['location']),remainder='passthrough')

In [77]:
scaler = StandardScaler()

In [78]:
lr = LinearRegression(normalize=True)

In [79]:
pipe = make_pipeline(column_trans,scaler,lr)

In [80]:
pipe.fit(X_train,y_train)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(sparse=False),
                                                  ['location'])])),
                ('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression(normalize=True))])

In [81]:
y_pred_lr = pipe.predict(X_test)

In [82]:
r2_score(y_test, y_pred_lr)

0.823377411825468

## APPLYING LASSO

In [83]:
lasso =Lasso()

In [84]:
pipe = make_pipeline(column_trans,scaler,lasso)

In [85]:
pipe.fit(X_train,y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(sparse=False),
                                                  ['location'])])),
                ('standardscaler', StandardScaler()), ('lasso', Lasso())])

In [86]:
y_pred_lasso = pipe.predict(X_test)
r2_score(y_test,y_pred_lasso)

0.8128285650772719

## APPLYING RIDGE

In [87]:
ridge = Ridge()

In [88]:
pipe = make_pipeline(column_trans,scaler,ridge)

In [89]:
pipe.fit(X_train,y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(sparse=False),
                                                  ['location'])])),
                ('standardscaler', StandardScaler()), ('ridge', Ridge())])

In [90]:
y_pred_ridge = pipe.predict(X_test)
r2_score(y_test,y_pred_ridge)

0.82341466333127

In [92]:
print("No Regularization : ", r2_score(y_test,y_pred_lr))
print("Lasso :", r2_score(y_test,y_pred_lasso))
print("Ridge : ", r2_score(y_test,y_pred_ridge))

No Regularization :  0.823377411825468
Lasso : 0.8128285650772719
Ridge :  0.82341466333127
