In [107]:
import pandas as pd
import numpy as np

In [108]:
#Importing Cleaned DataSet
data = pd.read_csv('../../Csv/Pune_Cleaned_Data.csv')

In [109]:
#Droped the unwanted coloumn on DataSet
data.drop(columns=['Unnamed: 0'],inplace = True)

In [110]:
data.head()

Unnamed: 0,total_sqft,bath,price,location,bhk
0,1056.0,2.0,39.07,Alandi Road,2
1,2894.0,4.0,245.0,Alandi Road,4
2,1084.0,2.0,50.0,Alandi Road,2
3,1230.0,2.0,80.0,Alandi Road,2
4,1750.0,3.0,130.0,Alandi Road,3


In [111]:
X = data.drop(columns=['price'])
y = data['price']

In [112]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

In [113]:
X_train, X_test, y_train ,y_test = train_test_split(X, y , test_size=0.2 , random_state=0)

In [114]:
print(X_train.shape)
print(X_test.shape)

(5880, 4)
(1471, 4)


# LINEAR REGRESSION

In [115]:
column_trans = make_column_transformer((OneHotEncoder(sparse = False),['location']),remainder='passthrough')

In [116]:
scaler = StandardScaler()

In [117]:
lr = LinearRegression(normalize=True)

In [118]:
pipe = make_pipeline(column_trans,scaler,lr)

In [119]:
pipe.fit(X_train,y_train)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(sparse=False),
                                                  ['location'])])),
                ('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression(normalize=True))])

In [120]:
y_pred_lr =  pipe.predict(X_test)

In [121]:
r2_score(y_test,y_pred_lr)

0.8875979123741827

# APPLYING LASSO

In [122]:
lasso = Lasso()

In [123]:
pipe = make_pipeline(column_trans,scaler,lasso)

In [124]:
pipe.fit(X_train,y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(sparse=False),
                                                  ['location'])])),
                ('standardscaler', StandardScaler()), ('lasso', Lasso())])

In [125]:
y_pred_lasso =  pipe.predict(X_test)
r2_score(y_test,y_pred_lasso)

0.8860926302632984

# APPLYING RIDGE

In [126]:
ridge = Ridge()

In [127]:
pipe = make_pipeline(column_trans,scaler,ridge)

In [128]:
pipe.fit(X_train,y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(sparse=False),
                                                  ['location'])])),
                ('standardscaler', StandardScaler()), ('ridge', Ridge())])

In [129]:
y_pred_ridge = pipe.predict(X_test)
r2_score(y_test,y_pred_ridge)

0.8875875300861786

In [130]:
print("No Regularization : ", r2_score(y_test,y_pred_lr))
print("Lasso :", r2_score(y_test,y_pred_lasso))
print("Ridge : ", r2_score(y_test,y_pred_ridge))

No Regularization :  0.8875979123741827
Lasso : 0.8860926302632984
Ridge :  0.8875875300861786


# EXPORTING RIDGE MODEL PICKLE

In [131]:
import pickle

In [132]:
pickle.dump(pipe, open('../../Pickle/PuneRidgeModel.pkl','wb'))