In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the car-price CSV into a DataFrame and preview its first rows.
automobile_data = pd.read_csv(
    filepath_or_buffer='./Dataset/CarPrice_Assignment.csv'
)
automobile_data.head(n=5)

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


In [3]:
# Remove the car_ID, symboling and CarName columns before modelling.
# The frame is reassigned (no inplace=True) and errors='ignore' is passed so
# the cell is idempotent: re-running it after the columns are already gone
# no longer raises KeyError.
automobile_data = automobile_data.drop(
    columns=['car_ID', 'symboling', 'CarName'],
    errors='ignore',
)

In [4]:
# One-hot encode the remaining non-numeric columns so every feature is numeric.
automobile_data = pd.get_dummies(data=automobile_data)

In [5]:
# Separate the predictors (everything except 'price') from the target.
X = automobile_data.drop(columns='price')
Y = automobile_data['price']

# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# split, and therefore every score reported below, is reproducible across
# kernel restarts instead of changing on each run.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [6]:
# Create the linear regressor, then fit it on the training split.
model = LinearRegression()
model.fit(x_train, y_train)

In [7]:
# Score the fitted model on the data it was trained on.
Training_score = model.score(X=x_train, y=y_train)

In [8]:
# Display the training-set score computed in the previous cell.
Training_score

0.93521329572194278

In [9]:
# Predict prices for the held-out test rows.
y_pred = model.predict(X=x_test)

In [10]:
# R^2 of the predictions on the test split; kept in testing_score so the
# reloaded models further down can be compared against it.
testing_score = r2_score(y_true=y_test, y_pred=y_pred)
testing_score

0.93028258135585784

## Working with JSON
Saving the model's parameters (coefficients and intercept) in JSON format.

In [11]:
import json
# Inspect the fitted coefficient array (one of the two values saved to JSON below).
model.coef_

array([  7.36407122e+01,  -6.54098372e+01,   6.06020709e+02,
         2.70506380e+01,   4.18812863e+00,   1.07310504e+02,
        -2.22936616e+03,  -3.91822245e+03,  -7.81170158e+02,
         1.95566295e+01,   2.10782233e+00,  -2.78982337e+02,
         2.71605675e+02,   4.26071941e+03,  -4.26071941e+03,
        -6.52552857e+02,   6.52552857e+02,  -2.21743189e+02,
         2.21743189e+02,   2.41617486e+03,  -1.17267988e+03,
        -1.00418769e+03,   3.99666356e+02,  -6.38973643e+02,
        -7.65352736e+02,   8.46545150e+01,   6.80698221e+02,
        -3.28612462e+03,   3.28612462e+03,   1.32520611e+03,
        -8.22626794e+03,   8.76062371e+02,   4.60851098e+03,
         3.60344543e+03,  -4.56368246e+03,   2.37672551e+03,
         5.81120349e+03,  -4.80208149e+03,  -5.49314621e+03,
        -1.99595736e+03,   4.10325606e+03,   0.00000000e+00,
         2.37672551e+03,   2.76193374e+02,   8.65971187e+02,
        -4.92131087e+02,   4.26071941e+03,  -3.13215570e+03,
         4.95260916e+02,

In [12]:
# Inspect the fitted intercept (the other value saved to JSON below).
model.intercept_

-27028.389466957658

In [13]:
# Gather the fitted parameters as plain Python objects for JSON serialisation.
# Both entries go through ndarray.tolist() (the original mixed list() and
# .tolist()): it converts NumPy scalars to built-in floats and also handles a
# 2-D coef_, where list() would leave nested ndarrays that json cannot encode.
# np.asarray() lets the intercept be either a NumPy scalar or a plain float.
model_param = {
    'coef': np.asarray(model.coef_).tolist(),
    'intercept': np.asarray(model.intercept_).tolist(),
}

In [14]:
# Serialise the parameter dict to an indented JSON string and show it.
json_txt = json.dumps(obj=model_param, indent=4)
json_txt

'{\n    "coef": [\n        73.64071219217497,\n        -65.4098371912415,\n        606.0207091580105,\n        27.050638019147875,\n        4.188128632741859,\n        107.31050361804567,\n        -2229.3661582128752,\n        -3918.222454443951,\n        -781.1701580128433,\n        19.556629486946893,\n        2.1078223277179395,\n        -278.9823371570366,\n        271.60567498090774,\n        4260.719406760941,\n        -4260.719406760959,\n        -652.5528573332725,\n        652.5528573332388,\n        -221.74318884929227,\n        221.74318884932728,\n        2416.1748608843086,\n        -1172.6798835043423,\n        -1004.1876903885221,\n        399.6663558171317,\n        -638.9736428085173,\n        -765.3527362890395,\n        84.65451500626666,\n        680.6982212827656,\n        -3286.124615270693,\n        3286.124615270697,\n        1325.2061126155716,\n        -8226.267941506736,\n        876.0623714392494,\n        4608.510980125284,\n        3603.4454294515544,\n   

In [15]:
# Persist the JSON text to disk. The target directory is created first so the
# cell also works when models/ does not exist yet; open(..., 'w') would
# otherwise raise FileNotFoundError.
os.makedirs('models', exist_ok=True)
with open('models/regressor_param.txt', 'w') as file:
    file.write(json_txt)

In [16]:
# Read the saved file back and parse it; despite its name, json_text holds the
# decoded dict, not raw text.
with open('models/regressor_param.txt', 'r') as file:
    json_text = json.loads(file.read())

In [17]:
# Start from an unfitted estimator; its learned attributes are filled in from
# the JSON parameters in the next cell instead of calling fit().
json_model=LinearRegression()

In [18]:
# Restore the learned parameters from the parsed JSON onto the empty estimator.
json_model.coef_ = np.asarray(json_text['coef'])
json_model.intercept_ = np.asarray(json_text['intercept'])

In [19]:
# Score the JSON-restored model on the test split.
y_pred = json_model.predict(X=x_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.93028258135585784

In [20]:
# Show the original model's test score for comparison with the value above.
testing_score

0.93028258135585784

## Working with pickle
Saving the model in pickle format.

In [21]:
import pickle


In [22]:
# Serialise the fitted model with pickle.
# The directory is created if missing, and the file is opened in a `with`
# block so the handle is closed (and the data flushed) before the file is read
# back; the original passed a bare open() that was never closed explicitly.
os.makedirs('models', exist_ok=True)
with open('models/model.pkl', 'wb') as pkl_file:
    pickle.dump(model, pkl_file)

In [23]:
# Load the pickled model back and predict on the test split.
# The file is opened in a `with` block so the handle is closed deterministically.
# NOTE: pickle.load can execute arbitrary code; only unpickle files you trust.
with open('models/model.pkl', 'rb') as pkl_file:
    pickle_model = pickle.load(pkl_file)
y_pred = pickle_model.predict(x_test)


In [24]:
# R^2 of the current y_pred against the test targets.
r2_score(y_true=y_test, y_pred=y_pred)

0.93028258135585784

## Working with joblib
Saving the model using joblib.

In [25]:
import joblib
# Persist the fitted model with joblib.
# The parent directory is created first so the dump does not fail when
# models/ is missing.
filename = 'models/model.joblib'
os.makedirs(os.path.dirname(filename), exist_ok=True)
joblib.dump(model, filename)

['models/model.joblib']

In [26]:
# Reload the joblib file and score the restored model on the test split.
joblib_model = joblib.load(filename)
y_pred = joblib_model.predict(X=x_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.93028258135585784