In [2]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [7]:
# Read the car-price CSV into a DataFrame, then preview its first rows.
automobile_data = pd.read_csv(filepath_or_buffer='data/CarPrice_Assignment.csv')

automobile_data.head(n=5)

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


### Data Cleaning

In [8]:
# Drop the columns that are excluded from the feature set.
# Reassigning (instead of inplace=True) and ignoring already-missing labels
# keeps this cell safe to re-run without raising a KeyError.
automobile_data = automobile_data.drop(
    columns=['car_ID', 'symboling', 'CarName'], errors='ignore'
)

In [10]:
# One-hot encode the non-numeric (object/category) columns into indicator columns.
automobile_data = pd.get_dummies(data=automobile_data)

automobile_data.head(n=5)

Unnamed: 0,wheelbase,carlength,carwidth,carheight,curbweight,enginesize,boreratio,stroke,compressionratio,horsepower,...,cylindernumber_twelve,cylindernumber_two,fuelsystem_1bbl,fuelsystem_2bbl,fuelsystem_4bbl,fuelsystem_idi,fuelsystem_mfi,fuelsystem_mpfi,fuelsystem_spdi,fuelsystem_spfi
0,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,...,0,0,0,0,0,0,0,1,0,0
1,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,...,0,0,0,0,0,0,0,1,0,0
2,94.5,171.2,65.5,52.4,2823,152,2.68,3.47,9.0,154,...,0,0,0,0,0,0,0,1,0,0
3,99.8,176.6,66.2,54.3,2337,109,3.19,3.4,10.0,102,...,0,0,0,0,0,0,0,1,0,0
4,99.4,176.6,66.4,54.3,2824,136,3.19,3.4,8.0,115,...,0,0,0,0,0,0,0,1,0,0


In [11]:
# (rows, columns) of the frame after encoding.
automobile_data.shape

(205, 52)

In [12]:
# Target is the 'price' column; every other column is used as a feature.
Y = automobile_data['price']
X = automobile_data.drop(columns=['price'])

In [13]:
# Hold out 20% of the rows for testing. A fixed random_state makes the split,
# and therefore every score computed from it, reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [14]:
# Shapes of the training and test feature matrices.
x_train.shape, x_test.shape

((164, 51), (41, 51))

In [15]:
# Fit a linear regression on the training split and report its score there.
clf = LinearRegression()
clf.fit(x_train, y_train)
train_score = clf.score(x_train, y_train)
train_score

0.938044590564892

In [16]:
# Predict on the held-out features and compute R^2 against the true prices.
y_pred = clf.predict(x_test)
test_score = r2_score(y_true=y_test, y_pred=y_pred)
test_score

0.8842296212917656

### Serializing the model to JSON

In [17]:
import json

In [21]:
# Pull the fitted parameters off the estimator.
model_coef, model_intercept = clf.coef_, clf.intercept_


In [23]:
# Collect the fitted parameters as plain Python values so json can encode them.
# .tolist() converts NumPy scalars to native floats (and nested arrays to
# nested lists); list() would leave NumPy objects inside the list.
model_param = {
    'coef': model_coef.tolist(),
    'intercept': model_intercept.tolist(),
}

In [24]:
# Render the parameter dict as indented JSON text and display it.
json_txt = json.dumps(obj=model_param, indent=4)

json_txt

'{\n    "coef": [\n        37.5056930495306,\n        -70.30170309087484,\n        637.4712110562288,\n        101.48008228461133,\n        4.381546975033129,\n        111.44446266442537,\n        -1084.1585812926626,\n        -4488.969511384591,\n        -605.7241001420583,\n        8.48370303820549,\n        2.2371028632842362,\n        -138.04983335487273,\n        150.11714035822308,\n        3556.346495358536,\n        -3556.346495358559,\n        -960.6745217547725,\n        960.6745217547787,\n        -85.99769265196238,\n        85.9976926519518,\n        2439.3408506691826,\n        -673.1913385406621,\n        -909.4383683683626,\n        175.19550102797575,\n        -1031.9066447881676,\n        -237.1835843758087,\n        -50.409467869765095,\n        287.59305224556203,\n        -3530.6826432775397,\n        3530.6826432775406,\n        -63.19463606002239,\n        2.0463630789890885e-12,\n        -667.0710452245089,\n        3123.665218703975,\n        791.5608931610349,

In [26]:
# Make sure the output directory exists so the cell also works on a fresh
# checkout; open() does not create missing parent directories.
os.makedirs('serialized models', exist_ok=True)

# Write the JSON text with an explicit encoding rather than the platform default.
with open('serialized models/reg_param.json', 'w', encoding='utf-8') as json_file:
    json_file.write(json_txt)

### Deserialize the model into a Python object

In [27]:
# Read the saved JSON text back and parse it into a Python dict.
with open('serialized models/reg_param.json', 'r') as json_file:
    json_read_txt = json.loads(json_file.read())

In [28]:
# Rebuild an estimator by hand: start from an unfitted LinearRegression and
# attach the stored parameters under the attribute names read from the fitted one.
json_model = LinearRegression()
json_model.intercept_ = np.asarray(json_read_txt['intercept'])
json_model.coef_ = np.asarray(json_read_txt['coef'])

In [29]:
# Predict with the JSON-restored model.
y_pred1 = json_model.predict(x_test)
# The restored parameters should reproduce the fitted model's predictions.
np.testing.assert_allclose(y_pred1, y_pred)
# Score y_pred1, not y_pred: y_pred comes from clf and would not exercise
# the restored model at all.
r2_score(y_test, y_pred1)

0.8842296212917656

In [30]:
# R^2 of the originally fitted model on the test split, shown for comparison.
test_score

0.8842296212917656

### Pickle method

In [31]:
import pickle

In [32]:
# Serialize the fitted estimator with pickle. The context manager closes the
# file handle (flushing the write) as soon as the dump finishes.
with open('serialized models/model.pkl', 'wb') as pkl_file:
    pickle.dump(clf, pkl_file)

In [33]:
# NOTE: pickle.load can execute arbitrary code; only load files you created
# or otherwise trust.
# The context manager closes the file handle once the model is loaded.
with open('serialized models/model.pkl', 'rb') as pkl_file:
    pickle_model = pickle.load(pkl_file)

In [34]:
# Predictions from the unpickled model on the held-out features.
pickle_model.predict(x_test)

array([ 9313.7903431 , 10459.59547117,  8019.3461695 ,  7556.66723925,
       13948.62376672,  8877.65229866,  9003.27929407, 17872.01098057,
       13072.27334029, 19058.35262357, 12502.82689125, 21585.7370651 ,
       41365.57743669, 38840.48617446,  9737.0489712 ,  7626.77199085,
        8404.59698771,  8678.19967284, 10119.2247979 ,  6490.83409843,
       17620.59634078,  7203.47846633,  5126.95562554, 19642.79740121,
        8665.16136912,  9801.83586983, 17443.20492006, 21043.97523529,
        8131.79928706, 26219.87714314, 10013.08643063, 12705.92841108,
       16696.5235815 , 20974.98553696, 12867.81080355, 21472.20011176,
        7980.92523606, 19722.60690761, 13789.04224348, 17278.72082829,
        6272.46198322])

In [39]:
# R^2 of the unpickled model on the test split.
r2_score(y_test, pickle_model.predict(x_test))

0.8842296212917656

### Using joblib

In [36]:
import joblib

In [37]:
# Persist the fitted estimator with joblib; the call returns the written filename(s).
joblib.dump(value=clf, filename='serialized models/model.joblib')

['serialized models/model.joblib']

In [38]:
# Restore the estimator from the joblib file.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
joblib_model = joblib.load(filename='serialized models/model.joblib')

In [40]:
# R^2 of the joblib-restored model on the test split.
r2_score(y_test, joblib_model.predict(x_test))

0.8842296212917656

## This verifies that the model can be serialized and deserialized with any of these techniques (JSON, pickle, or joblib).