# Serializing and Deserializing scikit-learn Models in Python
---
**Three ways:**

**1.** JSON

**2.** pickle

**3.** joblib

In [1]:
import json
import pickle
import joblib

import pandas as pd
import numpy as np
from pathlib import Path

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
# Load the Boston housing data directly as a (features, target) pair.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release — confirm the pinned version.
X, y = load_boston(return_X_y=True)
print(X.shape, y.shape, sep="\n")

# Fail fast if the dataset does not have the expected 506 rows / 13 features.
assert (X.shape, y.shape) == ((506, 13), (506,))

(506, 13)
(506,)


In [3]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=28,
)

In [4]:
# Show the shape of every split: train/test features first, then train/test targets.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep="\n")

# 354 training rows + 152 test rows account for all 506 samples.
assert (X_train.shape, X_test.shape) == ((354, 13), (152, 13))
assert (y_train.shape, y_test.shape) == ((354,), (152,))

(354, 13)
(152, 13)
(354,)
(152,)


In [5]:
# Fit two regressors on the training split and report each one's R^2 score on
# the held-out test split. Both fitted models are serialized later on.
print("Linear Model")
linear_model = LinearRegression().fit(X_train, y_train)
y_pred_test = linear_model.predict(X_test)
print("Test score: ", r2_score(y_test, y_pred_test))

print("-"*30)

print("Tree Model")
# Seed the boosted trees (same seed as the train/test split) so that a fresh
# "Restart & Run All" reproduces the same fitted model and test score.
tree_model = GradientBoostingRegressor(random_state=28).fit(X_train, y_train)
y_pred_test = tree_model.predict(X_test)
print("Test score: ", r2_score(y_test, y_pred_test))

Linear Model
Test score:  0.682129695744708
------------------------------
Tree Model
Test score:  0.8180312663162715


## 1. JSON Format

In [6]:
# Inspect the fitted parameters of the linear model before serializing them.
fitted_coef = linear_model.coef_
fitted_intercept = linear_model.intercept_

print("Coefficient parameters:", fitted_coef)
print("\nIntercept: ", fitted_intercept)

Coefficient parameters: [-9.35992075e-02  3.92793258e-02  1.92925522e-02  2.50621955e+00
 -1.77097508e+01  4.32898476e+00  3.29173281e-03 -1.24431560e+00
  3.02390932e-01 -1.15848174e-02 -9.81842087e-01  1.00781696e-02
 -4.77152997e-01]

Intercept:  31.67177332226347


In [7]:
# Collect the fitted linear-model parameters as plain Python values.
# `.tolist()` converts NumPy scalars/arrays into native floats and (nested)
# lists, so the coefficients are handled the same way as the intercept and a
# multi-dimensional coefficient array would still be JSON-serializable.
model_param = {
    'coef': linear_model.coef_.tolist(),
    'intercept': linear_model.intercept_.tolist(),
}

# Pretty-print with a 4-space indent; this text is what gets written to disk.
json_txt = json.dumps(model_param, indent=4)
print(json_txt)

{
    "coef": [
        -0.0935992074979253,
        0.03927932575945889,
        0.01929255223927326,
        2.506219545038105,
        -17.70975084096846,
        4.328984755094672,
        0.0032917328144949935,
        -1.2443156002329776,
        0.3023909315161962,
        -0.011584817400845417,
        -0.9818420869612504,
        0.010078169637325596,
        -0.47715299685209916
    ],
    "intercept": 31.67177332226347
}


In [8]:
# Make sure the output directory exists (no error if it already does),
# then persist the JSON text produced above.
output_dir = Path("models")
output_dir.mkdir(parents=True, exist_ok=True)

with open('models/regressor_param.txt', mode='w') as file:
    file.write(json_txt)

In [9]:
# Read the stored parameter file back and parse it into a dict
# with the 'coef' and 'intercept' entries.
with open('models/regressor_param.txt', mode='r') as file:
    json_text = json.loads(file.read())

In [10]:
# Rebuild the regressor by hand: the estimator is never fitted; the parameters
# loaded from JSON are attached to it directly as NumPy arrays.
json_model = LinearRegression()

restored_intercept = np.array(json_text['intercept'])
restored_coef = np.array(json_text['coef'])

json_model.intercept_ = restored_intercept
json_model.coef_ = restored_coef

In [11]:
# Score the model rebuilt from JSON on the held-out test split.
y_pred_test = json_model.predict(X_test)
json_test_score = r2_score(y_test, y_pred_test)

print("Test score: ", json_test_score)

Test score:  0.682129695744708


## 2. pickle Format

In [12]:
# Make sure the output directory exists before writing the pickle file.
Path("models").mkdir(parents=True, exist_ok=True)

# Use a context manager so the file handle is flushed and closed as soon as
# the dump finishes, instead of being left open for the rest of the session.
with open('models/model.pkl', 'wb') as file:
    pickle.dump(linear_model, file)

In [13]:
# Restore the model from the pickle file; the context manager closes the
# file handle once loading is done.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open('models/model.pkl', 'rb') as file:
    pickle_model = pickle.load(file)

In [14]:
# Score the unpickled model on the held-out test split.
y_pred_test = pickle_model.predict(X_test)
pickle_test_score = r2_score(y_test, y_pred_test)

print("Test score: ", pickle_test_score)

Test score:  0.682129695744708


## 3. joblib Format

In [15]:
# Destination of the serialized gradient-boosting model.
filename = 'models/model.joblib'

# Create the output directory if needed (no error when it already exists).
Path("models").mkdir(parents=True, exist_ok=True)

# Kept as the last expression so the list returned by `joblib.dump`
# is displayed as the cell output.
joblib.dump(value=tree_model, filename=filename)

['models/model.joblib']

In [16]:
# Restore the gradient-boosting model from the joblib file at `filename`.
# NOTE(review): joblib files are pickle-based per the joblib docs — only load trusted files.
joblib_model = joblib.load(filename=filename)

In [17]:
# Score the model restored via joblib on the held-out test split.
y_pred_test = joblib_model.predict(X_test)
joblib_test_score = r2_score(y_test, y_pred_test)

print("Test score: ", joblib_test_score)

Test score:  0.8180312663162715
