## Saving & loading an already-trained model using `pickle` & `joblib`

In [1]:
import pandas as pd
import numpy as np

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the heart-disease table and separate the label column from the features.
df = pd.read_csv('data/heart-disease.csv')
y = df['target']
x = df.drop(columns='target')

# Seed NumPy's global RNG: the classifier below is created without its own
# random_state, so its randomness comes from this global state.
np.random.seed(1)

# Hold out 20% of the rows for evaluation; the split itself is pinned by random_state=1.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1
)
clf = RandomForestClassifier()

In [8]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter candidates to be searched, keyed by estimator parameter name.
# NOTE(review): 'auto' for max_features was deprecated in scikit-learn 1.1 and
# removed in 1.3; newer versions reject it — confirm the installed version
# before re-running this notebook.
grid = dict(
    n_estimators=[10, 100, 200],
    max_depth=[None, 10],
    max_features=['auto', 'sqrt'],
    min_samples_split=[2],
    min_samples_leaf=[2, 4],
)

In [9]:
# Cross-validated (5-fold) search over every combination in `grid`;
# verbose=2 prints progress while fitting.
gs_clf = GridSearchCV(
    estimator=clf,
    param_grid=grid,
    cv=5,
    verbose=2,
)
gs_clf.fit(x_train, y_train)

In [11]:
# Display the hyperparameter combination the grid search selected as best.
gs_clf.best_params_

{'max_depth': None,
 'max_features': 'auto',
 'min_samples_leaf': 4,
 'min_samples_split': 2,
 'n_estimators': 200}

In [17]:
# Hard class predictions for the held-out rows.
y_pred = gs_clf.predict(x_test)

# Per-class probabilities for the same rows; only column 1 is kept.
# NOTE(review): assumes a binary target whose second class is the "positive" one — confirm.
y_pred_prob = gs_clf.predict_proba(x_test)
y_pred_prob_positive = y_pred_prob[:, 1]

In [21]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, roc_auc_score

# Accuracy, recall and precision are computed from the hard predictions.
# ROC AUC is scored on the positive-class probabilities instead of the labels.
print('accuracy       :',accuracy_score(y_test,y_pred))
print('recall         :',recall_score(y_test,y_pred))
print('precision      :',precision_score(y_test,y_pred))
# This line was missing although the recorded output shows it and both
# `roc_auc_score` and `y_pred_prob_positive` were prepared for it.
print('roc_auc_score  :',roc_auc_score(y_test,y_pred_prob_positive))

accuracy       : 0.7704918032786885
recall         : 0.8709677419354839
precision      : 0.7297297297297297
roc_auc_score  : 0.875268817204301


## 1. Saving and loading a model using the `pickle` module

In [22]:
import pickle

In [23]:
#save a model

# Write inside a context manager so the file is flushed and closed as soon as
# the dump finishes; a bare open(...) leaves the handle open until it happens
# to be garbage-collected.
with open('gs_clf_model.pkl', 'wb') as model_file:
    pickle.dump(gs_clf, model_file)

In [25]:
#load a model

# NOTE: unpickling can execute arbitrary code — only load files you created or trust.
# The context manager closes the file handle right after the object is read.
with open('gs_clf_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [35]:
# Predict with the model restored from the pickle file and report the same
# three metrics, one per line.
pickle_pred = model.predict(x_test)

for _label, _metric in (
    ('accuracy       :', accuracy_score),
    ('recall         :', recall_score),
    ('precision      :', precision_score),
):
    print(_label, _metric(y_test, pickle_pred))

accuracy       : 0.7704918032786885
recall         : 0.8709677419354839
precision      : 0.7297297297297297


## 2. Saving and loading a model using the `joblib` module

In [44]:
import joblib

# Pass the destination path directly and let joblib open it for writing.
# The previous open(path) call used the default read-only text mode, which
# raises FileNotFoundError when the file does not exist yet and can never be
# written to when it does.
joblib.dump(gs_clf, 'gs_clf_model.joblib')

In [46]:
# Restore the saved object from the joblib file; this rebinds `model`.
model = joblib.load('gs_clf_model.joblib')

In [48]:
# Display the best hyperparameters stored on the reloaded object.
model.best_params_

{'max_depth': None,
 'max_features': 'auto',
 'min_samples_leaf': 4,
 'min_samples_split': 2,
 'n_estimators': 200}

In [49]:
# Predict with the model restored from the joblib file and report the same
# three metrics, one per line.
joblib_pred = model.predict(x_test)

for _label, _metric in (
    ('accuracy       :', accuracy_score),
    ('recall         :', recall_score),
    ('precision      :', precision_score),
):
    print(_label, _metric(y_test, joblib_pred))

accuracy       : 0.7704918032786885
recall         : 0.8709677419354839
precision      : 0.7297297297297297
