# Model Persistence

머신러닝 모델을 학습시킨 후, 모델 재사용을 위해 저장하고 로드하는 방법을 알아봅니다. 

## Scikit-Learn

Scikit-Learn에서는 학습한 모델을 저장하고 다시 로드할 때 joblib 라이브러리를 사용합니다. 아래 예제에서는 `joblib.dump` 로 모델을 파일에 저장하고, `joblib.load` 로 다시 불러옵니다.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

import numpy as np
from sklearn import datasets, model_selection, linear_model, metrics
import os, joblib

# Data: build a synthetic 3-class classification set and split it 70/30.
# Seed NumPy's global RNG. NOTE(review): the estimator below is created
# without a random_state, so presumably its internal shuffling draws from
# this global state -- confirm before removing this line.
np.random.seed(0)
n_samples = 100000
np_data_xs, np_data_ys = datasets.make_classification(
    n_samples=n_samples,  # number of samples
    n_features=10,  # number of X features
    n_informative=3,
    n_classes=3,  # number of Y classes
    random_state=0)  # seed for random number generation
print("data shape: np_data_xs={}, np_data_ys={}".format(np_data_xs.shape, np_data_ys.shape))
np_train_xs, np_test_xs, np_train_ys, np_test_ys = model_selection.train_test_split(
    np_data_xs, np_data_ys,
    test_size=0.3, shuffle=True, random_state=2)
print("train shape: np_train_xs={}, np_train_ys={}".format(np_train_xs.shape, np_train_ys.shape))
print("test shape: np_test_xs={}, np_test_ys={}".format(np_test_xs.shape, np_test_ys.shape))

# Model
model = linear_model.LogisticRegression(solver='sag', multi_class='multinomial')

# Training
print("model={}".format(model))
model.fit(np_train_xs, np_train_ys)

# Evaluation
np_pred_ys = model.predict(np_test_xs)
cr = metrics.classification_report(np_test_ys, np_pred_ys)
print("classification_report\n", cr)

# Save / load the model
# Build the path from the platform's temporary directory instead of a
# hard-coded "/tmp", which does not exist on every OS and ignores TMPDIR.
import tempfile  # local import: only needed to locate the temp directory
path_model = os.path.join(tempfile.gettempdir(), "model.joblib")
joblib.dump(model, path_model)
# NOTE: joblib.load is pickle-based; only load files from a trusted source.
# `model` is rebound to the reloaded estimator so the re-evaluation below
# exercises the persisted copy, not the in-memory one.
model = joblib.load(path_model)

# Re-evaluation with the reloaded model
np_pred_ys = model.predict(np_test_xs)
cr = metrics.classification_report(np_test_ys, np_pred_ys)
print("classification_report\n", cr)

data shape: np_data_xs=(100000, 10), np_data_ys=(100000,)
train shape: np_train_xs=(70000, 10), np_train_ys=(70000,)
test shape: np_test_xs=(30000, 10), np_test_ys=(30000,)
model=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=None, penalty='l2', random_state=None, solver='sag',
          tol=0.0001, verbose=0, warm_start=False)
classification_report
               precision    recall  f1-score   support

           0       0.77      0.79      0.78     10004
           1       0.77      0.72      0.74     10004
           2       0.88      0.91      0.89      9992

   micro avg       0.81      0.81      0.81     30000
   macro avg       0.81      0.81      0.81     30000
weighted avg       0.81      0.81      0.81     30000

classification_report
               precision    recall  f1-score   support

           0       0.77      0.79      0.78     10004
           1   