In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

# Fetch the OpenML `mnist_784` dataset (https://www.openml.org/d/554).
data = fetch_openml('mnist_784', version=1, parser='auto')

# Build the frame from the feature matrix and attach the labels as their own column.
# Stacking features and labels with np.c_ forces a single common dtype for the whole
# array, so non-numeric labels (OpenML appears to serve this target as strings --
# check data["target"].dtype) drag every pixel column to `object`.
dfData = pd.DataFrame(data["data"], columns=data["feature_names"], copy=True)
# np.asarray strips any pandas index/categorical wrapper so the labels are assigned
# by position, exactly as the column-stack did.
dfData["target"] = np.asarray(data["target"])

In [2]:
# Preprocessing pipeline: MinMaxScaler rescales each feature column (to [0, 1] by default).
img_pipeline = Pipeline([("mm_scaler", MinMaxScaler())])

# Derive labels and features from dfData without rebinding dfData itself, so the
# cell is idempotent: previously a second run raised KeyError because "target"
# had already been dropped from dfData by the first run.
y = dfData["target"]
X = dfData.drop(columns="target")

# NOTE(review): the scaler is fitted on all rows, i.e. before any train/test split.
X_transf = img_pipeline.fit_transform(X)

In [3]:
# One stratified 80/20 shuffle split; random_state pins the shuffle for reproducibility.
stratSplit = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)

# With n_splits=1 the generator yields exactly one (train, test) pair, so take it
# directly instead of looping.
train_index, test_index = next(stratSplit.split(X_transf, y))

X_train = X_transf[train_index]
X_test = X_transf[test_index]

# The splitter returns row positions, not index labels, so select with .iloc.
# Plain y[...] is label-based for an integer index and only gave the right rows
# because y carries a default RangeIndex.
y_train = y.iloc[train_index]
y_test = y.iloc[test_index]

In [None]:
# Fit on the training split only, then report the score on the held-out split.
# C is the inverse regularization strength, so C=1e5 means very weak regularization.
log_reg = LogisticRegression(C=1e5, max_iter=100).fit(X_train, y_train)
test_accuracy = log_reg.score(X_test, y_test)
print("Test set performance:", test_accuracy)

In [None]:
# Refit the same estimator on every sample. The value displayed is the score on the
# very data the model was just trained on, so it is not a held-out estimate.
full_data_accuracy = log_reg.fit(X_transf, y).score(X_transf, y)
full_data_accuracy

In [None]:
# Option 1 joblib
# `sklearn.externals.joblib` no longer exists in current scikit-learn releases;
# joblib is imported as a standalone package instead.
import joblib
joblibModelName = 'MNISTClassifierJoblibSave.pkl'
# joblib.dump returns the list of file names it wrote, which the cell displays.
joblib.dump(log_reg, joblibModelName)

In [7]:
# Option 2: serialize the fitted classifier with the standard-library pickle module.
import pickle

pickleModelName = 'MNISTClassifierPickleSave.pkl'
# Binary mode is required: pickle writes bytes, not text.
with open(pickleModelName, mode="wb") as model_file:
    pickle.dump(log_reg, model_file)

## Saving pipeline

In [9]:
# `sklearn.externals.joblib` no longer exists in current scikit-learn releases;
# joblib is imported as a standalone package instead.
import joblib
# Persist the fitted preprocessing pipeline so the same scaling can be re-applied later.
pipelineName = 'MNISTClassifierPipeline.pkl'
# joblib.dump returns the list of file names it wrote, which the cell displays.
joblib.dump(img_pipeline, pipelineName)

['MNISTClassifierPipeline.pkl']

## Loading Pipeline

In [10]:
# Restore the fitted preprocessing pipeline from disk and re-apply it to the raw
# features. Only transform() is called, so the scaling learned before saving is reused.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
pipelineName = 'MNISTClassifierPipeline.pkl'
loaded_pipeline = joblib.load(filename=pipelineName)
X_trans_loaded = loaded_pipeline.transform(X)

## Loading Model

In [11]:
# Option 1 joblib
# The file name must match the one used when the model was dumped
# ('MNISTClassifierJoblibSave.pkl'); the previous 'teMNIST...' spelling pointed at a
# file this notebook never writes and raised FileNotFoundError.
modelName = 'MNISTClassifierJoblibSave.pkl'
# NOTE: joblib files are pickle-based; only load files from a trusted source.
loaded_log_clf = joblib.load(modelName)
# Score the restored model on the features produced by the restored pipeline.
loaded_log_clf.score(X_trans_loaded, y)

0.9347

In [12]:
# Option 2: restore the classifier from the pickle file written earlier.
# NOTE: pickle.load can execute arbitrary code; only unpickle files you trust.
modelName = 'MNISTClassifierPickleSave.pkl'
with open(modelName, mode='rb') as model_file:
    loaded_log_clf = pickle.load(model_file)
# Score the restored model on the features produced by the restored pipeline.
loaded_log_clf.score(X_trans_loaded, y)

0.9347