# Tips #
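+ Take note of the Python version and the versions of the libraries (e.g. scikit-learn, NumPy) used to train and save the model; a saved model may fail to load, or behave differently, under other versions.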

In [14]:
# Save Model Using Pickle
# Step 1: load the Pima Indians diabetes data, hold out a test split and
# train the logistic regression model that the cells below persist to disk.
import pandas
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; train_test_split now lives in sklearn.model_selection.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
url = "http://mlr.cs.umass.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
# The first eight columns are the input features, the ninth ('class') is the label
X = array[:,0:8]
Y = array[:,8]
test_size = 0.33
seed = 7
# A fixed random_state makes the split reproducible between runs
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
# Fit the model on the training split (the 67% of rows not held out for testing)
model = LogisticRegression()
model.fit(X_train, Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

# Pickle #

In [15]:
import pickle
# save the model to disk
filename = '../../models/pickle_pima_model.sav'
# The context manager guarantees the file is flushed and closed, even if
# pickling raises part-way through.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [16]:
# some time later...

# load the model from disk
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
# The context manager closes the file handle as soon as the model is read.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# Mean accuracy of the restored model on the held-out test split
result = loaded_model.score(X_test, Y_test)
print(result)

0.755905511811


# joblib #

+ Joblib is part of the SciPy ecosystem and provides utilities for pipelining Python jobs.
+ It provides utilities for efficiently saving and loading Python objects that make use of NumPy data structures.
+ This can be useful for some machine learning algorithms that require a lot of parameters or store the entire dataset (like K-Nearest Neighbors).

In [17]:
# joblib is a standalone package; the copy vendored as sklearn.externals.joblib
# was deprecated in scikit-learn 0.21 and removed in 0.23.
try:
    import joblib
except ImportError:  # fall back to the vendored copy on old installs
    from sklearn.externals import joblib
# save the model to disk
filename = '../../models/joblib_pima_model.sav'
joblib.dump(model, filename)

['../../models/joblib_pima_model.sav',
 '../../models/joblib_pima_model.sav_01.npy',
 '../../models/joblib_pima_model.sav_02.npy',
 '../../models/joblib_pima_model.sav_03.npy',
 '../../models/joblib_pima_model.sav_04.npy']

In [18]:

# ...later on, once the model is needed again

# restore the model that joblib wrote to `filename`
loaded_model = joblib.load(filename)
# score the restored model against the held-out test split
result = loaded_model.score(X=X_test, y=Y_test)
print(result)

0.755905511811
