# Save and Load Trained XGBoost Models

 


- How to save and later load your trained XGBoost model using pickle.
- How to save and later load your trained XGBoost model using joblib.


### Serialize Models with Pickle


In [1]:
# Train XGBoost model, save to file using pickle, load and make predictions
from numpy import loadtxt
from xgboost import XGBClassifier
import pickle
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# load data
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")

# split data into X (columns 0-7) and y (column 8)
X = dataset[:,0:8]
Y = dataset[:,8]

# split data into train and test sets
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size,
                                                    random_state=seed)

# fit model on training data
model = XGBClassifier()
model.fit(X_train, y_train)

# save model to file
# Use a context manager so the file is flushed and closed before it is read back.
with open("pima.pickle.dat", "wb") as model_file:
    pickle.dump(model, model_file)
print("Saved model to: pima.pickle.dat")

# some time later...
# load model from file
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open("pima.pickle.dat", "rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Loaded model from: pima.pickle.dat")
# make predictions for test data
y_pred = loaded_model.predict(X_test)
predictions = [round(value) for value in y_pred]
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))


Saved model to: pima.pickle.dat
Loaded model from: pima.pickle.dat
Accuracy: 77.95%


### Serialize Models with Joblib


In [3]:
# Train XGBoost model, save to file using joblib, load and make predictions
from numpy import loadtxt
from xgboost import XGBClassifier
# sklearn.externals.joblib was removed in scikit-learn 0.23; prefer the
# standalone joblib package and fall back to the vendored copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# load data
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")

# split data into X (columns 0-7) and y (column 8)
X = dataset[:,0:8]
Y = dataset[:,8]

# split data into train and test sets
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size,
                                                    random_state=seed)

# fit model on training data
model = XGBClassifier()
model.fit(X_train, y_train)

# save model to file
joblib.dump(model, "pima.joblib.dat")
print("Saved model to: pima.joblib.dat")

# some time later...
# load model from file
# NOTE: joblib files are pickle-based; only load files you trust.
loaded_model = joblib.load("pima.joblib.dat")
print("Loaded model from: pima.joblib.dat")
# make predictions for test data
y_pred = loaded_model.predict(X_test)
predictions = [round(value) for value in y_pred]
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))



Saved model to: pima.joblib.dat
Loaded model from: pima.joblib.dat
Accuracy: 77.95%
