In [3]:
# Train XGBoost model, save to file using pickle, load and make predictions.
# The whole round trip lives in one cell so it can be re-run from a fresh kernel.
import pickle

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# load data
dataset = pd.read_csv('course-data/diabetes.csv')
# split data into X and y: columns 0-7 are the model inputs, column 8 is the target
X = dataset.iloc[:, 0:8]
Y = dataset.iloc[:, 8]
# split data into train and test sets (fixed seed so the split is repeatable)
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=test_size, random_state=seed
)
# fit model on training data
model = XGBClassifier(
    booster='gbtree',  # The booster type to use, default is 'gbtree'
    objective='binary:logistic',  # Specify the learning task and the corresponding objective function
    learning_rate=0.1,  # Learning rate, default is 0.3
    max_depth=3,  # Maximum depth of a tree, default is 6
    n_estimators=100,  # Number of trees to fit, default is 100
    random_state=42  # Random seed for reproducibility
)
model.fit(X_train, y_train)

# save model to file
# The context manager closes (and therefore flushes) the handle before the
# file is reopened for reading below, instead of relying on garbage collection.
with open("models/pima.pickle.dat", "wb") as model_file:
    pickle.dump(model, model_file)
print("Saved model to: models/pima.pickle.dat")
# some time later...
# load model from file
# NOTE: pickle.load can execute arbitrary code while deserializing; only
# unpickle files that come from a trusted source (here: the file written above).
with open("models/pima.pickle.dat", "rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Loaded model from: models/pima.pickle.dat")
# make predictions for test data using the reloaded model, not the in-memory one
predictions = loaded_model.predict(X_test)
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Saved model to: models/pima.pickle.dat
Loaded model from: models/pima.pickle.dat
Accuracy: 77.17%


In [6]:
# XGBoost + joblib round trip: fit a classifier, write it to disk with joblib,
# read it back, and report the restored model's accuracy on the test split.
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Read the CSV into a DataFrame.
dataset = pd.read_csv('course-data/diabetes.csv')

# Columns 0-7 become the inputs (X); column 8 becomes the target (Y).
X, Y = dataset.iloc[:, :8], dataset.iloc[:, 8]

# Hold out 33% of the rows for testing, with a fixed seed for a repeatable split.
seed, test_size = 7, 0.33
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)

# Configure the classifier, then fit it on the training rows only.
model = XGBClassifier(
    booster='gbtree',
    objective='binary:logistic',
    learning_rate=0.1,
    max_depth=3,
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)

# Persist the fitted model; joblib takes the destination path directly.
joblib.dump(model, "models/pima.joblib.dat")
print("Saved model to: models/pima.joblib.dat")

# Later: restore the model from the same path.
loaded_model = joblib.load("models/pima.joblib.dat")
print("Loaded model from: models/pima.joblib.dat")

# Score the restored model (not the in-memory one) on the held-out rows.
predictions = loaded_model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Saved model to: models/pima.joblib.dat
Loaded model from: models/pima.joblib.dat
Accuracy: 77.17%
