# An example of saving a model to GridFS together with its metadata

This Python notebook shows the next steps: take the generated model and save it to GridFS along with some additional data, such as when it was created and the parameters it used.

In [6]:
import datetime
import gridfs
import pandas as pd
import pickle
import pymongo
import urllib.request
import xgboost

from bson.binary import Binary
from sklearn import model_selection
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# URL for the Iris dataset (UCI Machine Learning Repository)
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

# Download the file and parse it while the HTTP response is still open.
# The `with` block closes the connection even if parsing raises, so the
# socket is not leaked when the cell is re-run.
# header=None: the first row is read as data, columns are numbered from 0.
with urllib.request.urlopen(url) as raw_data:
    data = pd.read_csv(raw_data, header=None)

# Underlying numpy array of the DataFrame. Because the columns mix numeric
# measurements and string labels, this array has object dtype.
dataset = data.values

# split data into X (petal and sepal l/w's) and y (species name as string)
# Cast the four measurement columns to float so X is a proper numeric array
# instead of an object-dtype array of Python floats.
X = dataset[:, 0:4].astype(float)
Y = dataset[:, 4]

# Map each species name in Y to an integer class id.
# fit() returns the encoder itself, so construction and fitting can be chained.
label_encoder = LabelEncoder().fit(Y)
label_encoded_y = label_encoder.transform(Y)

# Hold out a third of the rows for testing; the fixed seed makes the split repeatable.
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(
    X,
    label_encoded_y,
    test_size=test_size,
    random_state=seed,
)

# fit model on training data
model = xgboost.XGBClassifier()
model.fit(X_train, y_train)
# Save the time we created this model as a timezone-aware UTC timestamp.
# datetime.utcnow() is deprecated since Python 3.12 and returns a naive value
# that is easy to misread as local time; now(timezone.utc) is explicit.
model_created_at = datetime.datetime.now(datetime.timezone.utc)

# Predict a class for every row of the held-out test set
y_pred = model.predict(X_test)
# Round each predicted value to the nearest integer
predictions = list(map(round, y_pred))

# Determine the accuracy of the classifier against the true test labels
accuracy = accuracy_score(y_test, predictions)

# Get the fully qualified class name of the model ("package.module.ClassName").
# Built from the class's __module__ and __qualname__ rather than by slicing the
# "<class '...'>" repr with hard-coded indices, which silently breaks if the
# repr format differs.
model_name = ".".join((type(model).__module__, type(model).__qualname__))

# Open a client to the local MongoDB server (this assumes a mongod is
# listening on port 28017 on your laptop)
mongo = pymongo.MongoClient("localhost:28017")
# GridFS instance on top of the "grid" database
grid_db = mongo["grid"]
fs = gridfs.GridFS(grid_db)
# Store the pickled model; the model type, creation time, parameters and
# accuracy are passed to put() as keyword arguments alongside the data.
model_id = fs.put(
    pickle.dumps(model),
    model_type=model_name,
    model_created_at=model_created_at,
    model_param=model.get_xgb_params(),
    accuracy=accuracy,
)
