# Model Validation

Wherein we take the model as trained and tuned on the full training set and check it against the hold-out test set split off at the beginning of development.

In [1]:
import sqlite3
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import r2_score

train_conn = sqlite3.connect('./sqlite/training_incidents.sqlite')
test_conn  = sqlite3.connect('./sqlite/validation_incidents.sqlite')

r_seed = 38

train_inputs = []
train_labels = []
test_inputs = []
test_labels = []

train_results = test_conn.execute("SELECT * from incidents")
test_results = test_conn.execute("SELECT * from incidents")

for rec in train_results:
    train_labels.append(rec[1])
    train_inputs.append(rec[2:])

for rec in test_results:
    test_labels.append(rec[1])
    test_inputs.append(rec[2:])


inputs_train = np.array(train_inputs)
inputs_test = np.array(test_inputs)
labels_train = np.array(train_labels)
labels_test = np.array(test_labels)


At this point all the data is loaded, both training and test, so now we take the parameters
from the tuned algorithm and train it on the full set before scoring it against the test data

In [2]:
from sklearn.ensemble import GradientBoostingRegressor

gbr_clf = GradientBoostingRegressor(alpha=0.9, init=None, learning_rate=1.0, loss='ls',
             max_depth=3, max_features=None, max_leaf_nodes=None,
             min_samples_leaf=5, min_samples_split=5,
             min_weight_fraction_leaf=0.0, n_estimators=100,
             presort='auto', random_state=None, subsample=1.0, verbose=0,
             warm_start=False)

gbr_clf.fit(inputs_train, labels_train)

labels_predict = gbr_clf.predict(inputs_test)

print "EVS", explained_variance_score(labels_test, labels_predict)
print "MAE", mean_absolute_error(labels_test, labels_predict)
print "MSE", mean_squared_error(labels_test, labels_predict)
print "MedAE", median_absolute_error(labels_test, labels_predict)
print "r^2", r2_score(labels_test, labels_predict)

EVS 0.676676560513
MAE 0.809925635314
MSE 1.43182316638
MedAE 0.534865737451
r^2 0.676676560513


This is quite promising; here I'm showing an r^2 score of 0.68 against a quite large dataset (30k) that has never been seen by the model before either in training or tuning.  The next step is to re-train the model from scratch on the entire dataset and persist it so it can be incorporated into other programs without paying a retraining penalty