# Analysing the results of cross validation for Keras models

In [None]:
import json
import os

import numpy as np

from faculty_xval.utilities import most_recent_xval_dirs
from faculty_xval.validation import jobs_cross_validator_from_json

In [None]:
# Reference directory handed to most_recent_xval_dirs to locate
# cross-validation output. NOTE(review): "foo" looks like a
# placeholder — set it for your environment before running.
REFERENCE_DIR = "foo"

**Note**: Set `REFERENCE_DIR` to the same value used in the notebook `jobs_cross_validator_run.ipynb`. Recall that `Users` and `Developers` specify this variable in different ways.

In [None]:
# Use the first directory reported by most_recent_xval_dirs
# (presumably ordered most recent first — verify against the helper).
candidate_dirs = most_recent_xval_dirs(REFERENCE_DIR)
xval_dir = candidate_dirs[0]

In [None]:
# Rebuild the JobsCrossValidator from the JSON file saved
# alongside the cross-validation results.
validator_json = os.path.join(xval_dir, "validator.json")
cross_validator = jobs_cross_validator_from_json(validator_json)

# The rest of this notebook only applies to Keras models,
# so stop early for any other model type.
is_keras = cross_validator.model_type == "keras"
if not is_keras:
    raise TypeError("Model type must be Keras")

In [None]:
# Gather the results of cross validation. The call is unpacked
# into three values: the first is not used in this notebook and
# is discarded; the other two are the test indices and the model
# predictions (presumably one entry per train/test split — confirm).
_, indices_test, predictions = cross_validator.gather()

In [None]:
# Sanity check: summing the predicted probabilities over axis 2
# should give 1. Display the largest absolute deviation from 1,
# which is expected to be small.
deviation = np.abs(predictions.sum(axis=2) - 1)
deviation.max()

In [None]:
# Read the saved targets back from JSON and keep the first
# element of the resulting array.
targets_path = os.path.join(xval_dir, cross_validator.targets_base)
with open(targets_path, "r") as targets_file:
    targets = np.array(json.load(targets_file))[0]

In [None]:
# Pick out the target rows (along axis 0) at the test indices,
# giving the test targets for every split.
targets_test = targets.take(indices_test, axis=0)

In [None]:
# The test targets must line up element-for-element with the
# predictions; report both shapes if they do not.
assert targets_test.shape == predictions.shape, (
    "Shape mismatch: targets_test {} vs predictions {}".format(
        targets_test.shape, predictions.shape
    )
)

In [None]:
# Print the number of samples per class (column sums of the
# targets along axis 0) so class balance can be inspected.
class_counts = targets.sum(axis=0).astype(int)
for class_index, class_count in enumerate(class_counts):
    print("Class {}: {}".format(class_index, class_count))

In [None]:
# Collapse the one-hot axis (axis 2) into integer class labels
# by taking the position of the largest entry
# (labels 0 to 9, assuming ten classes).
y_test = np.argmax(targets_test, axis=2)
y_pred = np.argmax(predictions, axis=2)

# Accuracy on each train/test split: the number of matching
# labels along axis 1 divided by the number of test samples.
n_test = y_test.shape[1]
hits = (y_test == y_pred).sum(axis=1)
accuracy = hits / n_test

In [None]:
# Summarise accuracy across splits as mean ± standard deviation,
# both expressed in percent.
mean_pct = np.mean(accuracy) * 100.0
std_pct = np.std(accuracy) * 100.0
print("Accuracy: {:.4g}% ± {:.4g}%".format(mean_pct, std_pct))