# Analysing the results of cross validation for Scikit-Learn models

In [None]:
import json
import os

import numpy as np

from collections import Counter

from faculty_xval.utilities import most_recent_xval_dirs
from faculty_xval.validation import jobs_cross_validator_from_json

In [None]:
# Directory searched for cross-validation output, built from the USER_NAME
# environment variable (a KeyError is raised if it is not set).
_user_name = os.environ["USER_NAME"]
REFERENCE_DIR = "/project/{}/temp/".format(_user_name)

In [None]:
# Locate directory for most recent cross validation: take the first entry
# returned for the reference directory.
recent_xval_dirs = most_recent_xval_dirs(REFERENCE_DIR)
xval_dir = recent_xval_dirs[0]

In [None]:
# Rebuild the cross validator from the "validator.json" file stored in the
# cross-validation directory.
validator_json_path = os.path.join(xval_dir, "validator.json")
cross_validator = jobs_cross_validator_from_json(validator_json_path)

# The rest of this analysis only applies to Scikit-Learn models, so stop
# early for anything else.
if cross_validator.model_type != "sklearn":
    raise TypeError("Model type must be Scikit-Learn")

In [None]:
# Gather the results of cross validation.
# gather() is unpacked into three values; only the last two are used below.
# The first is discarded (presumably the training indices -- TODO confirm).
_, indices_test, predictions = cross_validator.gather()

In [None]:
# Reload the target data from the JSON file named by the validator's
# `targets_base`, inside the cross-validation directory.
targets_path = os.path.join(xval_dir, cross_validator.targets_base)
with open(targets_path, "r") as targets_file:
    # Only the first element of the stored array is kept
    # (NOTE(review): presumably the file wraps the targets in an outer list
    # -- confirm against the code that writes it).
    targets = np.array(json.load(targets_file))[0]

In [None]:
# Get test dataset of every split: select the targets at the gathered test
# indices along the first axis.
targets_test = targets.take(indices_test, axis=0)

In [None]:
# The test targets and the predictions are compared element-wise further on,
# so their shapes must agree. An explicit check (rather than a bare assert)
# still runs under `python -O` and reports both shapes on failure.
if targets_test.shape != predictions.shape:
    raise ValueError(
        "Shape mismatch between test targets {} and predictions {}".format(
            targets_test.shape, predictions.shape
        )
    )

In [None]:
# Check that the classes are balanced: count how often each target value
# occurs. The Counter is the cell's final expression, so the notebook
# displays it.
target_values = targets.tolist()
Counter(target_values)

In [None]:
# Get the accuracy over training/test splits: the fraction of predictions
# that equal the corresponding targets, averaged along axis 1.
# np.mean always yields floats, whereas dividing the integer match count by
# the integer length would floor to zero under Python 2 division rules.
accuracy = np.mean(targets_test == predictions, axis=1)

In [None]:
# Report the mean accuracy and its standard deviation as percentages.
accuracy_mean_pct = np.mean(accuracy) * 100.
accuracy_std_pct = np.std(accuracy) * 100.
print("Accuracy: {:.4g}% ± {:.4g}%".format(accuracy_mean_pct, accuracy_std_pct))