In [None]:
%matplotlib inline

In [None]:
import glob
import os.path

import keras
import matplotlib.pyplot
import numpy
import pandas
import seaborn
import sklearn.metrics
import tensorflow

import deepometry.model
import deepometry.utils

# Evaluate

After training a model to classify single cell images, it is often useful to evaluate the performance of the model on an unseen annotated dataset. Evaluation helps predict model performance on unseen data.

Suppose we have the following directory structure. Data from this experiment was not shown to the model during training. Images are saved as NPY files with patient prefixes:

    /data/parsed/
        patient_C/
            positive/
                patient_C__32e88e1ac3a8f44bf8f77371155553b9.npy
                patient_C__3dc56a0c446942aa0da170acfa922091.npy  
                ...
            negative/
                patient_C__8068ef7dcddd89da4ca9740bd2ccb31e.npy
                patient_C__8348deaa70dfc95c46bd02984d28b873.npy
                ...
                

The data can be used to evaluate a model for classifying image data as one of the classes (here, `positive` or `negative`). The `deepometry.utils.load` function selects the images to use for evaluating the model and generates the labels for those images. We can limit the selection to 256 samples per class by specifying `sample=256`.

In [None]:
# Load the held-out images, their labels, and the unit count, keeping at
# most 256 samples per class.
x, y, units = deepometry.utils.load(
    ["data/parsed/patient_C"],
    sample=256
)

In [None]:
# Build a TensorFlow session restricted to the GPU with device index "3".
configuration = tensorflow.ConfigProto()
# Let GPU memory allocation grow on demand rather than reserving it all up front.
configuration.gpu_options.allow_growth = True
# NOTE(review): the device index is hard-coded — confirm "3" is the intended
# GPU on the machine running this notebook.
configuration.gpu_options.visible_device_list = "3"
session = tensorflow.Session(config = configuration)

# Register this session as the one used by the Keras backend.
keras.backend.set_session(session)

# Classification test

The evaluation and target data (`x` and `y`, respectively) is next passed to the model for evaluation. **A previously trained model is required.** The `evaluate` method loads the trained model weights. See the `fit` notebook for instructions on training a model. 

By default, evaluation data is provided to the model in batches of 32 samples. Use `batch_size` to configure the number of samples per batch (the cell below uses `batch_size=50`). A smaller `batch_size` requires less memory.

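The `evaluate` method outputs the model's loss and accuracy metrics as the array `[loss, accuracy]`. The cells below instead call `predict` and take the `argmax` of its output to obtain one predicted class label per sample, which is then compared against the expected labels in `y`.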

In [None]:
# Build the model using the per-sample shape of the loaded data.
# NOTE(review): units is hard-coded to 4 although load() also returned a
# `units` value — confirm 4 matches the trained model being evaluated.
model = deepometry.model.Model(units=4, shape=x.shape[1:])
model.compile()

# Run the model over every sample, then reduce the last axis to the index
# of the largest output so each sample gets a single predicted label.
predicted = numpy.argmax(model.predict(x=x, batch_size=50), axis=-1)
expected = y

In [None]:
# Confusion matrix of raw counts: rows are the expected classes, columns
# are the predicted classes.
confusion = pandas.DataFrame(
    sklearn.metrics.confusion_matrix(expected, predicted)
)

# Draw on an explicit axes object so labels can be attached to this figure.
figure, axes = matplotlib.pyplot.subplots(figsize=(12, 8))

# fmt="d" prints whole-number counts; the default annotation format would
# show counts of 100 or more in scientific notation (e.g. 2.6e+02).
seaborn.heatmap(confusion, annot=True, fmt="d", ax=axes)

axes.set_xlabel("Predicted class")
axes.set_ylabel("Expected class")
axes.set_title("Confusion matrix (counts)");

In [None]:
# Confusion matrix of raw counts: rows are the expected classes, columns
# are the predicted classes.
counts = sklearn.metrics.confusion_matrix(expected, predicted)

# Convert each row to fractions of that expected class. A class that only
# occurs in `predicted` has an all-zero row; leave such rows at 0 instead of
# dividing by zero (which would yield NaN and a RuntimeWarning).
row_totals = counts.sum(axis=1, keepdims=True)
fractions = numpy.true_divide(
    counts,
    row_totals,
    out=numpy.zeros(counts.shape, dtype=float),
    where=row_totals != 0
)

confusion = pandas.DataFrame(fractions)

# Draw on an explicit axes object so labels can be attached to this figure.
figure, axes = matplotlib.pyplot.subplots(figsize=(12, 8))

seaborn.heatmap(confusion, annot=True, fmt=".2f", ax=axes)

axes.set_xlabel("Predicted class")
axes.set_ylabel("Expected class")
axes.set_title("Confusion matrix (fraction of each expected class)");

In [None]:
# Overall accuracy of the predicted labels against the expected labels.
sklearn.metrics.accuracy_score(y_true=expected, y_pred=predicted)