In [1]:
import sys
for p in ['..']:
  if p not in sys.path:
    sys.path.insert(0, p)

from collections import namedtuple
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import scipy.io as spio
from sklearn.linear_model import LogisticRegression

from digits.data import Env, Loader, prepare_cropped
from digits.classifiers import test_model

%matplotlib inline

In [2]:
env = Env('..')
env.assert_ready()
loader = Loader.from_env(env)
loader.assert_ready()
data = loader.read_cropped('test')
train_data = prepare_cropped(data, keep=1000, gray=True)
valid_data = prepare_cropped(data, drop=1000, keep=100, gray=True)
test_data = prepare_cropped(data, drop=1100, keep=100, gray=True)

In [3]:
res = test_model(env, 'baseline', test_data)

In [28]:
import numpy as np
import scipy.stats
import sklearn.metrics

from digits.common import un_hot

class Metrics:
  def __init__(self, num_classes, orig, inv_map, actual_hot, expected):
    self.num_classes = num_classes
    self.orig = orig
    self.inv_map = inv_map
    self.actual_hot = actual_hot
    self.actual = un_hot(num_classes, actual_hot)
    self.expected = expected

  def accuracy(self):
    return sklearn.metrics.accuracy_score(self.actual, self.expected)

  def entropy(self):
    return np.apply_along_axis(scipy.stats.entropy, 0, self.actual_hot)

  def most_uncertain(self):
    e = self.entropy()
    return np.argsort(-e)

In [29]:
m = Metrics(10, data, None, res, test_data.y)

In [22]:
m.accuracy()

0.26000000000000001

In [23]:
m.entropy()

array([ 0.69314718,  3.58351894,  3.33220451,  2.19722458,  1.60943791,
        1.60943791,  1.09861229,  0.        ,  1.09861229,  2.07944154])

In [30]:
m.most_uncertain()

array([1, 2, 3, 9, 4, 5, 6, 8, 0, 7])

In [31]:
m.entropy()[m.most_uncertain()]

array([ 3.58351894,  3.33220451,  2.19722458,  2.07944154,  1.60943791,
        1.60943791,  1.09861229,  1.09861229,  0.69314718,  0.        ])