In [2]:
import time
import os
import numpy as np
import tensorflow as tf
import importlib
from datetime import datetime
from tensorflow.python.framework.ops import reset_default_graph
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

import utils

SAVER_PATH = {'base': 'train/',
              'checkpoint': 'checkpoints/',
              'log': 'logs/',
              'test': 'test/'}
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

def load_config(config_name):
    config_path = 'configurations.' + config_name
    config = importlib.import_module(config_path)
    return config

def load_path(config_name, split, epoch=None):
    name = "%s-%d" % (config_name, split)
    local_path = os.path.join(SAVER_PATH['base'], name)
    checkpoint_saver = tf.train.Saver()
    checkpoint_path = os.path.join(local_path, SAVER_PATH['checkpoint'])
    checkpoint_file_path = os.path.join(checkpoint_path, 'checkpoint')
    if epoch is None:
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path)
    else:
        latest_checkpoint = "%s-%d" % (checkpoint_file_path, epoch)
    return checkpoint_saver, latest_checkpoint

def validate(sess, gen, l_out_softmax, X_pl, t_pl):
    outs = []
    targets = []
    v_sum = 0
    for batch, i in gen():
        fetches = [l_out_softmax]
        feed_dict = {X_pl: batch['X'], t_pl: batch['t'], is_training_pl: False}
        out = sess.run(fetches=fetches, feed_dict=feed_dict)[0]
        outs.append(out)
        targets.append(batch['t'])
        v_sum += i
    outs = np.concatenate(outs, axis=0)[:v_sum]
    targets = np.concatenate(targets, axis=0)[:v_sum]
    aucs = utils.auc(targets, outs[:, 1])
    preds = outs[:, 1]>0.5
    accs = np.mean(np.equal(preds, targets))
    return outs, targets, aucs, accs

In [3]:
name_split_epochs = [("rnn_big", 1, [19501, 15001]),
                     ("rnn_big", 2, [27001, 19501]),
                     ("rnn_big", 3, [6501, 11001]),
                     ("rnn_big", 4, [12501, 8001]),
                     ("rnn_big", 5, [13001, 16001]),
                     ("rnn_big", 6, [25001, 20501]),
                     ("rnn_big", 7, [16001, 19501]),
                     ("rnn_big", 8, [22001, 26501])]

valid_preds = {1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: []}
valid_targets = {1: None, 2: None, 3: None, 4: None, 5: None, 6: None, 7: None, 8: None}
test_preds = {1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: []}
test_targets = {1: None, 2: None, 3: None, 4: None, 5: None, 6: None, 7: None, 8: None}
for config_name, split, epochs in name_split_epochs:
    print("%s-%d" % (config_name, split))
    for epoch in epochs:
        print("  %d" % epoch)
        reset_default_graph()
        config = load_config(config_name)
        data_gen = config.data_gen(split, train=False)
        X_pl, t_pl, is_training_pl, _, l_out_softmax, loss, accuracy, train_op, global_step = config.model()
        checkpoint_saver, latest_checkpoint = load_path(config_name, split, epoch=epoch)
        print(latest_checkpoint)
        gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)) as sess:
            checkpoint_saver.restore(sess, latest_checkpoint)
            outs, targets, aucs, accs = validate(sess, data_gen.gen_valid, l_out_softmax, X_pl, t_pl)
            print("  valid accs: %f" % accs)
            print("  valid aucs: %f" % aucs)
            valid_preds[split].append(outs)
            valid_targets[split] = targets
            outs, targets, aucs, accs = validate(sess, data_gen.gen_test, l_out_softmax, X_pl, t_pl)
            print("  test accs: %f" % accs)
            print("  test aucs: %f" % aucs)
            test_preds[split].append(outs)
            test_targets[split] = targets
    print

rnn_big-1
  19501
initializing data generator!
Train already downloaded ...
Test already downloaded ...
['t_valid', 'X_test', 'X_valid', 't_test']
Valid is found!
Test is found!
building model ...
building model ...
X_pl (?, 64)
t_pl (?,)
loss (?,)
loss ()
argmax (?,)
correct, (?,)
accuracy ()
building optimizer ...
train/rnn_big-1/checkpoints/checkpoint-19501
  valid accs: 0.916056
  valid aucs: 0.968517
  test accs: 0.890742
  test aucs: 0.955796
  15001
initializing data generator!
Train already downloaded ...
Test already downloaded ...
['t_valid', 'X_test', 'X_valid', 't_test']
Valid is found!
Test is found!
building model ...
building model ...
X_pl (?, 64)
t_pl (?,)
loss (?,)
loss ()
argmax (?,)
correct, (?,)
accuracy ()
building optimizer ...
train/rnn_big-1/checkpoints/checkpoint-15001
  valid accs: 0.898824
  valid aucs: 0.967333
  test accs: 0.860266
  test aucs: 0.948662

rnn_big-2
  27001
initializing data generator!
Train already downloaded ...
Test already downloaded ...

In [4]:
valid_preds[3][0][:,1].mean()

0.29146796

In [6]:
v_tot_preds = []
v_tot_targets = []
print("hi")
for key, values in valid_preds.iteritems():
    print("key,", key)
    print("values", len(values))
    tot_models = len(values)
    v_john = None
    for tot_out in values:
        if v_john is None:
            v_john = tot_out
        else:
            v_john += tot_out
    v_targets = valid_targets[key]
    v_aucs = utils.auc(v_targets, v_john[:, 1])
    v_preds = v_john[:, 1]>0.5
    v_accs = np.mean(np.equal(v_preds, v_targets))
    print("  valid accs: %f" % v_accs)
    print("  valid aucs: %f" % v_aucs)
    v_tot_preds.append(v_john)
    v_tot_targets.append(v_targets)
v_tot_preds = np.concatenate(v_tot_preds, axis=0)
v_tot_targets = np.concatenate(v_tot_targets, axis=0)
v_tot_aucs = utils.auc(v_tot_targets, v_tot_preds[:, 1])
v_tot_preds = v_tot_preds[:, 1]>0.5
v_tot_accs = np.mean(np.equal(v_tot_preds, v_tot_targets))
print("TOTAL VALID")
print("  valid accs: %f" % v_tot_accs)
print("  valid aucs: %f" % v_tot_aucs)

t_tot_preds = []
t_tot_targets = []
for key, values in test_preds.iteritems():
    print("key,", key)
    print("values", len(values))
    tot_models = len(values)
    t_john = None
    for tot_out in values:
        if t_john is None:
            t_john = tot_out
        else:
            t_john += tot_out
    t_john = t_john/float(tot_models)
    t_targets = test_targets[key]
    t_aucs = utils.auc(t_targets, t_john[:, 1])
    t_preds = t_john[:, 1]>0.5
    t_accs = np.mean(np.equal(t_preds, t_targets))
    print("  test accs: %f" % t_accs)
    print("  test aucs: %f" % t_aucs)
    t_tot_preds.append(t_john)
    t_tot_targets.append(t_targets)
t_tot_preds = np.concatenate(t_tot_preds, axis=0)
t_tot_targets = np.concatenate(t_tot_targets, axis=0)
t_tot_aucs = utils.auc(t_tot_targets, t_tot_preds[:, 1])
t_tot_preds = t_tot_preds[:, 1]>0.5
t_tot_accs = np.mean(np.equal(t_tot_preds, t_tot_targets))
print("TOTAL TEST")
print("  test accs: %f" % t_tot_accs)
print("  test aucs: %f" % t_tot_aucs)

hi
('key,', 1)
('values', 2)
  valid accs: 0.646781
  valid aucs: 0.969461
('key,', 2)
('values', 2)
  valid accs: 0.751985
  valid aucs: 0.954996
('key,', 3)
('values', 2)
  valid accs: 0.469332
  valid aucs: 0.947289
('key,', 4)
('values', 2)
  valid accs: 0.359739
  valid aucs: 0.910897
('key,', 5)
('values', 2)
  valid accs: 0.653759
  valid aucs: 0.989036
('key,', 6)
('values', 2)
  valid accs: 0.790463
  valid aucs: 0.971424
('key,', 7)
('values', 2)
  valid accs: 0.733516
  valid aucs: 0.970494
('key,', 8)
('values', 2)
  valid accs: 0.910815
  valid aucs: 0.970651
TOTAL VALID
  valid accs: 0.679704
  valid aucs: 0.957737
('key,', 1)
('values', 2)
  test accs: 0.784689
  test aucs: 0.954150
('key,', 2)
('values', 2)
  test accs: 0.920112
  test aucs: 0.965510
('key,', 3)
('values', 2)
  test accs: 0.913651
  test aucs: 0.789593
('key,', 4)
('values', 2)
  test accs: 0.892116
  test aucs: 0.930005
('key,', 5)
('values', 2)
  test accs: 0.930871
  test aucs: 0.971305
('key,', 6)
(

In [None]:
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
print("validation")
#np.save("total_valid_outs.npy", total_valid_outs)
#np.save("total_valid_targets.npy", total_valid_targets)
plt.figure()
plt.plot(fpr_valid, tpr_valid)
plt.show()
#print(total_outs[-10:, 0])


cnf_matrix = confusion_matrix(total_valid_targets, np.argmax(total_valid_outs, axis=1))
plt.figure()
plot_confusion_matrix(cnf_matrix, ["adherent", "non-adherent"])

In [None]:
print("test")
#np.save("total_test_outs.npy", total_test_outs)
#np.save("total_test_targets.npy", total_test_targets)
plt.figure()
plt.plot(fpr_test, tpr_test)
plt.show()


cnf_matrix = confusion_matrix(total_test_targets, np.argmax(total_test_outs, axis=1))
plt.figure()
cnf_matrix = confusion_matrix(total_test_targets, np.argmax(total_test_outs, axis=1))
plt.figure()
plot_confusion_matrix(cnf_matrix, ["adherent", "non-adherent"])