In [None]:
%load_ext autoreload 
%autoreload 2
#%matplotlib notebook
import matplotlib.pyplot as plt
import random
import numpy as np
import sys
import os
# Work from the repository root so the relative paths used below (./configs,
# ./saved, ./ecg) resolve. The location can be overridden with the ECG_ROOT
# environment variable; the original hard-coded path remains the default.
os.chdir(os.environ.get("ECG_ROOT", "/Users/pranavrajpurkar/Documents/Code/ecg"))
# Make the project modules (load, util, predict, ...) importable.
sys.path.append('./ecg')

## Loading Data

In [None]:
# Exactly one of these flags is expected to be True; LOAD_FROM_MODEL wins if
# both are set. If neither is set, nothing is loaded by this cell.
LOAD_FROM_MODEL = True
LOAD_FROM_FILE = False

model_paths = ["./saved/default/0.356-0.875-010-0.294-0.892.hdf5"]

import load
import json
import util
import predict

if LOAD_FROM_MODEL is True:
    # Training parameters are recovered from the first saved model.
    params = util.get_model_params(model_paths[0])
    # Use a context manager so the config file handle is closed promptly.
    with open('./configs/test.json', 'r') as config_file:
        test_config = json.load(config_file)
    x, model_gt, processor, dl = load.load_test(
            test_config,
            train_params=params,
            split='test')
    model_probs = predict.get_ensemble_pred_probs(model_paths, x)
elif LOAD_FROM_FILE is True:
    # NOTE: this branch defines only params, dl and processor; model_gt and
    # model_probs (used by later cells) are not created here.
    with open('./configs/train.json', 'r') as config_file:
        params = json.load(config_file)
    dl, processor = load.load_train(params)

In [None]:
# Bind the test inputs and labels held by the loader. Note that this rebinds
# `x`, which the LOAD_FROM_MODEL branch of the loading cell had already set.
x, y = dl.x_test, dl.y_test

def from_one_hot_to_int(label):
    """Collapse the last axis of ``label`` to the index of its largest entry.

    Args:
        label: array-like whose last axis holds one score per class.

    Returns:
        The argmax along the last axis (a scalar for 1-D input, an array
        with one fewer dimension otherwise).
    """
    scores = np.asarray(label)
    return scores.argmax(axis=-1)

def get_x_y_predictions_at_index(index, probs=None):
    """Fetch one record from the notebook-level ``x`` / ``y`` arrays.

    Args:
        index: position of the record in ``x``, ``y`` (and ``probs``).
        probs: optional per-record class scores; when given, the argmax over
            the last axis of ``probs[index]`` is returned as the prediction.

    Returns:
        ``(x_sample, y_sample, y_prediction)`` where ``y_sample`` is the
        integer-decoded label of ``y[index]`` and ``y_prediction`` is ``None``
        when ``probs`` is not supplied.
    """
    signal = x[index]
    labels = from_one_hot_to_int(y[index])
    if probs is None:
        return signal, labels, None
    return signal, labels, np.argmax(probs[index], axis=-1)

def get_sample_from_classes(
        categories, min_mistakes=20, num_tries=1000, only_classes=False, probs=None):
    """Randomly draw a record whose labels contain every requested class.

    Candidates are the records in the notebook-level ``y`` whose argmax labels
    include all of ``categories``. Up to ``num_tries`` random draws are made
    (at least one draw is always made):

    * with ``only_classes=True`` a draw is skipped unless its label set is
      exactly the requested classes;
    * without ``probs`` the first acceptable draw is returned;
    * with ``probs`` drawing continues until a record has more than
      ``min_mistakes`` label positions where the prediction is wrong.

    The search is best-effort: if no draw meets the criteria, the last record
    drawn is returned anyway.

    Args:
        categories: iterable of class names, mapped through
            ``processor.class_to_int``.
        min_mistakes: a record is accepted once its number of wrong
            predictions exceeds this value (only used when ``probs`` is given).
        num_tries: maximum number of random draws.
        only_classes: require the record to contain only the requested classes.
        probs: optional model outputs indexed like ``y``.

    Returns:
        ``(x_sample, y_sample, y_prediction, index)``.

    Raises:
        IndexError: if no record contains all of the requested classes.
    """
    classes = np.array([processor.class_to_int[c] for c in categories])
    y_maxed = np.argmax(y, axis=-1)
    indices = np.where(np.array([np.in1d(classes, row).all() for row in y_maxed]))[0]
    if len(indices) == 0:
        # random.choice would fail with an uninformative message; say why.
        raise IndexError(
            "No record contains all of the requested classes: " + str(list(categories)))

    # Loop-invariant: the exact label set required when only_classes is set.
    wanted = set(np.unique(classes))

    # Always draw at least once so the return values are defined.
    for _ in range(max(1, num_tries)):
        index = random.choice(indices)
        x_sample, y_sample, y_prediction = get_x_y_predictions_at_index(index, probs=probs)
        if only_classes and set(np.unique(y_sample)) != wanted:
            continue
        if y_prediction is None:
            # No predictions to compare against: any matching record will do.
            break
        num_wrong = np.sum(y_sample != y_prediction)
        if num_wrong > min_mistakes:
            print("Prediction got wrong " + str(num_wrong * 1.0 / len(y_sample)))
            break
    return x_sample, y_sample, y_prediction, index

In [None]:
import matplotlib.cm as cm
from itertools import groupby
plt.rcParams["figure.figsize"] = (14, 5)

def from_int_to_name(l):
    """Return the entry of ``processor.classes`` at integer position ``l``."""
    class_names = processor.classes
    return class_names[l]

def draw_sample(x_sample, y_sample, y_prediction, step, show_label=True, save=False,
                small_frame=False, sample_index=None):
    """Plot one signal with its label runs shaded, and print the label names.

    Args:
        x_sample: the signal to plot.
        y_sample: integer class label per label position; consecutive equal
            labels are shaded as a single span.
        y_prediction: optional integer predictions; only printed, not drawn.
        step: width, in x-axis units, covered by one label position.
        show_label: when ``True``, shade the label runs and add a legend.
        save: when ``True``, save the figure as ``"<first class>-<index>"``.
        small_frame: when ``True``, restrict the x-axis to ``[0, 2000]``.
        sample_index: record index used in the saved file name. If omitted,
            the notebook-level ``index`` is used, as before.
    """
    class_names = np.array(processor.classes)
    # Keep the original 20-colour palette, but grow it if there are more
    # classes so that colors[label] cannot run out of range.
    colors = cm.rainbow(np.linspace(0, 1, max(20, len(class_names))))

    seen = set()
    if show_label is True:
        start = 0
        for label, run in groupby(y_sample):
            length = sum(1 for _ in run)
            span_kwargs = {
                "color": colors[label],
                "alpha": 0.5,
                "lw": 0
            }
            # Label only the first span of each class so the legend has one
            # entry per class.
            if label not in seen:
                span_kwargs["label"] = from_int_to_name(label)
                seen.add(label)
            plt.axvspan(start * step, (start + length) * step, **span_kwargs)
            start += length

    print(class_names[y_sample])
    if y_prediction is not None:
        print(class_names[y_prediction])
    plt.plot(x_sample, color='#333333', alpha=1)
    if seen:
        # Only request a legend when at least one labelled span was drawn.
        plt.legend(loc="best")
    plt.yticks([])
    plt.xticks([])
    plt.tight_layout()
    plt.gcf().subplots_adjust(left=0.0)
    if small_frame is True:
        plt.xlim([0, 2000])
    if save is True:
        if sample_index is None:
            # Legacy behaviour: fall back to the notebook-level `index`.
            sample_index = index
        plt.savefig(str(np.unique(class_names[y_sample])[0]) + "-" + str(sample_index),
                    bbox_inches='tight')
    plt.show()
    plt.close()

#for class_indiv in processor.classes:
# Draw one random record containing the 'NSR' class (scored against the model
# outputs) and plot it with its label spans.
x_sample, y_sample, y_prediction, index = get_sample_from_classes(
    [u'NSR'], probs=model_probs, only_classes=False)
draw_sample(
    x_sample, y_sample, y_prediction, params["step"],
    show_label=True, small_frame=False, save=False)

## Human Numbers

In [None]:
# Load six alternative annotation sets ("_rev0" ... "_rev5") and pair each
# with the model ground truth so they can be scored like model outputs.
gt_all = []
probs_all = []
for i in range(6):
    # Re-read the config on every pass so each load starts from a fresh dict;
    # the context manager closes the file handle.
    with open('./configs/test.json', 'r') as config_file:
        test_params = json.load(config_file)
    test_params["epi_ext"] = "_rev" + str(i) + ".episodes.json"
    # NOTE: this rebinds the notebook-level `dl` on every iteration.
    _, pb, dl = load.load_x_y_with_processor(test_params, processor)
    # The same ground truth is repeated once per annotation set.
    gt_all.append(model_gt)
    probs_all.append(pb)
# NOTE(review): ground truth is joined along axis 1 but the annotations along
# axis 0 - confirm this matches the layouts evaluate.evaluate_multiclass expects.
human_gt = np.concatenate(tuple(gt_all), axis=1)
human_probs = np.concatenate(tuple(probs_all), axis=0)

## Evaluating Models

In [None]:
import evaluate
from tabulate import tabulate

# Collect per-class F1 for model and humans under both metrics. Column order
# after the transpose: model seq, human seq, model set, human set.
f1_data = []
for metric in ['seq', 'set']:
    # models
    evaluator = evaluate.evaluate_multiclass(
        model_gt, model_probs, processor.classes, metric, ', '.join(model_paths), display_scores=False)
    model_plotMat, model_support, class_names = evaluate.parse_classification_report(evaluator.scorer.report)
    model_f1 = model_plotMat[:, 2]
    f1_data.append(model_f1)

    # humans
    evaluator = evaluate.evaluate_multiclass(
        human_gt, human_probs, processor.classes, metric, ', '.join(model_paths), display_scores=False)
    human_plotMat, human_support, class_names = evaluate.parse_classification_report(evaluator.scorer.report)
    human_f1 = human_plotMat[:, 2]
    f1_data.append(human_f1)

f1_data = np.array(f1_data).T
cell_text = []
for row, class_name in zip(f1_data, class_names):
    # Use a distinct loop name so the notebook-level `x` is never shadowed
    # (or overwritten, under Python 2 comprehension scoping).
    cell_text.append([class_name] + ['%1.3f' % score for score in row])

table = tabulate(
    cell_text, tablefmt="latex", floatfmt=".3f",
    headers=["Model seq", "Human seq", "Model set", "Human set"])

# Post-process the LaTeX: in every data row, bold the larger of each
# (model, human) pair.
rows = []
import re
for row in table.split('\n'):
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    elems = re.split(r'\s+', row)
    # Skip structural lines (too few tokens) and the header row (contains "seq").
    if len(elems) > 2 and "seq" not in elems:
        # Assumes a data row splits as ['', name, '&', v1, '&', v2, '&', v3, '&', v4, ...],
        # so the model scores are at 3 and 7 and the matching human scores two
        # positions later - TODO confirm against the tabulate output.
        for start in [3, 7]:
            end = start+2
            winner = start if float(elems[start]) > float(elems[end]) else end
            elems[winner] = "\\textbf{" + elems[winner] + "}"
    row = " ".join(elems)
    rows.append(row)
table = "\n".join(rows)
print(table)

In [None]:
# Render the F1 scores as a matplotlib table. Relies on `f1_data` and
# `class_names` produced by the evaluation cell; columns of f1_data are
# model seq, human seq, model set, human set.
cell_text = []
for row in f1_data:
    # NOTE(review): the comprehension variable `x` reuses the name of the
    # notebook-level `x`; under Python 2 scoping this would overwrite it.
    cell_text.append(['%1.3f' % x for x in row])

colLabels=("Model", "Human", "Model", "Human")
# The four layout values below are not referenced anywhere else in the
# visible notebook.
nrows, ncols = len(cell_text)+1, len(colLabels)
hcell, wcell = 0.3, 1.
hpad, wpad = 0, 1    

fig=plt.figure()
ax = fig.add_subplot(111)
# Hide the axes; only the tables are drawn.
ax.axis('off')
# Draw the tables.
# First a one-row header table spanning the two metric groups; its bbox
# places it across the top of the main table's bbox.
header_0 = plt.table(cellText=[['']*2],
                     colLabels=['Seq F1', 'Set F1'],
                     loc='bottom',
                     bbox=[0, 0.9, 0.5, 0.2]
                     )

# Main table: one row per class. This rebinds `table`, which held the LaTeX
# string in the evaluation cell.
table = plt.table(
    cellText=cell_text,
    rowLabels=class_names,
    colLabels=colLabels,
    loc='bottom',
    bbox=[0, 0, .5, 1.0])
plt.subplots_adjust(left=0.2, bottom=0.2)
plt.show()

## Bandpass filter

In [None]:
import featurize

# Compare a signal before and after the project's band-pass filter.
# `x_sample` is the record drawn earlier by get_sample_from_classes.
bp = featurize.BandPassFilter()
x_new = bp.filt(x_sample)
# Overlay both traces on the same axes.
plt.plot(x_sample, label="original")
plt.plot(x_new, label="bandpassed")
plt.legend()
plt.show()

## Measuring Improvement With Increase in Training Data
Requires two CSV files: one giving the magnification factor per class and another giving the F1 score per class.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (12, 8)
from numpy import genfromtxt
from scipy.interpolate import UnivariateSpline

def measure(file1, file2, labels=None):
    """Plot per-class F1 against training-set size with a linear spline fit.

    Both files are comma-separated. After loading, each array is transposed
    and flipped left-to-right, so every row holds the series for one class.

    Args:
        file1: path to the CSV supplying the x-values (plotted as "# Examples").
        file2: path to the CSV supplying the y-values (plotted as "Class F1").
        labels: optional legend names, one per class row. Defaults to the
            original 15-class list; rows beyond the end of the list are
            labelled ``"class <row>"``.
    """
    if labels is None:
        labels = ['NSR', 'NOISE', 'AFIB', 'TRIGEMINY', 'SVT', 'WENCKEBACH',
                  'AFL', 'BIGEMINY', 'JUNCTIONAL', 'AVB_TYPE2', 'VT', 'SUDDEN_BRADY',
                  'EAR', 'PAUSE', 'IVR']

    train = np.fliplr(genfromtxt(file1, delimiter=',').T)
    accuracy = np.fliplr(genfromtxt(file2, delimiter=',').T)

    for row in range(train.shape[0]):
        examples = train[row]
        scores = accuracy[row]
        # k=1 gives a piecewise-linear smoothing spline through the points.
        spl = UnivariateSpline(examples, scores, k=1)
        grid = np.arange(0, max(examples) + 1)
        name = labels[row] if row < len(labels) else "class %d" % row
        plt.plot(grid, spl(grid), label=name)
        plt.scatter(examples, scores)

    # Axis labels do not depend on the class, so set them once.
    plt.ylabel('Class F1')
    plt.xlabel('# Examples')
    plt.ylim([0, 1])
    plt.legend()
    plt.show()

# measure('../train.csv', '../acc.csv')

## Co-occurrence

In [None]:
# Build a (records x classes) indicator matrix: mask[i, c] is 1 when class c
# is the argmax label somewhere in training record i.
# assumes dl.y_train's last axis holds the per-class scores - TODO confirm
mask = np.zeros((dl.y_train.shape[0], len(processor.classes)))
labels_per_record = np.argmax(dl.y_train, axis=-1)
for i, row in enumerate(labels_per_record):
    indices = np.unique(row)
    mask[i, indices] = 1

# Entry [a, b] counts the records in which classes a and b both appear.
coocurrence = mask.T.dot(mask)

def plot_coocurrence(cooccurence):
    """Show a class co-occurrence matrix as a log-scaled heat map.

    Args:
        cooccurence: square array-like of counts, with rows and columns
            ordered like ``processor.classes``.
    """
    # Plot the argument itself. The previous body read the notebook-level
    # `coocurrence` (different spelling), so the parameter was ignored.
    counts = np.asarray(cooccurence)
    cmap = plt.cm.Reds
    # log10(count + 1) keeps zero counts finite and compresses the range.
    plt.imshow(np.log10(counts + 1), interpolation='nearest', cmap=cmap)
    plt.title('Co-occurence matrix')
    plt.colorbar()
    tick_marks = np.arange(len(processor.classes))
    plt.xticks(tick_marks, processor.classes, rotation=90)
    plt.yticks(tick_marks, processor.classes)

    plt.tight_layout()
    plt.show()

plot_coocurrence(coocurrence)

## Data Agreement Rate

In [None]:
# Compare two annotation passes ("_rev0" vs "_rev1") of the label-review data.
# `confusion_matrix` is the sklearn function actually called below; the
# previously imported name `confusion_matrix_matrix` does not exist.
from sklearn.metrics import classification_report, confusion_matrix
args = util.get_object_from_dict(data_path="../data/label_review")
# NOTE: this rebinds the notebook-level `params`.
with open('../configs/default.json', 'r') as config_file:
    params = json.load(config_file)
params["val_frac"] = 0
params["extension"] = '_rev0.episodes.json'
dl1 = load.load(args, params)
y1 = dl1.y_train
params["extension"] = '_rev1.episodes.json'
dl2 = load.load(args, params)
y2 = dl2.y_train

# Flatten the per-position argmax labels so the two passes can be compared
# element by element.
y1_flat = np.argmax(y1, axis=-1).flatten().tolist()
y2_flat = np.argmax(y2, axis=-1).flatten().tolist()

# NOTE(review): class names are read from `dl`, left over from earlier cells,
# rather than from dl1/dl2 - confirm this is the intended loader.
print(classification_report(
        y1_flat, y2_flat,
        target_names=dl.classes))

cnf_matrix = confusion_matrix(y1_flat, y2_flat).tolist()
import evaluate
# log10(count + 1) keeps empty cells finite in the plot.
evaluate.plot_confusion_matrix(np.log10(np.array(cnf_matrix) + 1), dl.classes)
print(cnf_matrix)