In [None]:
%load_ext autoreload 
%autoreload 2
%matplotlib notebook
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (9, 6)
import random
import numpy as np
import sys
sys.path.append('../ecg')

## Loading Data

In [None]:
# Where the loader parameters come from: a JSON config file, the parameters
# stored with a saved model, or (when both flags are False) the inline defaults.
LOAD_FROM_FILE = False
LOAD_FROM_MODEL = False
model_path = "../saved/3.1/1485643942/0.317-0.908-033-0.222-0.927.hdf5"

import load
import json
import util

args = util.get_object_from_dict(data_path="../data")

if LOAD_FROM_FILE is True:
    # Context manager so the config file handle is closed once it is parsed.
    with open('../configs/vanilla_load.json', 'r') as config_file:
        params = json.load(config_file)
elif LOAD_FROM_MODEL is True:
    params = util.get_model_params(model_path)
else:
    params = {"step": 256, "toy": False}

# Kept as its own global because later cells reuse `params` for other loads.
step = params["step"]

dl = load.load(args, params)
print(dl.class_to_int)

In [None]:
# Model predictions for the test split. Stays None unless a saved model is in
# use, in which case the helpers below skip the prediction comparison.
predictions = None
if LOAD_FROM_MODEL is True:
    prediction_path = util.get_prediction_path_for_model(model_path, 'test')
    # Close the file as soon as the array is read instead of leaking the handle.
    # NOTE(review): assumes a plain .npy array (it is indexed by integer
    # position later); a lazily-read .npz archive would need the file kept open.
    with open(prediction_path, 'rb') as prediction_file:
        predictions = np.load(prediction_file)

In [None]:
# Work on the held-out test split: `x` holds the inputs and `y` the labels
# (presumably one-hot along the last axis, given the argmax calls that follow
# -- TODO confirm against the loader).
x, y = dl.x_test, dl.y_test

def from_one_hot_to_int(label):
    """Collapse the last axis of `label` to the index of its largest entry.

    For one-hot encoded rows this is the integer class id of each row.
    """
    class_axis = -1
    return np.argmax(label, axis=class_axis)

def get_x_y_predictions_at_index(index):
    """Fetch the test example stored at position `index`.

    Returns a tuple ``(signal, labels, predicted)``: the entry of the global
    `x`, the argmax-decoded entry of the global `y`, and the argmax of the
    matching entry of the global `predictions` (None when no predictions
    have been loaded).
    """
    signal = x[index]
    labels = from_one_hot_to_int(y[index])
    if predictions is None:
        return signal, labels, None
    return signal, labels, np.argmax(predictions[index], axis=-1)

def get_sample_from_classes(categories, min_mistakes = 20, num_tries = 1000):
    """Randomly pick a test example whose labels include every class in `categories`.

    When predictions are loaded, examples are redrawn (up to `num_tries`
    times) until one is found on which more than `min_mistakes` label
    positions are predicted wrongly; if none qualifies, the last example
    drawn is returned. Without predictions the first draw is returned.

    Args:
        categories: iterable of class names, looked up in `dl.class_to_int`.
        min_mistakes: an example is accepted once its number of wrong
            predictions exceeds this value.
        num_tries: maximum number of random draws; at least one draw is
            always made.

    Returns:
        Tuple ``(x_sample, y_sample, y_prediction)`` as produced by
        `get_x_y_predictions_at_index`.

    Raises:
        KeyError: if a name in `categories` is not a known class.
        IndexError: if no test example contains all requested classes.
    """
    classes = np.array([dl.class_to_int[c] for c in categories])
    y_maxed = np.argmax(y, axis=-1)
    # Keep only examples in which every requested class id occurs at least once.
    contains_all = np.array(
        [np.in1d(classes, row).all() for row in y_maxed], dtype=bool)
    indices = np.where(contains_all)[0]
    if len(indices) == 0:
        # Fail with an explicit message rather than the opaque error that
        # random.choice raises on an empty sequence.
        raise IndexError(
            "No test example contains all of the classes: "
            + ", ".join(str(c) for c in categories))

    x_sample = y_sample = y_prediction = None
    # max(..., 1) guarantees one draw so the return values are always bound.
    for _ in range(max(num_tries, 1)):
        index = random.choice(indices)
        x_sample, y_sample, y_prediction = get_x_y_predictions_at_index(index)
        if y_prediction is None:
            # Nothing to compare against: any matching example will do.
            break
        num_wrong = np.sum(y_sample != y_prediction)
        if num_wrong > min_mistakes:
            print("Prediction got wrong " +  str(num_wrong * 1.0 / len(y_sample)))
            break
    return x_sample, y_sample, y_prediction

# Pick a random test example whose labels contain both NSR and NOISE.
x_sample, y_sample, y_prediction = get_sample_from_classes([u'NSR', u'NOISE'])

In [None]:
import matplotlib.cm as cm

def from_int_to_name(l):
    """Look up the class name stored at position `l` of `dl.classes`."""
    class_names = dl.classes
    return class_names[l]

def draw_sample(x_sample, y_sample, y_prediction):
    """Plot a signal with its ground-truth labels drawn as coloured vertical lines.

    Args:
        x_sample: the signal to plot.
        y_sample: integer class id per label position; positions are spread
            evenly over the signal, starting and ending half a `step` in
            from the edges.
        y_prediction: predicted class ids, or None when no predictions are
            available. Only printed, not drawn.

    Also prints the class names of `y_sample` and, when given, of
    `y_prediction`.
    """
    class_names = np.array(dl.classes)
    # One colour per class id. Never fewer than the original 20 entries so
    # existing plots keep their colours, but large enough that class ids of
    # 20 and above no longer index past the palette.
    colors = cm.rainbow(np.linspace(0, 1, max(20, len(class_names))))
    y_times = np.linspace(step/2, len(x_sample) - step/2, len(y_sample))
    # The signal extent is the same for every class, so compute it once.
    signal_min, signal_max = min(x_sample), max(x_sample)
    for y_chosen in [y_sample]:
        for y_uniq in np.unique(y_chosen):
            plt.vlines(
                y_times[y_chosen == y_uniq],
                signal_min,
                signal_max,
                label=from_int_to_name(y_uniq),
                color=colors[y_uniq],
                alpha=1
            )
        # NOTE(review): this shift only matters if further label sequences are
        # added to the list above -- presumably meant for overlaying predictions.
        y_times += 20
    print(class_names[y_sample])
    # Indexing with None would add an axis and print every class name, which
    # reads like a prediction; print only when predictions actually exist.
    if y_prediction is not None:
        print(class_names[y_prediction])
    plt.plot(x_sample, color='#999999', alpha=1)
    plt.legend()
    plt.show()


# Visualise the sampled example together with its label positions.
draw_sample(x_sample, y_sample, y_prediction)

## Bandpass filter

In [None]:
import featurize

# Run the sampled signal through the project's band-pass filter and overlay
# the result on the raw trace for a visual comparison.
bp = featurize.BandPassFilter()
x_new = bp.filt(x_sample)
for trace, trace_label in ((x_sample, "original"), (x_new, "bandpassed")):
    plt.plot(trace, label=trace_label)
plt.legend()
plt.show()

## Measuring Improvement With Increase in Training Data
Requires two CSV files: one giving the magnification factor per class, and another giving the F1 score per class.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (12, 8)
from numpy import genfromtxt
from scipy.interpolate import UnivariateSpline

def measure(file1, file2):
    """Plot per-class F1 against training-set size with a linear spline fit.

    Args:
        file1: path to a comma-separated file with the x values (training
            amounts) for each class.
        file2: path to a comma-separated file with the matching F1 values.

    Both files are transposed after loading, so each row of the resulting
    arrays is one class, and then flipped left-to-right (presumably so the
    x values are ascending, as the spline fit requires -- TODO confirm
    against the CSV layout).
    """
    train = np.fliplr(genfromtxt(file1, delimiter=',').T)
    accuracy = np.fliplr(genfromtxt(file2, delimiter=',').T)
    labels = ['NSR', 'NOISE', 'AFIB', 'TRIGEMINY', 'SVT', 'WENCKEBACH', \
              'AFL', 'BIGEMINY', 'JUNCTIONAL', 'AVB_TYPE2', 'VT', 'SUDDEN_BRADY', 'EAR', 'PAUSE', 'IVR']
    for index in range(train.shape[0]):
        examples = train[index]
        f1_scores = accuracy[index]
        # k=1 gives a piecewise-linear fit through the measurements.
        spl = UnivariateSpline(examples, f1_scores, k=1)
        x_r = np.arange(0, max(examples) + 1)
        # Fall back to a generic name rather than raising IndexError when the
        # files hold more classes than the hard-coded label list.
        name = labels[index] if index < len(labels) else 'class %d' % index
        plt.plot(x_r, spl(x_r), label=name)
        plt.scatter(examples, f1_scores)

    # Axis decoration is the same for every class, so set it once.
    plt.ylabel('Class F1')
    plt.xlabel('# Examples')
    plt.ylim([0, 1])
    plt.legend()
    plt.show()

# measure('../train.csv', '../acc.csv')

## Co-occurrence

In [None]:
# Build a binary presence matrix: one row per training example, one column per
# class, with a 1 wherever that class id appears in the example's labels.
num_examples = dl.y_train.shape[0]
mask = np.zeros((num_examples, dl.output_dim))
label_ids = np.argmax(dl.y_train, axis=-1)
for i, row in enumerate(label_ids):
    indices = np.unique(row)
    mask[i, indices] = 1

# Entry (a, b) counts the training examples in which classes a and b both occur.
coocurrence = mask.T.dot(mask)

def plot_coocurrence(cooccurence):
    """Show a log-scaled heat map of a class co-occurrence matrix.

    Args:
        cooccurence: square array of co-occurrence counts whose rows and
            columns follow the order of `dl.classes`.
    """
    cmap = plt.cm.Reds
    # Plot the matrix that was passed in; the body previously read the
    # differently spelled module-level `coocurrence` and ignored its argument.
    # log10(count + 1) keeps zero counts finite and compresses large counts.
    plt.imshow(np.log10(np.asarray(cooccurence) + 1), interpolation='nearest', cmap=cmap)
    plt.title('Co-occurence matrix')
    plt.colorbar()
    tick_marks = np.arange(len(dl.classes))
    plt.xticks(tick_marks, dl.classes, rotation=90)
    plt.yticks(tick_marks, dl.classes)

    plt.tight_layout()
    plt.show()

# Render the co-occurrence counts computed from the training labels.
plot_coocurrence(coocurrence)

## Data Agreement Rate

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# NOTE(review): `args` and `params` from the data-loading cell are overwritten
# here, so earlier cells re-run after this one will see the review settings.
args = util.get_object_from_dict(data_path="../data/label_review")
# Context manager so the config file handle is closed once it is parsed.
with open('../configs/default.json', 'r') as config_file:
    params = json.load(config_file)
params["val_frac"] = 0.5
# Load the same records twice, once per reviewer's annotation file.
params["extension"] = '_rev0.episodes.json'
dl1 = load.load(args, params)
y1 = np.concatenate((dl1.y_train, dl1.y_test), axis=0)
params["extension"] = '_rev1.episodes.json'
dl2 = load.load(args, params)
y2 = np.concatenate((dl2.y_train, dl2.y_test), axis=0)

# Flatten both label sets to one class id per label position so they can be
# compared element-wise (assumes both loads return examples in the same order
# -- TODO confirm).
y1_flat = np.argmax(y1, axis=-1).flatten().tolist()
y2_flat = np.argmax(y2, axis=-1).flatten().tolist()

# Reviewer 0 is treated as the reference and reviewer 1 as the "prediction".
# NOTE(review): class names come from `dl` (the main dataset), not dl1/dl2;
# verify the review data uses the same class ordering and that every class
# appears, otherwise names and matrix rows will not line up.
print(classification_report(
        y1_flat, y2_flat,
        target_names=dl.classes))

cnf_matrix = confusion_matrix(y1_flat, y2_flat).tolist()
import evaluate
# log10(count + 1) keeps empty cells finite and compresses large counts.
evaluate.plot_confusion_matrix(np.log10(np.array(cnf_matrix) + 1), dl.classes)
print(cnf_matrix)