# Analysis

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

## Loading Data

In [2]:
def softmax(x):
    """Return the softmax of ``x`` taken along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the mathematical result unchanged but keeps ``np.exp`` from overflowing to
    ``inf`` (and the quotient from becoming NaN) when the inputs are large.

    Args:
        x: array-like of scores. For a 1-D vector the result sums to 1; for
            higher-rank input each slice along axis 0 is normalised.

    Returns:
        An array with the same shape as ``x`` holding the normalised
        exponentials.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the denominator
    return exps / np.sum(exps, axis=0, keepdims=True)

In [3]:
# Test images and the labels that predictions are scored against.
test_set = np.load("./results/test_images.npy")
labels = np.load("./results/test_labels.npy")
num_test = len(labels)

# The saved result files are named "<model>_<treatment>_<run>.npy".
models = ["AutoEncoder", "Convolutional0", "Convolutional3", "FullyConnected0", "FullyConnected2", "VAE"]
treatments = ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "evens", "odds"]

# One array per (model, treatment, run index); five runs are stored for each pair.
results = {}
for model in models:
    for treatment in treatments:
        for i in range(5):
            result_path = "./results/{0}_{1}_{2}.npy".format(model, treatment, i)
            results[(model, treatment, i)] = np.load(result_path)

## Measuring Performance
We take the digit with the highest classifier probability as our prediction and use that to compute accuracy.

In [115]:
# Top-1 accuracy for every (model, treatment), averaged over the five runs.
#
# softmax is strictly increasing, so the arg-max of the raw outputs is the same
# prediction as the arg-max of the probabilities. Taking it directly avoids
# NaNs from exp() overflow on large outputs and lets NumPy handle the whole
# test set in one call instead of a Python loop over rows.
# NOTE(review): assumes each results array is 2-D (examples x classes), as the
# column indexing in the mutual-information cell implies -- confirm.
performance = dict()
for model in models:
    for treatment in treatments:
        run_accs = []
        for i in range(5):
            preds = np.argmax(results[(model, treatment, i)], axis=1)
            acc = np.sum(preds == labels) / num_test
            run_accs.append(acc)
        performance[(model, treatment)] = np.mean(run_accs)

In [116]:
# Digit associated with each single-digit treatment name.
t = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9}

# Row indices of the test examples labelled with each treatment's digit.
# They depend only on the labels, so they are computed once rather than
# rescanning the labels for every model and run.
digit_rows = {name: np.flatnonzero(labels == digit) for name, digit in t.items()}
empty_treatments = [name for name, rows in digit_rows.items() if rows.size == 0]
if empty_treatments:
    # Averaging over zero examples would otherwise fail with a bare
    # ZeroDivisionError deep inside the loop.
    raise ValueError(
        "no test examples found for treatment digit(s): {0}".format(empty_treatments)
    )

# For every (model, single-digit treatment): the mean of the largest softmax
# probability over the test examples labelled with that digit, averaged over
# the five runs. This is a confidence score, not an accuracy.
new_performance = dict()
for model in models:
    for treatment in t:
        row_idx = digit_rows[treatment]
        avg = 0
        for i in range(5):
            outputs = results[(model, treatment, i)]
            # softmax is only evaluated on the rows that are actually used.
            acc = np.mean([np.max(softmax(outputs[j])) for j in row_idx])
            avg += acc
        avg /= 5
        new_performance[(model, treatment)] = avg

## Discretizing Data
We discretize all data to make estimation more tractable.

In [5]:
# Eleven bin edges 0.0, 0.1, ..., 1.0 used to bucket pixel values.
bins_x = [0.1 * step for step in range(11)]
# Flatten every image to a single row, then bucket all pixels in one call;
# X has one row per test image and one column per pixel.
X = np.digitize(test_set.reshape(len(test_set), -1), bins_x)

In [6]:
# Ten bin edges -50, -40, ..., 40 used to bucket the raw model outputs.
bins_y = [10 * step for step in range(-5, 5)]
# Discretised copy of every result array, under the same (model, treatment, run) keys.
Y_dict = {
    (model, treatment, i): np.digitize(results[(model, treatment, i)], bins_y)
    for model in models
    for treatment in treatments
    for i in range(5)
}

## Estimating Mutual Information
This section can take quite a while to run: in the progress log below, the first of the six models alone took about 15 minutes.

In [7]:
from sklearn.metrics import mutual_info_score

# https://stackoverflow.com/questions/20491028/optimal-way-to-compute-pairwise-mutual-information-using-numpy
def calc_MI(x, y, bins):
    """Estimate the mutual information between two paired 1-D samples.

    The pairs ``(x[k], y[k])`` are counted into a 2-D histogram, and that
    count table is handed to ``mutual_info_score`` as its contingency matrix.

    Args:
        x: first sample (sequence of numbers).
        y: second sample, the same length as ``x``.
        bins: bin specification, forwarded unchanged to ``np.histogram2d``.

    Returns:
        The score returned by ``mutual_info_score`` for the binned counts.
    """
    joint_counts, _x_edges, _y_edges = np.histogram2d(x, y, bins)
    return mutual_info_score(None, None, contingency=joint_counts)

In [8]:
# Summary statistics of the pixel-vs-output mutual information, one entry per
# (model, treatment). Each statistic is taken over every combination of pixel
# column, run, and the first ten output columns.
med_mut_infos = dict()
max_mut_infos = dict()
min_mut_infos = dict()
mean_mut_infos = dict()
num_pixels = len(X[0])
for model in tqdm(models):
    for treatment in tqdm(treatments):
        run_outputs = [Y_dict[(model, treatment, i)] for i in range(5)]
        # Same visiting order as nested loops: pixel, then run, then output column.
        model_mut_infos = [
            calc_MI(X[:, pixel].reshape(-1), model_vals[:, j].reshape(-1), 10)
            for pixel in range(num_pixels)
            for model_vals in run_outputs
            for j in range(10)
        ]
        summary_key = (model, treatment)
        med_mut_infos[summary_key] = np.median(model_mut_infos)
        max_mut_infos[summary_key] = np.max(model_mut_infos)
        min_mut_infos[summary_key] = np.min(model_mut_infos)
        mean_mut_infos[summary_key] = np.mean(model_mut_infos)

  0%|          | 0/6 [00:00<?, ?it/s]
  0%|          | 0/11 [00:00<?, ?it/s][A
  9%|▉         | 1/11 [01:16<12:47, 76.76s/it][A
 18%|█▊        | 2/11 [02:31<11:22, 75.88s/it][A
 27%|██▋       | 3/11 [03:49<10:12, 76.61s/it][A
 36%|███▋      | 4/11 [05:12<09:06, 78.04s/it][A
 45%|████▌     | 5/11 [06:33<07:51, 78.60s/it][A
 55%|█████▍    | 6/11 [07:45<06:27, 77.51s/it][A
 64%|██████▎   | 7/11 [08:57<05:07, 76.84s/it][A
 73%|███████▎  | 8/11 [10:11<03:49, 76.47s/it][A
 82%|████████▏ | 9/11 [11:28<02:32, 76.49s/it][A
 91%|█████████ | 10/11 [12:44<01:16, 76.42s/it][A
100%|██████████| 11/11 [14:40<00:00, 80.01s/it][A
 17%|█▋        | 1/6 [14:40<1:13:20, 880.07s/it]
  0%|          | 0/11 [00:00<?, ?it/s][A
  9%|▉         | 1/11 [01:24<14:03, 84.37s/it][A
 18%|█▊        | 2/11 [02:50<12:45, 85.06s/it][A
 27%|██▋       | 3/11 [04:09<11:04, 83.00s/it][A
 36%|███▋      | 4/11 [05:03<08:51, 75.94s/it][A
 45%|████▌     | 5/11 [05:59<07:11, 71.89s/it][A
 55%|█████▍    | 6/11 [06:4

## Determining Correlation
We consider treatments with a single digit removed and treatments with half the digits removed separately.

In [122]:
# (model, treatment) pairs for the two autoencoder-style models on every
# treatment except the "evens"/"odds" ones.
single_digit_keys = [
    (model, treatment)
    for model in ["AutoEncoder", "VAE"]
    for treatment in treatments
    if treatment not in ("evens", "odds")
]

# Parallel lists: one MI statistic per pair, plus the matching score on the y-axis.
med_mi_x = [med_mut_infos[key] for key in single_digit_keys]
max_mi_x = [max_mut_infos[key] for key in single_digit_keys]
mean_mi_x = [mean_mut_infos[key] for key in single_digit_keys]
min_mi_x = [min_mut_infos[key] for key in single_digit_keys]
acc_y = [new_performance[key] for key in single_digit_keys]

In [121]:
fig, ax = plt.subplots()

# One scatter plus least-squares line per MI statistic; the legend entry
# carries the correlation coefficient between that statistic and acc_y.
for mi_vals, colour, label_fmt in (
    (med_mi_x, "red", "med: {0}"),
    (mean_mi_x, "green", "mean: {0}"),
):
    corr = np.corrcoef(mi_vals, acc_y)[0, 1]
    ax.scatter(mi_vals, acc_y, color=colour, label=label_fmt.format(corr))
    fit_line = np.poly1d(np.polyfit(mi_vals, acc_y, 1))
    xs = np.unique(mi_vals)
    ax.plot(xs, fit_line(xs), color=colour)
ax.legend()

ax.set_xlabel("Mutual Information")
ax.set_ylabel("Accuracy")
ax.set_title("MI vs. New Confidence on 9-digit Treatments: Neural Models")
fig.savefig("./figs/ird_analysis.png")

In [88]:
fig, ax = plt.subplots()

# Scatter of the maximum MI against acc_y, with a degree-1 least-squares fit.
max_corr = np.corrcoef(max_mi_x, acc_y)[0, 1]
ax.scatter(max_mi_x, acc_y, color="blue", label="max: {0}".format(max_corr))
max_fit = np.poly1d(np.polyfit(max_mi_x, acc_y, 1))
max_xs = np.unique(max_mi_x)
ax.plot(max_xs, max_fit(max_xs), color="blue")
ax.legend()

ax.set_xlabel("Mutual Information")
ax.set_ylabel("Accuracy")
ax.set_title("MI vs. Acc on 9-digit Treatments: AE Models")
fig.savefig("./figs/ae_max_9.png")



In [89]:
fig, ax = plt.subplots()

# Scatter of the minimum MI against acc_y, with a degree-1 least-squares fit.
min_corr = np.corrcoef(min_mi_x, acc_y)[0, 1]
ax.scatter(min_mi_x, acc_y, color="purple", label="min: {0}".format(min_corr))
min_fit = np.poly1d(np.polyfit(min_mi_x, acc_y, 1))
min_xs = np.unique(min_mi_x)
ax.plot(min_xs, min_fit(min_xs), color="purple")
ax.legend()

ax.set_xlabel("Mutual Information")
ax.set_ylabel("Accuracy")
ax.set_title("MI vs. Acc on 9-digit Treatments: AE Models")
fig.savefig("./figs/ae_min_9.png")



In [90]:
# (model, treatment) pairs for the two autoencoder-style models on the
# "evens" and "odds" treatments only.
half_digit_keys = [
    (model, treatment)
    for model in ("AutoEncoder", "VAE")
    for treatment in ("evens", "odds")
]

# Parallel lists: one MI statistic per pair, plus the matching accuracy.
med_mi_x = [med_mut_infos[key] for key in half_digit_keys]
max_mi_x = [max_mut_infos[key] for key in half_digit_keys]
mean_mi_x = [mean_mut_infos[key] for key in half_digit_keys]
min_mi_x = [min_mut_infos[key] for key in half_digit_keys]
acc_y = [performance[key] for key in half_digit_keys]

In [91]:
fig, ax = plt.subplots()

# One scatter plus least-squares line per MI statistic; the legend entry
# carries the correlation coefficient between that statistic and acc_y.
for mi_vals, colour, label_fmt in (
    (med_mi_x, "red", "med: {0}"),
    (mean_mi_x, "green", "mean: {0}"),
):
    corr = np.corrcoef(mi_vals, acc_y)[0, 1]
    ax.scatter(mi_vals, acc_y, color=colour, label=label_fmt.format(corr))
    fit_line = np.poly1d(np.polyfit(mi_vals, acc_y, 1))
    xs = np.unique(mi_vals)
    ax.plot(xs, fit_line(xs), color=colour)
ax.legend()

ax.set_xlabel("Mutual Information")
ax.set_ylabel("Accuracy")
ax.set_title("MI vs. Acc on 5-digit Treatments: AE Models")
fig.savefig("./figs/ae_mid_5.png")



In [92]:
fig, ax = plt.subplots()

# Scatter of the maximum MI against acc_y, with a degree-1 least-squares fit.
max_corr = np.corrcoef(max_mi_x, acc_y)[0, 1]
ax.scatter(max_mi_x, acc_y, color="blue", label="max: {0}".format(max_corr))
max_fit = np.poly1d(np.polyfit(max_mi_x, acc_y, 1))
max_xs = np.unique(max_mi_x)
ax.plot(max_xs, max_fit(max_xs), color="blue")
ax.legend()

ax.set_xlabel("Mutual Information")
ax.set_ylabel("Accuracy")
ax.set_title("MI vs. Acc on 5-digit Treatments: AE Models")
fig.savefig("./figs/ae_max_5.png")



In [93]:
fig, ax = plt.subplots()

# Scatter of the minimum MI against acc_y, with a degree-1 least-squares fit.
min_corr = np.corrcoef(min_mi_x, acc_y)[0, 1]
ax.scatter(min_mi_x, acc_y, color="purple", label="min: {0}".format(min_corr))
min_fit = np.poly1d(np.polyfit(min_mi_x, acc_y, 1))
min_xs = np.unique(min_mi_x)
ax.plot(min_xs, min_fit(min_xs), color="purple")
ax.legend()

ax.set_xlabel("Mutual Information")
ax.set_ylabel("Accuracy")
ax.set_title("MI vs. Acc on 5-digit Treatments: AE Models")
fig.savefig("./figs/ae_min_5.png")



In [None]:
#TODO: Across tasks not trained on
# break up into groups