In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from PMM_for_images import ImageEncoder, ClassifierNet

import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torch.distributions import Categorical
from copy import deepcopy

from tqdm import tqdm

In [3]:
def pmm(image, net, ps, lbls,cut=5):
        """
        image is some image dataset
        """
        x_locs = net(image.unsqueeze(0))[0]

        idks = torch.exp(-1*(torch.linalg.vector_norm(x_locs-ps,dim=1)))
        top100_inds = torch.topk(idks, cut)[1]
        p = F.softmax(idks[top100_inds])
        
        img_label = np.random.choice(lbls[top100_inds],p=p.detach().numpy() )


        return img_label

In [4]:
def get_precisions(inputs, target):
    """
    @param inputs: dimension NxC where C is the number of imputation models and N is the number of observations
    @param target: vector containing real labels of dimension Nx1 where N is the number of observations


    """
    accuracy_scores = [ 0 for i in range(inputs.shape[1])]
    for (i,(guesses, true_val)) in enumerate(zip(inputs, target)):
        for t in range(len(accuracy_scores)):
            if i == 0:
                accuracy_scores[t] = 1 if true_val == guesses[t] else 0
            else:
                incr = 1 if true_val == guesses[t] else 0
                accuracy_scores[t] = accuracy_scores[t]*(i)/(i+1) + incr/(i+1)
    return accuracy_scores

## Evaluating all three as missingness increase (MNAR)

In [84]:
data_1000 = pickle.load(open("pmm_data_alpha_1000.pkl",'rb'))
data_5000 = pickle.load(open("pmm_data_alpha_5000.pkl",'rb'))
data_10000 = pickle.load(open("pmm_data_alpha_10000.pkl",'rb'))
data_14000 = pickle.load(open("pmm_data_alpha_14000.pkl",'rb'))

In [87]:
real_labels_1000 = [i[1] for i in data_1000['missing']]
real_labels_5000 = [i[1] for i in data_5000['missing']]
real_labels_10000 = [i[1] for i in data_10000['missing']]
real_labels_14000 = [i[1] for i in data_14000['missing']]
obs_img_1000 = [i[0] for i in data_1000['obs']]
obs_img_5000 = [i[0] for i in data_5000['obs']]
obs_img_10000 = [i[0] for i in data_10000['obs']]
obs_img_14000 = [i[0] for i in data_14000['obs']]
obs_lbls_1000 = np.array([i[1] for i in data_1000['obs']])
obs_lbls_5000 = np.array([i[1] for i in data_5000['obs']])
obs_lbls_10000 = np.array([i[1] for i in data_10000['obs']])
obs_lbls_14000 = np.array([i[1] for i in data_14000['obs']])

missing_1000 = data_1000['missing']
missing_5000 = data_5000['missing']
missing_10000 = data_10000['missing']
missing_14000 = data_14000['missing']

In [88]:
cc1 = torch.load("net_classifier_alpha_1000.pt")
cc5 = torch.load("net_classifier_alpha_5000.pt")
cc10 = torch.load("net_classifier_alpha_10000.pt")
cc14 = torch.load("net_classifier_alpha_14000.pt")
ca1 = torch.load("net_autoencoder_alpha_1000.pt")
ca5 = torch.load("net_autoencoder_alpha_5000.pt")
ca10 = torch.load("net_autoencoder_alpha_10000.pt")
ca14 = torch.load("net_autoencoder_alpha_14000.pt")

n_imputations = 5


precs_1000 = []
precs_5000 = []
precs_10000 = []
precs_14000 = []


pc_1 = cc1(torch.stack(obs_img_1000))
pc_5 = cc5(torch.stack(obs_img_5000))
pc_10 = cc10(torch.stack(obs_img_10000))
pc_14 = cc14(torch.stack(obs_img_14000))
pa_1 = ca1(torch.stack(obs_img_1000))
pa_5 = ca5(torch.stack(obs_img_5000))
pa_10 = ca10(torch.stack(obs_img_10000))
pa_14 = ca14(torch.stack(obs_img_14000))


## classifier on all three
labels_1 = [[],[],[], [],[]]
print('11s')
for img in tqdm(missing_1000):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca1 ,pa_1, obs_lbls_1000)
                pred_label_pc = pmm(image, cc1,pc_1, obs_lbls_1000)
                classifier_probs = cc1(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_1[i].append((pred_label_pa, pred_label_pc, classifier_label))



labels_5 = [[],[],[], [],[]]
print('51s')
for img in tqdm(missing_5000):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca5,pa_5, obs_lbls_5000)
                pred_label_pc = pmm(image, cc5,pc_5, obs_lbls_5000)
                classifier_probs = cc5(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_5[i].append((pred_label_pa, pred_label_pc, classifier_label))


labels_10 = [[],[],[], [],[]]
print('10s')
for img in tqdm(missing_10000):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca10,pa_10, obs_lbls_10000)
                pred_label_pc = pmm(image, cc10,pc_10, obs_lbls_10000)
                classifier_probs = cc10(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_10[i].append((pred_label_pa, pred_label_pc, classifier_label))


labels_14 = [[],[],[], [],[]]
print('14s')
for img in tqdm(missing_14000):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca14, pa_14, obs_lbls_14000)
                pred_label_pc = pmm(image, cc14, pc_14, obs_lbls_14000)
                classifier_probs = cc14(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_14[i].append((pred_label_pa, pred_label_pc, classifier_label))

  x = F.softmax(x)


11s


  p = F.softmax(idks[top100_inds])
100%|██████████| 34929/34929 [09:09<00:00, 63.57it/s]  


51s


100%|██████████| 23895/23895 [07:05<00:00, 56.09it/s]


10s


100%|██████████| 5630/5630 [02:08<00:00, 43.80it/s]


14s


100%|██████████| 1146/1146 [00:25<00:00, 45.47it/s]


In [89]:
#precision for 11
precs_1 = []
for i in range(len(labels_1)):
    precs_1.append(get_precisions(np.array(labels_1[i]), np.array(real_labels_1000)))

precs_1 = np.array(precs_1)

#precisins for 51
precs_5 = []
for i in range(len(labels_5)):
    precs_5.append(get_precisions(np.array(labels_5[i]), np.array(real_labels_5000)))

precs_5 = np.array(precs_5)

#precisions for 101
precs_10 = []
for i in range(len(labels_10)):
    precs_10.append(get_precisions(np.array(labels_10[i]), np.array(real_labels_10000)))

precs_10 = np.array(precs_10)

#precisions for 201
precs_14 = []
for i in range(len(labels_14)):
    precs_14.append(get_precisions(np.array(labels_14[i]), np.array(real_labels_14000)))

precs_14 = np.array(precs_14)

In [122]:
precs_1.mean(axis=0), precs_1.std(axis=0)

(array([0.24918549, 0.11012626, 0.11149475]),
 array([2.41293576e-03, 1.09011894e-03, 1.40255360e-05]))

In [128]:
1.402e-4

0.0001402

In [123]:
precs_5.mean(axis=0), precs_5.std(axis=0)

(array([0.25021134, 0.10358652, 0.05900816]),
 array([0.00155496, 0.00113455, 0.        ]))

In [124]:
precs_10.mean(axis=0), precs_10.std(axis=0)

(array([0.23829485, 0.15179396, 0.18838366]),
 array([0.00424151, 0.00215089, 0.00300509]))

In [125]:
precs_14.mean(axis=0), precs_14.std(axis=0)

(array([0.26771379, 0.09546248, 0.09598604]),
 array([0.00735059, 0.00545218, 0.        ]))

## Evaluating the different types of PMM (MAR)

In [5]:
cc = torch.load("net_classifier.pt")
ca = torch.load("net_autoencoder.pt")


data = pickle.load(open('pmm_data.pkl','rb'))
real_labels = [i[1] for i in data['missing']]
obs_img = [i[0] for i in data['obs']]
obs_lbls = np.array([i[1] for i in data['obs']])
missing = data['missing']

#### type 0

In [7]:
pc = cc(torch.stack(obs_img))
pa = ca(torch.stack(obs_img))
labels_type_0 = [[],[],[],[],[]]
for img in tqdm(missing):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca, pa, obs_lbls)
                pred_label_pc = pmm(image, cc, pc, obs_lbls)
                classifier_probs = cc(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_type_0[i].append((pred_label_pa, pred_label_pc, classifier_label))
precs_t0 = []
for i in range(len(labels_type_0)):
    precs_t0.append(get_precisions(np.array(labels_type_0[i]), np.array(real_labels)))

precs_t0 = np.array(precs_t0)

  p = F.softmax(idks[top100_inds])
100%|██████████| 11127/11127 [03:26<00:00, 53.88it/s]


In [8]:
precs_t0.mean(axis=0), precs_t0.std(axis=0)

(array([0.25144244, 0.10346005, 0.04502561]),
 array([0.00210798, 0.00186742, 0.        ]))

#### type 1

In [None]:
#need to get dot b
sd = 1e-2

cadot = deepcopy(ca)
ccdot = deepcopy(cc)
for param in cadot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)
for param in ccdot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)


pc = cc(torch.stack(obs_img))
pa = ca(torch.stack(obs_img))
labels_type_1 = [[],[],[],[],[]]
for img in tqdm(missing):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, cadot, pa, obs_lbls)
                pred_label_pc = pmm(image, ccdot, pc, obs_lbls)
                classifier_probs = cc(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_type_1[i].append((pred_label_pa, pred_label_pc, classifier_label))
precs_t1 = []
for i in range(len(labels_type_1)):
    precs_t1.append(get_precisions(np.array(labels_type_1[i]), np.array(real_labels)))

precs_t1 = np.array(precs_t1)

  p = F.softmax(idks[top100_inds])
100%|██████████| 11127/11127 [04:09<00:00, 44.54it/s]


In [None]:
precs_t1.mean(axis=0), precs_t1.std(axis=0)

(array([0.20641682, 0.10461041, 0.04502561]),
 array([0.00287611, 0.00154725, 0.        ]))

#### type 2

In [None]:
#need to get dot b
sd = 1e-2

cadot = deepcopy(ca)
ccdot = deepcopy(cc)
for param in cadot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)
for param in ccdot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)


pc = ccdot(torch.stack(obs_img))
pa = cadot(torch.stack(obs_img))
labels_type_2 = [[],[],[],[],[]]
for img in tqdm(missing):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, cadot, pa, obs_lbls)
                pred_label_pc = pmm(image, ccdot, pc, obs_lbls)
                classifier_probs = cc(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_type_2[i].append((pred_label_pa, pred_label_pc, classifier_label))
precs_t2 = []
for i in range(len(labels_type_2)):
    precs_t2.append(get_precisions(np.array(labels_type_2[i]), np.array(real_labels)))

precs_t2 = np.array(precs_t2)

  p = F.softmax(idks[top100_inds])
100%|██████████| 11127/11127 [04:42<00:00, 39.45it/s]


In [None]:
precs_t2.mean(axis=0), precs_t2.std(axis=0)

(array([0.24019053, 0.10484407, 0.04502561]),
 array([0.00300047, 0.00251743, 0.        ]))

#### type 3

In [None]:
#need to get dot b
sd = 1e-2

cadot = deepcopy(ca)
ccdot = deepcopy(cc)
for param in cadot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)
for param in ccdot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)

caddot = deepcopy(cadot)
ccddot = deepcopy(ccdot)
for param in caddot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)
for param in ccddot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)


pc = ccdot(torch.stack(obs_img))
pa = cadot(torch.stack(obs_img))
labels_type_3 = [[],[],[],[],[]]
for img in tqdm(missing):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, caddot, pa, obs_lbls)
                pred_label_pc = pmm(image, ccddot, pc, obs_lbls)
                classifier_probs = cc(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_type_3[i].append((pred_label_pa, pred_label_pc, classifier_label))
precs_t3 = []
for i in range(len(labels_type_3)):
    precs_t3.append(get_precisions(np.array(labels_type_3[i]), np.array(real_labels)))

precs_t3 = np.array(precs_t3)

  p = F.softmax(idks[top100_inds])
100%|██████████| 11127/11127 [04:34<00:00, 40.56it/s]


In [None]:
precs_t3.mean(axis=0), precs_t3.std(axis=0)

(array([0.20898715, 0.10577874, 0.04502561]),
 array([0.00292799, 0.00228139, 0.        ]))

## Evaluating all three as missingness increases (MNAR)

In [9]:
data_11 = pickle.load(open("pmm_data_betas_1_1.pkl",'rb'))
data_101 = pickle.load(open("pmm_data_betas_10_1.pkl",'rb'))
data_201 = pickle.load(open("pmm_data_betas_20_1.pkl",'rb'))

In [10]:
real_labels_11 = [i[1] for i in data_11['missing']]
real_labels_101 = [i[1] for i in data_101['missing']]
real_labels_201 = [i[1] for i in data_201['missing']]

obs_img_11 = [i[0] for i in data_11['obs']]
obs_img_101 = [i[0] for i in data_101['obs']]
obs_img_201 = [i[0] for i in data_201['obs']]



obs_lbls_11 = [i[1] for i in data_11['obs']]
obs_lbls_101 = [i[1] for i in data_101['obs']]
obs_lbls_201 = [i[1] for i in data_201['obs']]

missing_11 = data_11['missing']
missing_101 = data_101['missing']
missing_201 = data_201['missing']

In [11]:
obs_lbls_11 = np.array([i[1] for i in data_11['obs']])
obs_lbls_101 = np.array([i[1] for i in data_101['obs']])
obs_lbls_201 = np.array([i[1] for i in data_201['obs']])

In [12]:
cc11 = torch.load("net_classifier_beta_1_1.pt")
cc51 = torch.load("net_classifier_beta_5_1.pt")
cc101 = torch.load("net_classifier_beta_10_1.pt")
cc201 = torch.load("net_classifier_beta_20_1.pt")
ca11 = torch.load("net_autoencoder_beta_1_1.pt")
ca51 = torch.load("net_autoencoder_beta_5_1.pt")
ca101 = torch.load("net_autoencoder_beta_10_1.pt")
ca201 = torch.load("net_autoencoder_beta_20_1.pt")

n_imputations = 5


pc_11 = cc11(torch.stack(obs_img_11))
pc_101 = cc11(torch.stack(obs_img_101))
pc_201 = cc11(torch.stack(obs_img_201))
pa_11 = ca11(torch.stack(obs_img_11))
pa_101 = ca11(torch.stack(obs_img_101))
pa_201 = ca11(torch.stack(obs_img_201))

precs_11 = []
precs_51 = []
precs_101 = []
precs_201 = []

## classifier on all three
labels_11 = [[],[],[], [],[]]
print('11s')
for img in tqdm(missing_11):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca11 ,pa_11, obs_lbls_11)
                pred_label_pc = pmm(image, cc11,pc_11, obs_lbls_11)
                classifier_probs = cc11(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_11[i].append((pred_label_pa, pred_label_pc, classifier_label))





labels_101 = [[],[],[], [],[]]
print('101s')
for img in tqdm(missing_101):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca101,pa_101, obs_lbls_101)
                pred_label_pc = pmm(image, cc101,pc_101, obs_lbls_101)
                classifier_probs = cc101(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_101[i].append((pred_label_pa, pred_label_pc, classifier_label))


labels_201 = [[],[],[], [],[]]
print('201s')
for img in tqdm(missing_201):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca201, pa_201, obs_lbls_201)
                pred_label_pc = pmm(image, cc201, pc_201, obs_lbls_201)
                classifier_probs = cc201(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_201[i].append((pred_label_pa, pred_label_pc, classifier_label))


11s


  p = F.softmax(idks[top100_inds])
100%|██████████| 29294/29294 [07:23<00:00, 66.09it/s]


101s


100%|██████████| 46775/46775 [10:14<00:00, 76.09it/s]


201s


100%|██████████| 46976/46976 [09:54<00:00, 78.97it/s]


In [13]:
#precision for 11
precs_11 = []
for i in range(len(labels_11)):
    precs_11.append(get_precisions(np.array(labels_11[i]), np.array(real_labels_11)))

precs_11 = np.array(precs_11)

#precisions for 101
precs_101 = []
for i in range(len(labels_11)):
    precs_101.append(get_precisions(np.array(labels_101[i]), np.array(real_labels_101)))

precs_101 = np.array(precs_101)

#precisions for 201
precs_201 = []
for i in range(len(labels_11)):
    precs_201.append(get_precisions(np.array(labels_201[i]), np.array(real_labels_201)))

precs_201 = np.array(precs_201)

In [14]:
precs_11.mean(axis=0), precs_11.std(axis=0)

(array([0.1725268 , 0.05391548, 0.00604219]),
 array([0.00161391, 0.00143773, 0.        ]))

In [15]:
precs_101.mean(axis=0), precs_101.std(axis=0)

(array([0.12253127, 0.09505505, 0.08722608]),
 array([0.00132201, 0.00108351, 0.        ]))

In [16]:
precs_201.mean(axis=0), precs_201.std(axis=0)

(array([0.12064033, 0.0948314 , 0.10060456]),
 array([1.19326833e-03, 6.92206072e-04, 1.38777878e-17]))

## Evaluating the different types of PMM (MNAR)

In [17]:
cc = torch.load("net_classifier_mnar.pt")
ca = torch.load("net_autoencoder_mnar.pt")


data = pickle.load(open('pmm_data_mnar.pkl','rb'))
real_labels = [i[1] for i in data['missing']]
obs_img = [i[0] for i in data['obs']]
obs_lbls = np.array([i[1] for i in data['obs']])
missing = data['missing']

#### type 0
straight up pmm

In [18]:
pc = cc(torch.stack(obs_img))
pa = ca(torch.stack(obs_img))
labels_type_0 = [[],[],[],[],[]]
for img in tqdm(missing):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca, pa, obs_lbls)
                pred_label_pc = pmm(image, cc, pc, obs_lbls)
                classifier_probs = cc(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_type_0[i].append((pred_label_pa, pred_label_pc, classifier_label))
precs_t0 = []
for i in range(len(labels_type_0)):
    precs_t0.append(get_precisions(np.array(labels_type_0[i]), np.array(real_labels)))

precs_t0 = np.array(precs_t0)

  p = F.softmax(idks[top100_inds])
100%|██████████| 28423/28423 [09:15<00:00, 51.17it/s]


In [19]:
precs_t0.mean(axis=0), precs_t0.std(axis=0)

(array([0.19810013, 0.19164761, 0.23042606]),
 array([0.00192737, 0.0011397 , 0.00096547]))

#### type 1

In [20]:
#need to get dot b
sd = 1e-2

cadot = deepcopy(ca)
ccdot = deepcopy(cc)
for param in cadot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)
for param in ccdot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)


pc = cc(torch.stack(obs_img))
pa = ca(torch.stack(obs_img))
labels_type_1 = [[],[],[],[],[]]
for img in tqdm(missing):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, cadot, pa, obs_lbls)
                pred_label_pc = pmm(image, ccdot, pc, obs_lbls)
                classifier_probs = cc(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_type_1[i].append((pred_label_pa, pred_label_pc, classifier_label))
precs_t1 = []
for i in range(len(labels_type_1)):
    precs_t1.append(get_precisions(np.array(labels_type_1[i]), np.array(real_labels)))

precs_t1 = np.array(precs_t1)

  p = F.softmax(idks[top100_inds])
100%|██████████| 28423/28423 [08:47<00:00, 53.86it/s]


In [21]:
precs_t1.mean(axis=0), precs_t1.std(axis=0)

(array([0.14601555, 0.18786194, 0.23078493]),
 array([0.00184707, 0.00023295, 0.00082918]))

#### type 2

In [22]:
#need to get dot b
sd = 1e-2

cadot = deepcopy(ca)
ccdot = deepcopy(cc)
for param in cadot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)
for param in ccdot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)


pc = ccdot(torch.stack(obs_img))
pa = cadot(torch.stack(obs_img))
labels_type_2 = [[],[],[],[],[]]
for img in tqdm(missing):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, cadot, pa, obs_lbls)
                pred_label_pc = pmm(image, ccdot, pc, obs_lbls)
                classifier_probs = cc(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_type_2[i].append((pred_label_pa, pred_label_pc, classifier_label))
precs_t2 = []
for i in range(len(labels_type_2)):
    precs_t2.append(get_precisions(np.array(labels_type_2[i]), np.array(real_labels)))

precs_t2 = np.array(precs_t2)

  p = F.softmax(idks[top100_inds])
100%|██████████| 28423/28423 [08:17<00:00, 57.15it/s]


In [23]:
precs_t2.mean(axis=0), precs_t2.std(axis=0)

(array([0.18826303, 0.18624354, 0.23047532]),
 array([0.0016804 , 0.00206312, 0.0010407 ]))

#### type 3

In [24]:
#need to get dot b
sd = 1e-2

cadot = deepcopy(ca)
ccdot = deepcopy(cc)
for param in cadot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)
for param in ccdot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)

caddot = deepcopy(cadot)
ccddot = deepcopy(ccdot)
for param in caddot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)
for param in ccddot.parameters():
    param.data.copy_(param.data + np.random.random()*sd)


pc = ccdot(torch.stack(obs_img))
pa = cadot(torch.stack(obs_img))
labels_type_3 = [[],[],[],[],[]]
for img in tqdm(missing):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, caddot, pa, obs_lbls)
                pred_label_pc = pmm(image, ccddot, pc, obs_lbls)
                classifier_probs = cc(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_type_3[i].append((pred_label_pa, pred_label_pc, classifier_label))
precs_t3 = []
for i in range(len(labels_type_3)):
    precs_t3.append(get_precisions(np.array(labels_type_3[i]), np.array(real_labels)))

precs_t3 = np.array(precs_t3)

  p = F.softmax(idks[top100_inds])
100%|██████████| 28423/28423 [08:37<00:00, 54.93it/s]


In [25]:
precs_t3.mean(axis=0), precs_t3.std(axis=0)

(array([0.12198572, 0.19461   , 0.23036977]),
 array([0.0006725 , 0.0006436 , 0.00065723]))

### More MNAR lookins

In [26]:
data_11 = pickle.load(open("pmm_data_betas_1_1.pkl",'rb'))
data_1010 = pickle.load(open("pmm_data_betas_10_10.pkl",'rb'))
data_5050 = pickle.load(open("pmm_data_betas_50_50.pkl",'rb'))

In [27]:
real_labels_11 = [i[1] for i in data_11['missing']]
real_labels_1010 = [i[1] for i in data_1010['missing']]
real_labels_5050 = [i[1] for i in data_5050['missing']]

obs_img_11 = [i[0] for i in data_11['obs']]
obs_img_1010 = [i[0] for i in data_1010['obs']]
obs_img_5050 = [i[0] for i in data_5050['obs']]



obs_lbls_11 = [i[1] for i in data_11['obs']]
obs_lbls_1010 = [i[1] for i in data_1010['obs']]
obs_lbls_5050 = [i[1] for i in data_5050['obs']]

missing_11 = data_11['missing']
missing_1010 = data_1010['missing']
missing_5050 = data_5050['missing']

In [28]:
obs_lbls_11 = np.array([i[1] for i in data_11['obs']])
obs_lbls_1010 = np.array([i[1] for i in data_1010['obs']])
obs_lbls_5050 = np.array([i[1] for i in data_5050['obs']])

In [29]:
cc11 = torch.load("net_classifier_beta_1_1.pt")
cc1010 = torch.load("net_classifier_beta_10_10.pt")
cc5050 = torch.load("net_classifier_beta_50_50.pt")
ca11 = torch.load("net_autoencoder_beta_1_1.pt")
ca1010 = torch.load("net_autoencoder_beta_10_10.pt")
ca5050 = torch.load("net_autoencoder_beta_50_50.pt")

n_imputations = 5


pc_11 = cc11(torch.stack(obs_img_11))
pc_1010 = cc1010(torch.stack(obs_img_1010))
pc_5050 = cc5050(torch.stack(obs_img_5050))
pa_11 = ca11(torch.stack(obs_img_11))
pa_1010 = ca1010(torch.stack(obs_img_1010))
pa_5050 = ca5050(torch.stack(obs_img_5050))

precs_11 = []

precs_1010 = []
precs_5050 = []

## classifier on all three
labels_11 = [[],[],[], [],[]]
print('11s')
for img in tqdm(missing_11):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca11 ,pa_11, obs_lbls_11)
                pred_label_pc = pmm(image, cc11,pc_11, obs_lbls_11)
                classifier_probs = cc11(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_11[i].append((pred_label_pa, pred_label_pc, classifier_label))





labels_1010 = [[],[],[], [],[]]
print('1010s')
for img in tqdm(missing_101):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca1010,pa_1010, obs_lbls_1010)
                pred_label_pc = pmm(image, cc1010,pc_1010, obs_lbls_1010)
                classifier_probs = cc101(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_1010[i].append((pred_label_pa, pred_label_pc, classifier_label))


labels_5050 = [[],[],[], [],[]]
print('5050s')
for img in tqdm(missing_201):

        image, pred_label_pc = img[0], img[1]
        
        for i in range(5):
                pred_label_pa = pmm(image, ca5050, pa_5050, obs_lbls_5050)
                pred_label_pc = pmm(image, cc5050, pc_5050, obs_lbls_5050)
                classifier_probs = cc201(image.unsqueeze(0))
                classifier_label = Categorical(classifier_probs).sample().item()


                labels_5050[i].append((pred_label_pa, pred_label_pc, classifier_label))


  x = F.softmax(x)


11s


  p = F.softmax(idks[top100_inds])
100%|██████████| 26573/26573 [06:52<00:00, 64.44it/s]


1010s


100%|██████████| 46775/46775 [13:00<00:00, 59.91it/s]


5050s


100%|██████████| 46976/46976 [14:40<00:00, 53.36it/s]


In [30]:

precs_11 = []
for i in range(len(labels_11)):
    precs_11.append(get_precisions(np.array(labels_11[i]), np.array(real_labels_11)))

precs_11 = np.array(precs_11)

#precisions for 101
precs_1010 = []
for i in range(len(labels_11)):
    precs_1010.append(get_precisions(np.array(labels_1010[i]), np.array(real_labels_1010)))

precs_1010 = np.array(precs_1010)

#precisions for 201
precs_5050 = []
for i in range(len(labels_11)):
    precs_5050.append(get_precisions(np.array(labels_5050[i]), np.array(real_labels_5050)))

precs_5050 = np.array(precs_5050)

In [31]:
precs_11.mean(axis=0), precs_11.std(axis=0)

(array([0.21251646, 0.08390472, 0.01053701]),
 array([0.00119132, 0.00080452, 0.        ]))

In [32]:
precs_1010.mean(axis=0), precs_1010.std(axis=0)

(array([0.09663756, 0.06559436, 0.04553462]),
 array([0.00082265, 0.00143485, 0.        ]))

In [33]:
precs_5050.mean(axis=0), precs_5050.std(axis=0)

(array([0.09983087, 0.10144796, 0.09224042]),
 array([0.00161001, 0.00112986, 0.        ]))