In [1]:
from model import cnn_autoencoder
# NOTE(review): BATCH_SIZE and DEVICE are presumably supplied by this star
# import; it is kept ahead of the explicit imports so that any same-named
# symbol it exports is still overridden by them, as before.
from config import *

import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# `torch` is used directly (torch.load, torch.no_grad, torch.device), so it is
# imported explicitly rather than relying on it leaking in via `config`.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
import torch.optim as optim

import torchvision.datasets as datasets
from torchvision import transforms

In [2]:
# Load the MNIST training split; images are converted to tensors on access
# and the archive is downloaded into 'dataset/' if it is not already there.
trans = transforms.ToTensor()
dataset = datasets.MNIST(
    root='dataset/',
    train=True,
    transform=trans,
    download=True,
)

In [3]:
# get data subsets
# get indices of digits 5, 6, 8 and 9
# Labels are read from `dataset.targets` in a single pass, so no image has to
# be decoded and transformed just to inspect its label (iterating the dataset
# itself once per digit did that for every sample, four times over).
DIGITS = (5, 6, 8, 9)
N_HOLDOUT = 100  # last N samples of each digit are reserved for the test loader

digit_indices = {digit: [] for digit in DIGITS}
for i, label in enumerate(dataset.targets.tolist()):
    if label in digit_indices:
        digit_indices[label].append(i)
idx5, idx6, idx8, idx9 = (digit_indices[digit] for digit in DIGITS)

# create dataloaders: one per digit, covering everything except the held-out tail
five_loader = DataLoader(dataset=Subset(dataset, idx5[:-N_HOLDOUT]), batch_size=BATCH_SIZE, shuffle=True)
six_loader = DataLoader(dataset=Subset(dataset, idx6[:-N_HOLDOUT]), batch_size=BATCH_SIZE, shuffle=True)
eight_loader = DataLoader(dataset=Subset(dataset, idx8[:-N_HOLDOUT]), batch_size=BATCH_SIZE, shuffle=True)
nine_loader = DataLoader(dataset=Subset(dataset, idx9[:-N_HOLDOUT]), batch_size=BATCH_SIZE, shuffle=True)

# combined test set: the held-out tail of each digit, in 5, 6, 8, 9 order.
# shuffle=False keeps that order stable so scores line up across models.
idx5689 = np.hstack([idx5[-N_HOLDOUT:], idx6[-N_HOLDOUT:], idx8[-N_HOLDOUT:], idx9[-N_HOLDOUT:]])
print(len(idx5689))
test5689_loader = DataLoader(dataset=Subset(dataset, idx5689), batch_size=BATCH_SIZE, shuffle=False)

400


In [4]:
# multiple models trained with different subsets of data
# get state dicts from previous training
# glob returns matches in a filesystem-dependent order; sorting keeps the
# file -> position mapping (and anything named after that position, such as
# per-model score columns) the same on every run and platform.
sdfiles = sorted(glob.glob('./*.pt'))
sdfiles

['.\\cnnae5_statedict.pt',
 '.\\cnnae6_statedict.pt',
 '.\\cnnae8_statedict.pt',
 '.\\cnnae9_statedict.pt']

In [5]:
# dataframe to store autoencoder 'scores'
# Each model contributes one column `scores{i}` (i = position in `sdfiles`)
# holding the per-sample reconstruction MSE on the shared test loader; a final
# `labels` column holds the true digit of each sample.
if not sdfiles:
    raise FileNotFoundError("no state dict files found; nothing to score")

# reduction='none' keeps the element-wise squared errors so they can be
# averaged per sample in one vectorised step instead of a Python loop.
loss_func = nn.MSELoss(reduction='none')

# Labels do not depend on the model. The test loader is not shuffled, so this
# order matches the order in which scores are produced below.
labels = torch.cat([label for _, label in test5689_loader]).numpy()

columns = {}

# get scores for data under each model
for i, f in enumerate(sdfiles):
    # load weights from previous training (paths from glob are used as-is)
    cnnmodel = cnn_autoencoder().to(DEVICE)
    cnnmodel.load_state_dict(torch.load(f, map_location=torch.device(DEVICE)))
    cnnmodel.eval()

    # autoencoder evaluation loop
    batch_scores = []
    with torch.no_grad():
        for x, _ in test5689_loader:
            # forward
            x = x.to(DEVICE)
            reconstructed_x = cnnmodel(x)

            # compute losses, 'scores': mean squared error over all
            # non-batch dimensions, i.e. one value per sample
            per_sample = loss_func(reconstructed_x, x).flatten(start_dim=1).mean(dim=1)
            # only the scalar scores are kept; inputs and reconstructions
            # are released after each batch
            batch_scores.append(per_sample.cpu())

    # store scores for this model
    columns[f'scores{i}'] = torch.cat(batch_scores).numpy()

columns['labels'] = labels
df = pd.DataFrame(columns)

In [6]:
# Preview the first five rows of the score table
df.head(n=5)

Unnamed: 0,scores0,scores1,scores2,scores3,labels
0,0.015366,0.033622,0.019755,0.051129,5
1,0.015107,0.042163,0.027218,0.066016,5
2,0.021128,0.048342,0.036074,0.073334,5
3,0.008867,0.038759,0.02744,0.048807,5
4,0.005918,0.03902,0.025115,0.030302,5


In [7]:
# Persist the score table; the row index is written as the first column
df.to_csv(path_or_buf='ae_scores.csv', index=True)