# 0. Prepare

In [51]:
import sys
sys.path.append('../dataset/')
sys.path.append('../network/')
sys.path.append('../model/')

import os
import glob
import time
import torch
import logging
import argparse
import numpy as np
import pandas as pd
import torch.optim as optim
import torch.nn as nn
import torch
import seaborn as sns

from pathlib import Path
from main_loading import *
from main_network import *
from main_model_rec import *
from main_model_one_class import *
from scipy.spatial import KDTree
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import NearestNeighbors

import matplotlib.pyplot as plt
%config InlineBackend.figure_format='retina'

In [20]:
# Device string used both as `map_location` when loading checkpoints and as the
# device on which the encoders are run.
# NOTE(review): the device and the absolute paths below are machine-specific;
# adjust them before re-running on another host.
device = 'cuda:1'
# Root directory handed to `load_dataset`.
root = '/net/leksai/data/FashionMNIST'
# Checkpoint loaded by `RecEncoder.load_model` (reconstruction model).
# Presumably the `[2]` in the path is the normal class index (pullover) — confirm.
rec_model_path = '/net/leksai/nips/model/rec/fmnist/rec_unsupervised_[2]_[]_[0.0]/net_fmnist_LeNet_rec_eta_100_epochs_150_batch_128/model.tar'
# Checkpoint loaded by `OneClassEncoder.load_model` (one-class model).
oc_model_path = '/net/leksai/nips/model/one_class/fmnist/one_class_unsupervised_[2]_[]_[1]_[0.0]/net_fmnist_LeNet_one_class_eta_100_epochs_150_batch_128/model.tar'

# 1. Load Only the Encoder Part

> ## For One-Class Model

In [25]:
class OneClassEncoder:
    """
    Thin wrapper that loads a trained one-class network and runs it over a dataset.

    Attributes:
        net: network built by `build_network`; None until `set_network` is called.
        net_name: name passed to `build_network`.
        c: the checkpoint's 'c' entry; None until `load_model` is called.
    """

    def __init__(self):
        self.net = None
        self.net_name = None
        # Defined up front so the attribute exists before `load_model` runs.
        self.c = None

    def set_network(self, net_name):
        """Build the network called `net_name` and store it in `self.net`."""
        self.net_name = net_name
        self.net = build_network(net_name)

    def load_model(self, model_path, map_location):
        """
        Restore `self.c` and the network weights from the checkpoint at `model_path`.

        The checkpoint is expected to be a dict with the keys 'c' and 'net_dict'.
        `map_location` is forwarded to `torch.load`.
        """
        model_dict = torch.load(model_path, map_location=map_location)
        self.c = model_dict['c']
        self.net.load_state_dict(model_dict['net_dict'])

    def test(self, train, dataset, device, batch_size, n_jobs_dataloader):
        """
        Run the network over every batch of `dataset` and collect its outputs.

        Args:
            train: if truthy, `dataset.loaders(...)` is expected to return a pair
                and its first element is used; otherwise the return value is
                used as the loader directly.
            dataset: object exposing `loaders(batch_size=..., num_workers=...)`;
                each batch must unpack into `(X, y, idx)`.
            device: device on which the network is evaluated.
            batch_size: forwarded to `dataset.loaders`.
            n_jobs_dataloader: forwarded to `dataset.loaders` as `num_workers`.

        Returns:
            A 1-D `np.ndarray` of dtype object with one entry per sample, each
            entry being that sample's output as a CPU tensor. Callers in this
            notebook iterate it and call `.cpu().numpy()` on each entry.
        """
        loaders = dataset.loaders(batch_size=batch_size,
                                  num_workers=n_jobs_dataloader)
        if train:
            all_loader, _ = loaders
        else:
            all_loader = loaders

        net = self.net.to(device)
        net.eval()

        X_pred_list = []
        with torch.no_grad():
            for X, _y, _idx in all_loader:
                X_pred = net(X.to(device))
                # Move to host first: NumPy cannot wrap tensors that live on a GPU.
                X_pred_list.extend(X_pred.cpu())

        # Fill an object array element by element so the result is always a
        # 1-D array of tensors, independent of how the installed NumPy/PyTorch
        # versions would convert a list of tensors.
        outputs = np.empty(len(X_pred_list), dtype=object)
        for position, row in enumerate(X_pred_list):
            outputs[position] = row
        return outputs

In [26]:
# Build the 'fmnist_LeNet_one_class' network, then restore its weights and the
# checkpoint's `c` entry from `oc_model_path`; `device` is used as `map_location`.
oc_encoder = OneClassEncoder()
oc_encoder.set_network('fmnist_LeNet_one_class')
oc_encoder.load_model(oc_model_path, device)

> ## For Reconstruction Model

In [27]:
class RecEncoder:
    """
    Loads a trained reconstruction network and exposes only the part of it whose
    parameter names also exist in the encoder network `self.net`.

    Attributes:
        net_name: name passed to `build_network` for `self.net`.
        net: encoder network that is evaluated in `test`.
        ae_net: full 'fmnist_LeNet_rec' network used to read the checkpoint.
    """

    def __init__(self):
        self.net_name = None
        self.net = None
        self.ae_net = None

    def set_network(self, net_name: str='fmnist_LeNet_one_class'):
        """
        Set the network structure for the model.

        Builds `self.net` from `net_name` and `self.ae_net` from the fixed name
        'fmnist_LeNet_rec'.
        """
        self.net_name = net_name
        self.net = build_network(net_name)
        self.ae_net = build_network('fmnist_LeNet_rec')

    def load_model(self,
                   model_path,
                   map_location='cuda:1'):
        """
        Load the trained reconstruction model and copy its shared weights into `self.net`.

        The checkpoint is expected to be a dict with the key 'net_dict' holding
        the state dict of `self.ae_net`. Only entries whose keys also appear in
        `self.net`'s state dict are transferred; the remaining entries
        (the decoder, per the original comment) are ignored.
        """
        # Load the full reconstruction model.
        model_dict = torch.load(model_path, map_location=map_location)
        self.ae_net.load_state_dict(model_dict['net_dict'])

        # Obtain both state dicts.
        net_dict = self.net.state_dict()
        ae_net_dict = self.ae_net.state_dict()

        # Keep only the keys the encoder network knows about.
        ae_net_dict = {k: v for k, v in ae_net_dict.items() if k in net_dict}

        # Overwrite the encoder's values and load the merged state dict.
        net_dict.update(ae_net_dict)
        self.net.load_state_dict(net_dict)

    def save_model(self, export_model, save_ae=True):
        """
        Save the state dict of `self.net` to `export_model` under the key 'net_dict'.

        `save_ae` is accepted for interface compatibility but is not used.
        """
        net_dict = self.net.state_dict()
        torch.save({'net_dict': net_dict}, export_model)

    def test(self, train, dataset, device, batch_size, n_jobs_dataloader):
        """
        Run the encoder over every batch of `dataset` and collect its outputs.

        Args:
            train: if truthy, `dataset.loaders(...)` is expected to return a pair
                and its first element is used; otherwise the return value is
                used as the loader directly.
            dataset: object exposing `loaders(batch_size=..., num_workers=...)`;
                each batch must unpack into `(X, y, idx)`.
            device: device on which the network is evaluated.
            batch_size: forwarded to `dataset.loaders`.
            n_jobs_dataloader: forwarded to `dataset.loaders` as `num_workers`.

        Returns:
            A 1-D `np.ndarray` of dtype object with one entry per sample, each
            entry being that sample's output as a CPU tensor. Callers in this
            notebook iterate it and call `.cpu().numpy()` on each entry.
        """
        loaders = dataset.loaders(batch_size=batch_size,
                                  num_workers=n_jobs_dataloader)
        if train:
            all_loader, _ = loaders
        else:
            all_loader = loaders

        net = self.net.to(device)
        net.eval()

        X_pred_list = []
        with torch.no_grad():
            for X, _y, _idx in all_loader:
                X_pred = net(X.to(device))
                # Move to host first: NumPy cannot wrap tensors that live on a GPU.
                X_pred_list.extend(X_pred.cpu())

        # Fill an object array element by element so the result is always a
        # 1-D array of tensors, independent of how the installed NumPy/PyTorch
        # versions would convert a list of tensors.
        outputs = np.empty(len(X_pred_list), dtype=object)
        for position, row in enumerate(X_pred_list):
            outputs[position] = row
        return outputs

In [36]:
# `set_network()` uses its default, so the encoder is a 'fmnist_LeNet_one_class'
# network; `load_model` then copies the matching weights from the reconstruction
# checkpoint, with `device` used as `map_location`.
rec_encoder = RecEncoder()
rec_encoder.set_network()
rec_encoder.load_model(rec_model_path, device)

# 2. Dataset Loading

In [21]:
# Per-class dataset objects, keyed by class name and filled by the loading loop.
dataset_dict_train = {}
dataset_dict_all = {}
# The position of a name in this list is used as its integer class label
# (presumably the FashionMNIST label ids — confirm against the loader).
name_list = ['tshirt', 'trouser', 'pullover', 'dress', 'coat',
             'sandal', 'shirt', 'sneaker', 'bag', 'boot']

In [22]:
# For every class, build two single-class datasets: one through the 'fmnist'
# loader (class `i` as the only normal label, `ratio_abnormal=0`) and one
# through the 'fmnist_eval' loader (class `i` as the only evaluated label).
for i, name in enumerate(name_list):
    dataset_dict_train[name] = load_dataset(loader_name='fmnist',
                                            root=root,
                                            label_normal=(i,),
                                            ratio_abnormal=0)
    dataset_dict_all[name] = load_dataset(loader_name='fmnist_eval',
                                           root=root,
                                           label_eval=(i,))

Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!


# 3. Latent Vector Obtaining

> We use the unsupervised model trained on `pullover` as the feature extractor, meaning that the latent space is defined by the network weights of the unsupervised `pullover` model.

In [30]:
# Latent vectors keyed first by encoder ('oc' = one-class, 'rec' = reconstruction)
# and then by class name.
latent_dict_train = {'oc':{}, 'rec':{}}
latent_dict_all = {'oc':{}, 'rec':{}}

> ## For One Class Model

In [31]:
# Encode every class with the one-class encoder and store the result as one
# NumPy array per class (one row per sample).
for name in name_list:
    dataset_train = dataset_dict_train[name]
    # Batch size 6000: presumably the number of training images per class — confirm.
    data_train = oc_encoder.test(True, dataset_train, device, 6000, 0)
    # `test` yields per-sample tensors; convert each one to a host NumPy row.
    data_train = np.array([x.cpu().numpy() for x in data_train])
    latent_dict_train['oc'][name] = data_train
    
    dataset_all = dataset_dict_all[name]
    # Batch size 7000: presumably train + test images per class — confirm.
    data_all = oc_encoder.test(False, dataset_all, device, 7000, 0)
    data_all = np.array([x.cpu().numpy() for x in data_all])
    latent_dict_all['oc'][name] = data_all

> ## For Rec Model

In [37]:
# Encode every class with the reconstruction encoder and store the result as
# one NumPy array per class (one row per sample).
for name in name_list:
    dataset_train = dataset_dict_train[name]
    # Batch size 6000: presumably the number of training images per class — confirm.
    data_train = rec_encoder.test(True, dataset_train, device, 6000, 0)
    # `test` yields per-sample tensors; convert each one to a host NumPy row.
    data_train = np.array([x.cpu().numpy() for x in data_train])
    latent_dict_train['rec'][name] = data_train
    
    dataset_all = dataset_dict_all[name]
    # Batch size 7000: presumably train + test images per class — confirm.
    data_all = rec_encoder.test(False, dataset_all, device, 7000, 0)
    data_all = np.array([x.cpu().numpy() for x in data_all])
    latent_dict_all['rec'][name] = data_all

# Function for KL Divergence by KNN

In [50]:
from sklearn.neighbors import NearestNeighbors
from scipy.spatial import KDTree

In [48]:
# Cited from https://github.com/nhartland/KL-divergence-estimators

def knn_distance(point, sample, k):
    """
    Return the entry at index `k` of the ascending Euclidean distances
    from `point` to every row of `sample`.

    Index 0 is the smallest distance, so when `point` is itself a row of
    `sample` (distance 0 at index 0), index `k` is its `k`-th nearest
    neighbour among the other rows.
    """
    distances = np.linalg.norm(sample - point, axis=1)
    distances.sort()
    return distances[k]


def verify_sample_shapes(s1, s2, k):
    """
    Check that two samples and a neighbour count are usable by the k-NN
    KL-divergence estimators.

    Inputs:
        s1: array with a `.shape`, expected [N_1, D]
        s2: array with a `.shape`, expected [N_2, D]
        k: number of neighbours the estimator will use
    Raises:
        AssertionError: if either sample is not 2-D, the feature dimensions
            differ, `k` is smaller than 1, `s1` has no more than `k` rows
            (the estimators query `k + 1` neighbours inside `s1`), or `s2`
            has fewer than `k` rows.
    """
    # Expects [N, D]
    assert len(s1.shape) == len(s2.shape) == 2, \
        "s1 and s2 must both be 2-D arrays of shape [N, D]"
    # Check dimensionality of sample is identical
    assert s1.shape[1] == s2.shape[1], \
        "s1 and s2 must have the same number of columns"
    # `k` used to be accepted without any check.
    assert k >= 1, "k must be at least 1"
    assert s1.shape[0] > k, "s1 needs more than k rows"
    assert s2.shape[0] >= k, "s2 needs at least k rows"
    
    
def skl_estimator(s1, s2, k=1):
    """ 
    KL-Divergence estimator using scikit-learn's NearestNeighbors.

    For every point of `s1`, compares the distance to its k-th nearest
    neighbour in `s2` (nu) with the distance to its k-th nearest neighbour
    among the other points of `s1` (rho).

    Inputs:
        s1: (N_1,D) Sample drawn from distribution P
        s2: (N_2,D) Sample drawn from distribution Q
        k: Number of neighbours considered (default 1)
    return: 
        estimated D(P|Q)

    Note: duplicated rows in `s1` give rho == 0 and therefore a non-finite
    result; `len(s1)` must be at least 2 because of the `n - 1` divisor.
    """
    verify_sample_shapes(s1, s2, k)

    n, m = len(s1), len(s2)
    d = float(s1.shape[1])

    # Constructor arguments are passed by keyword: current scikit-learn releases
    # reject positional ones. The former second positional value (10) was the
    # `radius` setting, which k-neighbour queries do not use.
    s1_neighbourhood = NearestNeighbors(n_neighbors=k + 1).fit(s1)
    s2_neighbourhood = NearestNeighbors(n_neighbors=k).fit(s2)

    # One batched query per fitted index instead of one query per point.
    # Within s1 the nearest hit of a point is the point itself, hence k + 1.
    s1_distances, _ = s1_neighbourhood.kneighbors(s1, n_neighbors=k + 1)
    s2_distances, _ = s2_neighbourhood.kneighbors(s1, n_neighbors=k)
    rho = s1_distances[:, -1]
    nu = s2_distances[:, -1]

    return np.log(m / (n - 1)) + (d / n) * np.sum(np.log(nu / rho))


def scipy_estimator(s1, s2, k=1):
    """
    KL-Divergence estimator using scipy's KDTree.

    Inputs:
        s1: (N_1,D) Sample drawn from distribution P
        s2: (N_2,D) Sample drawn from distribution Q
        k: Number of neighbours considered (default 1)
    return:
        estimated D(P|Q)
    """
    verify_sample_shapes(s1, s2, k)

    size_p, size_q = len(s1), len(s2)
    dim = float(s1.shape[1])
    offset = np.log(size_q / (size_p - 1))

    dist_to_q, _ = KDTree(s2).query(s1, k)
    dist_to_p, _ = KDTree(s1).query(s1, k+1)

    # KDTree.query returns a 1-D array for k == 1 and an (N, k) array for
    # k > 1, so the k-th neighbour column only has to be sliced in the latter.
    kth_in_q = dist_to_q[::, -1] if k > 1 else dist_to_q
    kth_in_p = dist_to_p[::, -1]

    return offset + (dim/size_p)*np.sum(np.log(kth_in_q / kth_in_p))

# Calculating Joint KL Divergence

> ## For One Class Model

**Load extra data**

In [92]:
# One-class latent vectors of the pullover training set.
# NOTE(review): this name is re-bound to the 'rec' vectors in the reconstruction section.
pullover_train = latent_dict_train['oc']['pullover']

In [66]:
# Label 2 is 'pullover' (its index in `name_list`).
# `test_eval=True` presumably selects the held-out test split — confirm in `load_dataset`.
dataset_pullover_test = load_dataset(loader_name='fmnist_eval',
                                     root=root,
                                     label_eval=(2,),
                                     test_eval=True)
# Encode with the one-class encoder, then turn the per-sample tensors into one NumPy array.
pullover_test = oc_encoder.test(False, dataset_pullover_test, device, 1000, 0)
pullover_test = np.array([x.cpu().numpy() for x in pullover_test])

In [67]:
# Label 4 is 'coat' (its index in `name_list`).
# `test_eval=True` presumably selects the held-out test split — confirm in `load_dataset`.
dataset_coat_test = load_dataset(loader_name='fmnist_eval',
                                     root=root,
                                     label_eval=(4,),
                                     test_eval=True)
# Encode with the one-class encoder, then turn the per-sample tensors into one NumPy array.
coat_test = oc_encoder.test(False, dataset_coat_test, device, 1000, 0)
coat_test = np.array([x.cpu().numpy() for x in coat_test])

In [68]:
# Label 7 is 'sneaker' (its index in `name_list`).
# `test_eval=True` presumably selects the held-out test split — confirm in `load_dataset`.
dataset_sneaker_test = load_dataset(loader_name='fmnist_eval',
                                     root=root,
                                     label_eval=(7,),
                                     test_eval=True)
# Encode with the one-class encoder, then turn the per-sample tensors into one NumPy array.
sneaker_test = oc_encoder.test(False, dataset_sneaker_test, device, 1000, 0)
sneaker_test = np.array([x.cpu().numpy() for x in sneaker_test])

In [80]:
# Pool (row-wise concatenation) the one-class training vectors of pullover with
# those of coat / sneaker.
pullover_coat_train = np.r_[latent_dict_train['oc']['pullover'], latent_dict_train['oc']['coat']]
pullover_sneaker_train = np.r_[latent_dict_train['oc']['pullover'], latent_dict_train['oc']['sneaker']]

# Same pooling for the test-time vectors computed in the cells above.
pullover_coat_test_ = np.r_[pullover_test, coat_test]
pullover_sneaker_test_ = np.r_[pullover_test, sneaker_test]

**For Pullover & Coat**

> Joint Distribution

In [84]:
# Estimated divergence with the pooled pullover+coat training vectors as the
# P sample and the pooled pullover+coat test vectors as the Q sample; stored
# under the key 'coat'.
dict_pullover_coat = {}
dict_pullover_coat['coat'] = skl_estimator(pullover_coat_train, pullover_coat_test_)

In [90]:
# For every class other than coat and pullover: pool the pullover test vectors
# with that class's one-class vectors and estimate the divergence of the
# pullover+coat training sample from this pooled sample.
for x in name_list:
    if x not in ('coat', 'pullover'):
        joint = np.concatenate((pullover_test, latent_dict_all['oc'][x]), axis=0)
        kl_ = skl_estimator(pullover_coat_train, joint)
        dict_pullover_coat[x] = kl_
        print(x, kl_)

tshirt 6.678524225035061
trouser 7.185524170868892
dress 6.070237151115829
sandal 7.661263468168078
shirt 3.8714684233428267
sneaker 7.699502890825909
bag 7.41631880409517
boot 7.716410785422828


In [94]:
# NOTE(review): in the saved output of this cell the 'coat' entry is identical
# to the 'sandal' entry to every digit, which suggests the stored value came
# from out-of-order execution — re-run the section from a fresh kernel to confirm.
dict_pullover_coat

{'coat': 7.661263468168078,
 'tshirt': 6.678524225035061,
 'trouser': 7.185524170868892,
 'dress': 6.070237151115829,
 'sandal': 7.661263468168078,
 'shirt': 3.8714684233428267,
 'sneaker': 7.699502890825909,
 'bag': 7.41631880409517,
 'boot': 7.716410785422828}

> Marginal Distribution

In [91]:
# Baseline: estimated divergence of the pullover training vectors (P sample)
# from the coat training vectors (Q sample), both from the one-class encoder.
dict_pullover_coat_ = {}
dict_pullover_coat_['training divergence'] = skl_estimator(pullover_train, latent_dict_train['oc']['coat'])

In [93]:
# Estimate the divergence of the pullover training vectors from each other
# class. Coat uses the separately encoded `coat_test` vectors; every other
# class uses its one-class vectors from `latent_dict_all`.
for x in name_list:
    if x == 'pullover':
        continue

    marginal = coat_test if x == 'coat' else latent_dict_all['oc'][x]

    kl_ = skl_estimator(pullover_train, marginal)
    dict_pullover_coat_[x] = kl_
    print(x, kl_)

tshirt 11.072399005169489
trouser 18.745953300786717
dress 13.76077460303965
coat 7.160040157098573
sandal 21.11867218781776
shirt 4.209448972482385
sneaker 23.227279890195163
bag 21.15262816948465
boot 27.834980624381743


In [97]:
dict_pullover_coat_

{'training divergence': 4.568845462776275,
 'tshirt': 11.072399005169489,
 'trouser': 18.745953300786717,
 'dress': 13.76077460303965,
 'coat': 7.160040157098573,
 'sandal': 21.11867218781776,
 'shirt': 4.209448972482385,
 'sneaker': 23.227279890195163,
 'bag': 21.15262816948465,
 'boot': 27.834980624381743}

**For Pullover & Sneaker**

> Joint Distribution

In [102]:
# Estimated divergence with the pooled pullover+sneaker training vectors as the
# P sample and the pooled pullover+sneaker test vectors as the Q sample; stored
# under the key 'sneaker'.
dict_pullover_sneaker = {}
dict_pullover_sneaker['sneaker'] = skl_estimator(pullover_sneaker_train, pullover_sneaker_test_)

In [103]:
# For every class other than sneaker and pullover: pool the pullover test
# vectors with that class's one-class vectors and estimate the divergence of
# the pullover+sneaker training sample from this pooled sample.
for x in name_list:
    if x not in ('sneaker', 'pullover'):
        joint = np.concatenate((pullover_test, latent_dict_all['oc'][x]), axis=0)
        kl_ = skl_estimator(pullover_sneaker_train, joint)
        dict_pullover_sneaker[x] = kl_
        print(x, kl_)

tshirt 27.749288133239418
trouser 28.80863523884461
dress 27.979325455792644
coat 26.085610547476527
sandal 12.502880735513374
shirt 25.872357051664995
bag 21.306602137031792
boot 14.474244197800164


In [104]:
dict_pullover_sneaker

{'sneaker': 4.945477682473212,
 'tshirt': 27.749288133239418,
 'trouser': 28.80863523884461,
 'dress': 27.979325455792644,
 'coat': 26.085610547476527,
 'sandal': 12.502880735513374,
 'shirt': 25.872357051664995,
 'bag': 21.306602137031792,
 'boot': 14.474244197800164}

> Marginal Distribution

In [111]:
# Baseline: estimated divergence of the pullover training vectors (P sample)
# from the sneaker training vectors (Q sample), both from the one-class encoder.
dict_pullover_sneaker_ = {}
dict_pullover_sneaker_['training divergence'] = skl_estimator(pullover_train, latent_dict_train['oc']['sneaker'])

In [114]:
# Estimate the divergence of the pullover training vectors from each other
# class. Sneaker uses the separately encoded `sneaker_test` vectors; every
# other class uses its one-class vectors from `latent_dict_all`.
for x in name_list:
    if x == 'pullover':
        continue

    marginal = sneaker_test if x == 'sneaker' else latent_dict_all['oc'][x]

    kl_ = skl_estimator(pullover_train, marginal)
    dict_pullover_sneaker_[x] = kl_
    print(x, kl_)

tshirt 11.072399005169489
trouser 18.745953300786717
dress 13.76077460303965
coat 4.339410330539567
sandal 21.11867218781776
shirt 4.209448972482385
sneaker 24.12124078252928
bag 21.15262816948465
boot 27.834980624381743


In [115]:
dict_pullover_sneaker_

{'training divergence': 23.674082066947456,
 'tshirt': 11.072399005169489,
 'trouser': 18.745953300786717,
 'dress': 13.76077460303965,
 'coat': 4.339410330539567,
 'sandal': 21.11867218781776,
 'shirt': 4.209448972482385,
 'sneaker': 24.12124078252928,
 'bag': 21.15262816948465,
 'boot': 27.834980624381743}

> ## For Reconstruction Model

In [116]:
# From here on `pullover_train`, `pullover_test`, `coat_test`, ... are re-bound
# to reconstruction-encoder vectors, replacing the one-class values.
pullover_train = latent_dict_train['rec']['pullover']

# Label 2 is 'pullover' (its index in `name_list`).
# `test_eval=True` presumably selects the held-out test split — confirm in `load_dataset`.
dataset_pullover_test = load_dataset(loader_name='fmnist_eval',
                                     root=root,
                                     label_eval=(2,),
                                     test_eval=True)
# Encode with the reconstruction encoder, then turn the per-sample tensors into one NumPy array.
pullover_test = rec_encoder.test(False, dataset_pullover_test, device, 1000, 0)
pullover_test = np.array([x.cpu().numpy() for x in pullover_test])

In [117]:
# Label 4 is 'coat' (its index in `name_list`).
# `test_eval=True` presumably selects the held-out test split — confirm in `load_dataset`.
dataset_coat_test = load_dataset(loader_name='fmnist_eval',
                                     root=root,
                                     label_eval=(4,),
                                     test_eval=True)
# Encode with the reconstruction encoder, then turn the per-sample tensors into one NumPy array.
coat_test = rec_encoder.test(False, dataset_coat_test, device, 1000, 0)
coat_test = np.array([x.cpu().numpy() for x in coat_test])

In [118]:
# Label 7 is 'sneaker' (its index in `name_list`).
# `test_eval=True` presumably selects the held-out test split — confirm in `load_dataset`.
dataset_sneaker_test = load_dataset(loader_name='fmnist_eval',
                                     root=root,
                                     label_eval=(7,),
                                     test_eval=True)
# Encode with the reconstruction encoder, then turn the per-sample tensors into one NumPy array.
sneaker_test = rec_encoder.test(False, dataset_sneaker_test, device, 1000, 0)
sneaker_test = np.array([x.cpu().numpy() for x in sneaker_test])

In [119]:
# Pool (row-wise concatenation) the reconstruction-encoder training vectors of
# pullover with those of coat / sneaker.
pullover_coat_train = np.r_[latent_dict_train['rec']['pullover'], latent_dict_train['rec']['coat']]
pullover_sneaker_train = np.r_[latent_dict_train['rec']['pullover'], latent_dict_train['rec']['sneaker']]

# Same pooling for the test-time vectors computed in the cells above.
pullover_coat_test_ = np.r_[pullover_test, coat_test]
pullover_sneaker_test_ = np.r_[pullover_test, sneaker_test]

**For Pullover & Coat**

> Joint Distribution

In [120]:
# Re-binds `dict_pullover_coat`, so the one-class results held under this name
# are no longer reachable. Stores the estimated divergence between the pooled
# pullover+coat training vectors (P sample) and test vectors (Q sample).
dict_pullover_coat = {}
dict_pullover_coat['coat'] = skl_estimator(pullover_coat_train, pullover_coat_test_)

In [121]:
# For every class other than coat and pullover: pool the pullover test vectors
# with that class's reconstruction-encoder vectors and estimate the divergence
# of the pullover+coat training sample from this pooled sample.
for x in name_list:
    if x not in ('coat', 'pullover'):
        joint = np.concatenate((pullover_test, latent_dict_all['rec'][x]), axis=0)
        kl_ = skl_estimator(pullover_coat_train, joint)
        dict_pullover_coat[x] = kl_
        print(x, kl_)

tshirt 11.120735874828831
trouser 11.637051307975428
dress 9.935287005951649
sandal 12.333502323614523
shirt 6.703082442223101
sneaker 12.347926201399233
bag 11.785093318871137
boot 12.351785909246662


In [122]:
dict_pullover_coat

{'coat': 5.7802951521432515,
 'tshirt': 11.120735874828831,
 'trouser': 11.637051307975428,
 'dress': 9.935287005951649,
 'sandal': 12.333502323614523,
 'shirt': 6.703082442223101,
 'sneaker': 12.347926201399233,
 'bag': 11.785093318871137,
 'boot': 12.351785909246662}

> Marginal Distribution

In [123]:
# Baseline: estimated divergence of the pullover training vectors (P sample)
# from the coat training vectors (Q sample), both from the reconstruction encoder.
dict_pullover_coat_ = {}
dict_pullover_coat_['training divergence'] = skl_estimator(pullover_train, latent_dict_train['rec']['coat'])

In [124]:
# Estimate the divergence of the pullover training vectors from each other
# class. Coat uses the separately encoded `coat_test` vectors; every other
# class uses its reconstruction-encoder vectors from `latent_dict_all`.
for x in name_list:
    if x == 'pullover':
        continue

    marginal = coat_test if x == 'coat' else latent_dict_all['rec'][x]

    kl_ = skl_estimator(pullover_train, marginal)
    dict_pullover_coat_[x] = kl_
    print(x, kl_)

tshirt 22.419006960163852
trouser 29.222279733304465
dress 25.72757446330517
coat 13.3593069721264
sandal 42.72670602945978
shirt 8.374302806296916
sneaker 51.56025831856643
bag 27.437054142348256
boot 42.617041903532666


In [125]:
dict_pullover_coat_

{'training divergence': 8.94335837191094,
 'tshirt': 22.419006960163852,
 'trouser': 29.222279733304465,
 'dress': 25.72757446330517,
 'coat': 13.3593069721264,
 'sandal': 42.72670602945978,
 'shirt': 8.374302806296916,
 'sneaker': 51.56025831856643,
 'bag': 27.437054142348256,
 'boot': 42.617041903532666}

**For Pullover & Sneaker**

> Joint Distribution

In [126]:
# Re-binds `dict_pullover_sneaker`, so the one-class results held under this
# name are no longer reachable. Stores the estimated divergence between the
# pooled pullover+sneaker training vectors (P sample) and test vectors (Q sample).
dict_pullover_sneaker = {}
dict_pullover_sneaker['sneaker'] = skl_estimator(pullover_sneaker_train, pullover_sneaker_test_)

In [127]:
# For every class other than sneaker and pullover: pool the pullover test
# vectors with that class's reconstruction-encoder vectors and estimate the
# divergence of the pullover+sneaker training sample from this pooled sample.
for x in name_list:
    if x not in ('sneaker', 'pullover'):
        joint = np.concatenate((pullover_test, latent_dict_all['rec'][x]), axis=0)
        kl_ = skl_estimator(pullover_sneaker_train, joint)
        dict_pullover_sneaker[x] = kl_
        print(x, kl_)

tshirt 27.240742390932915
trouser 32.03923934051166
dress 30.509918074422842
coat 28.90765698237408
sandal 11.615368704008327
shirt 25.05988784523028
bag 18.78444655946627
boot 13.841945709052613


In [128]:
dict_pullover_sneaker

{'sneaker': 5.553674691659444,
 'tshirt': 27.240742390932915,
 'trouser': 32.03923934051166,
 'dress': 30.509918074422842,
 'coat': 28.90765698237408,
 'sandal': 11.615368704008327,
 'shirt': 25.05988784523028,
 'bag': 18.78444655946627,
 'boot': 13.841945709052613}

> Marginal Distribution

In [129]:
# Baseline: estimated divergence of the pullover training vectors (P sample)
# from the sneaker training vectors (Q sample), both from the reconstruction encoder.
dict_pullover_sneaker_ = {}
dict_pullover_sneaker_['training divergence'] = skl_estimator(pullover_train, latent_dict_train['rec']['sneaker'])

In [130]:
# Estimate the divergence of the pullover training vectors from each other
# class. Sneaker uses the separately encoded `sneaker_test` vectors; every
# other class uses its reconstruction-encoder vectors from `latent_dict_all`.
for x in name_list:
    if x == 'pullover':
        continue

    marginal = sneaker_test if x == 'sneaker' else latent_dict_all['rec'][x]

    kl_ = skl_estimator(pullover_train, marginal)
    dict_pullover_sneaker_[x] = kl_
    print(x, kl_)

tshirt 22.419006960163852
trouser 29.222279733304465
dress 25.72757446330517
coat 8.561594577078122
sandal 42.72670602945978
shirt 8.374302806296916
sneaker 58.13651305406425
bag 27.437054142348256
boot 42.617041903532666


In [131]:
dict_pullover_sneaker_

{'training divergence': 51.440300511933614,
 'tshirt': 22.419006960163852,
 'trouser': 29.222279733304465,
 'dress': 25.72757446330517,
 'coat': 8.561594577078122,
 'sandal': 42.72670602945978,
 'shirt': 8.374302806296916,
 'sneaker': 58.13651305406425,
 'bag': 27.437054142348256,
 'boot': 42.617041903532666}