# 0. Prepare

In [1]:
import sys
sys.path.append('../dataset/')
sys.path.append('../network/')
sys.path.append('../model/')

import os
import glob
import time
import torch
import joblib
import logging
import argparse
import numpy as np
import pandas as pd
import torch.optim as optim
import torch.nn as nn
import torch
import seaborn as sns

from pathlib import Path
from main_loading import *
from main_network import *
from main_model_rec import *
from main_model_one_class import *
from scipy.spatial import KDTree
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import NearestNeighbors

import matplotlib.pyplot as plt
%config InlineBackend.figure_format='retina'

In [2]:
# Run configuration: compute device, dataset root and the two trained checkpoints.
device = 'cuda:1'
root = '/net/leksai/data/FashionMNIST'

# Checkpoint of the reconstruction model.
rec_model_path = (
    '/net/leksai/nips/model/rec/fmnist/'
    'rec_unsupervised_[2]_[]_[0.0]/'
    'net_fmnist_LeNet_rec_eta_100_epochs_150_batch_128/'
    'model.tar'
)

# Checkpoint of the one-class model.
oc_model_path = (
    '/net/leksai/nips/model/one_class/fmnist/'
    'one_class_unsupervised_[2]_[]_[1]_[0.0]/'
    'net_fmnist_LeNet_one_class_eta_100_epochs_150_batch_128/'
    'model.tar'
)

# 1. Load Only the Encoder Part

> ## For One-Class Model

In [3]:
class OneClassEncoder:
    def __init__(self):
        self.net = None
        self.net_name = None

    def set_network(self, net_name):
        self.net_name = net_name
        self.net = build_network(net_name)

    def load_model(self, model_path, map_location):
        model_dict = torch.load(model_path, map_location=map_location)
        self.c = model_dict['c']
        self.net.load_state_dict(model_dict['net_dict'])

    def test(self, train, dataset, device, batch_size, n_jobs_dataloader):
        if train:
            all_loader, _ = dataset.loaders(batch_size=batch_size,
                                            num_workers=n_jobs_dataloader)
        else:
            all_loader = dataset.loaders(batch_size=batch_size,
                                         num_workers=n_jobs_dataloader)
        net = self.net.to(device)
        criterion = nn.MSELoss(reduction='none')
        
        n_batches = 0
        X_pred_list = []
        net.eval()
        
        with torch.no_grad():
            for data in all_loader:
                X, y, idx = data
                X, y, idx = X.to(device), y.to(device), idx.to(device)

                X_pred = net(X)
                X_pred_list += X_pred
        
        return np.array(X_pred_list)

In [4]:
# Build the one-class encoder and restore its trained weights onto `device`.
oc_encoder = OneClassEncoder()
oc_encoder.set_network(net_name='fmnist_LeNet_one_class')
oc_encoder.load_model(model_path=oc_model_path, map_location=device)

> ## For Reconstruction Model

In [5]:
class RecEncoder:
    def __init__(self):

        self.net_name = None
        self.net = None
        self.ae_net = None


    def set_network(self, net_name: str='fmnist_LeNet_one_class'):
        """
        Set the network structure for the model.
        The key here is to initialize <self.net>.
        """
        self.net_name = net_name
        self.net = build_network(net_name)
        self.ae_net = build_network('fmnist_LeNet_rec')

    def load_model(self,
                   model_path,
                   map_location='cuda:1'):
        """
        Load the trained model for the model.
        The key here is to initialize <self.c>.
        """
        # Load the general model
        model_dict = torch.load(model_path, map_location=map_location)
        self.ae_net.load_state_dict(model_dict['net_dict'])
        
        # Obtain the net dictionary
        net_dict = self.net.state_dict()
        ae_net_dict = self.ae_net.state_dict()
        
        # Filter out decoder network keys
        ae_net_dict = {k: v for k, v in ae_net_dict.items() if k in net_dict}
        
        # Overwrite values in the existing state_dict
        net_dict.update(ae_net_dict)

        # Load the new state_dict
        self.net.load_state_dict(net_dict)
        

    def save_model(self, export_model, save_ae=True):
        net_dict = self.net.state_dict()
        torch.save({'net_dict': net_dict}, export_model)
    
    def test(self, train, dataset, device, batch_size, n_jobs_dataloader):
        if train:
            all_loader, _ = dataset.loaders(batch_size=batch_size,
                                            num_workers=n_jobs_dataloader)
        else:
            all_loader = dataset.loaders(batch_size=batch_size,
                                         num_workers=n_jobs_dataloader)
        net = self.net.to(device)
        criterion = nn.MSELoss(reduction='none')
        
        n_batches = 0
        X_pred_list = []
        net.eval()
        
        with torch.no_grad():
            for data in all_loader:
                X, y, idx = data
                X, y, idx = X.to(device), y.to(device), idx.to(device)

                X_pred = net(X)
                X_pred_list += X_pred
        
        return np.array(X_pred_list)

In [6]:
# Build the reconstruction-model encoder; the encoder architecture is the
# default of set_network(), spelled out here for readability.
rec_encoder = RecEncoder()
rec_encoder.set_network(net_name='fmnist_LeNet_one_class')
rec_encoder.load_model(model_path=rec_model_path, map_location=device)

# 2. Dataset Loading

In [7]:
# Per-class training datasets, filled in by the next cell.
dataset_dict_train = dict()

# Class names; the position in this list is used as the class label.
name_list = [
    'tshirt',    # 0
    'trouser',   # 1
    'pullover',  # 2
    'dress',     # 3
    'coat',      # 4
    'sandal',    # 5
    'shirt',     # 6
    'sneaker',   # 7
    'bag',       # 8
    'boot',      # 9
]

In [8]:
# Load one dataset per class: class `i` is passed as `label_normal`,
# with `ratio_abnormal=0`.
for i in range(len(name_list)):
    name = name_list[i]
    dataset_dict_train[name] = load_dataset(
        loader_name='fmnist',
        root=root,
        label_normal=(i,),
        ratio_abnormal=0,
    )

Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!
Loading dataset for you!
Almost loaded!


# 3. Latent Vector Obtaining

> Let's use the unsupervised model of `pullover` as the feature extractor, meaning that the latent space is defined by the network weights of the unsupervised model trained on `pullover`.

In [9]:
# Latent vectors per model ('oc' = one-class, 'rec' = reconstruction), keyed by class name.
latent_dict_train = dict(oc={}, rec={})

> ## For One Class Model

In [10]:
# Encode every class's training set with the one-class encoder
# (batch size 6000, 0 loader workers) and store the vectors as a numpy array.
for name in name_list:
    dataset_train = dataset_dict_train[name]
    data_train = np.array([
        vec.cpu().numpy()
        for vec in oc_encoder.test(True, dataset_train, device, 6000, 0)
    ])
    latent_dict_train['oc'][name] = data_train

> ## For Rec Model

In [11]:
# Encode every class's training set with the reconstruction-model encoder
# (batch size 6000, 0 loader workers) and store the vectors as a numpy array.
for name in name_list:
    dataset_train = dataset_dict_train[name]
    data_train = np.array([
        vec.cpu().numpy()
        for vec in rec_encoder.test(True, dataset_train, device, 6000, 0)
    ])
    latent_dict_train['rec'][name] = data_train

# Function for KL Divergence by KNN

> Cited from https://github.com/nhartland/KL-divergence-estimators

In [12]:
from sklearn.neighbors import NearestNeighbors
from scipy.spatial import KDTree

def knn_distance(point, sample, k):
    """
    Return the entry at index `k` of the ascending Euclidean distances
    from `point` to every row of `sample`.
    """
    offsets = sample - point
    distances = np.linalg.norm(offsets, axis=1)
    distances.sort()
    return distances[k]


def verify_sample_shapes(s1, s2, k):
    """
    Assert that both samples are 2-D ([N, D]) and share the same D.
    `k` is accepted but not inspected.
    """
    rank_1 = len(s1.shape)
    rank_2 = len(s2.shape)
    # Both samples must be matrices of shape [N, D]
    assert(rank_1 == rank_2 == 2)
    # ...and the feature dimension must agree
    assert(s1.shape[1] == s2.shape[1])
    
    
def skl_estimator(s1, s2, k=3):
    """
    KL-Divergence estimator using scikit-learn's NearestNeighbors.

    Inputs:
        s1: (N_1,D) Sample drawn from distribution P
        s2: (N_2,D) Sample drawn from distribution Q
        k: Number of neighbours considered (default 3)
    return:
        estimated D(P|Q)

    The estimate is
        log(N_2 / (N_1 - 1)) + (D / N_1) * sum_i log(nu_i / rho_i)
    where rho_i is the distance from s1[i] to the last of its k + 1 nearest
    neighbours in s1, and nu_i the distance to the last of its k nearest
    neighbours in s2.
    """
    verify_sample_shapes(s1, s2, k)

    n, m = len(s1), len(s2)
    d = float(s1.shape[1])

    # Keyword arguments: recent scikit-learn releases reject positional
    # constructor arguments. `radius` has no effect on kneighbors() queries;
    # it is kept only to mirror the original call.
    s1_neighbourhood = NearestNeighbors(n_neighbors=k + 1, radius=10).fit(s1)
    s2_neighbourhood = NearestNeighbors(n_neighbors=k, radius=10).fit(s2)

    # One batched query per index instead of one Python-level query per point.
    # s1 is queried with k + 1 because each point of s1 is returned as its own
    # nearest neighbour at distance 0.
    s1_distances, _ = s1_neighbourhood.kneighbors(s1, n_neighbors=k + 1)
    s2_distances, _ = s2_neighbourhood.kneighbors(s1, n_neighbors=k)
    rho = s1_distances[:, -1]
    nu = s2_distances[:, -1]

    return np.log(m / (n - 1)) + (d / n) * np.sum(np.log(nu / rho))

> Cited from https://github.com/gregversteeg/NPEET/blob/master/npeet/entropy_estimators.py

In [13]:
import warnings

import numpy as np
import numpy.linalg as la
from numpy import log
from scipy.special import digamma
from sklearn.neighbors import BallTree, KDTree

# CONTINUOUS ESTIMATORS


def entropy(x, k=3, base=2):
    """ k-nearest-neighbour continuous entropy estimator.
        x is a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
        for four samples of a one-dimensional variable.
        The result is expressed in units of log `base`.
    """
    assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
    samples = np.asarray(x)
    n_elements, n_features = samples.shape
    # Tiny random jitter so identical samples do not give zero distances
    jittered = add_noise(samples)
    kth_distance = query_neighbors(build_tree(jittered), jittered, k)
    offset = digamma(n_elements) - digamma(k) + n_features * log(2)
    mean_log_distance = np.log(kth_distance).mean()
    return (offset + n_features * mean_log_distance) / log(base)


def centropy(x, y, k=3, base=2):
    """ Continuous entropy of X conditioned on Y, computed as
        entropy of the column-stacked (x, y) minus entropy of y.
    """
    joint = np.c_[x, y]
    joint_entropy = entropy(joint, k=k, base=base)
    return joint_entropy - entropy(y, k=k, base=base)


def tc(xs, k=3, base=2):
    """ Sum of the per-column entropies of `xs` minus the entropy of `xs` itself. """
    columns = np.expand_dims(xs, axis=0).T
    per_column = []
    for column in columns:
        per_column.append(entropy(column, k=k, base=base))
    marginal_sum = np.sum(per_column)
    return marginal_sum - entropy(xs, k, base)


def ctc(xs, y, k=3, base=2):
    """ Sum of the per-column conditional entropies of `xs` given `y`,
        minus the conditional entropy of `xs` given `y`.
    """
    columns = np.expand_dims(xs, axis=0).T
    per_column = []
    for column in columns:
        per_column.append(centropy(column, y, k=k, base=base))
    conditional_sum = np.sum(per_column)
    return conditional_sum - centropy(xs, y, k, base)


def corex(xs, ys, k=3, base=2):
    """ Sum of the mutual information of each column of `xs` with `ys`,
        minus the mutual information of `xs` with `ys`.
    """
    columns = np.expand_dims(xs, axis=0).T
    per_column = []
    for column in columns:
        per_column.append(mi(column, ys, k=k, base=base))
    column_sum = np.sum(per_column)
    return column_sum - mi(xs, ys, k=k, base=base)


def mi(x, y, z=None, k=3, base=2, alpha=0):
    """ Mutual information of x and y (conditioned on z if z is not None).
        x, y are lists of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
        for four samples of a one-dimensional variable.
        When z is None and alpha > 0, the term from lnc_correction is added.
    """
    assert len(x) == len(y), "Arrays should have same length"
    assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
    x = np.asarray(x)
    y = np.asarray(y)
    x = x.reshape(x.shape[0], -1)
    y = y.reshape(y.shape[0], -1)
    # Jitter x first, then y (the order fixes the random stream consumption)
    x = add_noise(x)
    y = add_noise(y)

    conditioned = z is not None
    parts = [x, y]
    if conditioned:
        z = np.asarray(z)
        z = z.reshape(z.shape[0], -1)
        parts.append(z)
    points = np.hstack(parts)

    # Distance to the k-th neighbour in the joint space (chebyshev metric,
    # as configured in build_tree)
    tree = build_tree(points)
    dvec = query_neighbors(tree, points, k)

    if conditioned:
        a = avgdigamma(np.c_[x, z], dvec)
        b = avgdigamma(np.c_[y, z], dvec)
        c = avgdigamma(z, dvec)
        d = digamma(k)
    else:
        a = avgdigamma(x, dvec)
        b = avgdigamma(y, dvec)
        c = digamma(k)
        d = digamma(len(x))
        if alpha > 0:
            d += lnc_correction(tree, points, k, alpha)
    return (-a - b + c + d) / log(base)


def cmi(x, y, z, k=3, base=2):
    """ Mutual information of x and y, conditioned on z.
        Legacy alias: simply forwards to mi(x, y, z=z, ...).
    """
    conditional_mi = mi(x, y, z=z, k=k, base=base)
    return conditional_mi


def kldiv(x, xp, k=3, base=2):
    """ KL Divergence between p and q for x~p(x), xp~q(x).
        x, xp are lists of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
        for four samples of a one-dimensional variable.
    """
    assert k < min(len(x), len(xp)), "Set k smaller than num. samples - 1"
    assert len(x[0]) == len(xp[0]), "Two distributions must have same dim."
    x = np.asarray(x)
    xp = np.asarray(xp)
    x = x.reshape(x.shape[0], -1)
    xp = xp.reshape(xp.shape[0], -1)
    n, d = x.shape
    m = xp.shape[0]
    const = log(m) - log(n - 1)
    # Index k within x (where each point also finds itself), k - 1 within xp
    dist_in_x = query_neighbors(build_tree(x), x, k)
    dist_in_xp = query_neighbors(build_tree(xp), x, k - 1)
    log_ratio = np.log(dist_in_xp).mean() - np.log(dist_in_x).mean()
    return (const + d * log_ratio) / log(base)


def lnc_correction(tree, points, k, alpha):
    """ Accumulate, over all points, a correction term comparing the log-volume
        of an eigenvector-aligned bounding box of the k+1 nearest neighbours
        with that of the axis-aligned box; returns the per-sample average of the
        differences for points passing the `alpha` threshold.
    """
    correction = 0
    n_sample = points.shape[0]
    for point in points:
        # Indices of the k+1 nearest neighbours in the joint space
        neighbour_idx = tree.query(point[None, :], k=k+1, return_distance=False)[0]
        local = points[neighbour_idx]
        # Re-centre on the first returned neighbour
        # (presumably the query point itself — confirm against the tree used)
        local = local - local[0]
        # Eigenvectors of the local second-moment matrix
        second_moment = local.T @ local / k
        _, eigvecs = la.eig(second_moment)
        # Log-volume of the box aligned with the eigenvectors
        log_vol_rotated = np.log(np.abs(local @ eigvecs).max(axis=0)).sum()
        # Log-volume of the axis-aligned box
        log_vol_axis = np.log(np.abs(local).max(axis=0)).sum()

        # Only points whose rotated box is sufficiently smaller contribute
        if log_vol_rotated < log_vol_axis + np.log(alpha):
            correction += (log_vol_axis - log_vol_rotated) / n_sample
    return correction


# DISCRETE ESTIMATORS
def entropyd(sx, base=2):
    """ Discrete entropy estimator.
        sx is a list of samples; the result is in units of log `base`.
    """
    _, counts = np.unique(sx, return_counts=True, axis=0)
    # Cast to float so the division yields probabilities rather than integer zeros.
    freq = counts.astype(float) / len(sx)
    # Zero-probability entries are dropped: they contribute nothing and would divide by zero.
    nonzero = freq[freq > 0.0]
    total = np.sum(nonzero * np.log(1. / nonzero))
    return total / log(base)


def midd(x, y, base=2):
    """ Discrete mutual information estimator:
        entropy of x minus the entropy of x conditioned on y.
    """
    assert len(x) == len(y), "Arrays should have same length"
    marginal = entropyd(x, base)
    conditional = centropyd(x, y, base)
    return marginal - conditional


def cmidd(x, y, z, base=2):
    """ Discrete mutual information of x and y conditioned on z, computed as
        H(x,z) + H(y,z) - H(x,y,z) - H(z) from discrete entropies.
    """
    assert len(x) == len(y) == len(z), "Arrays should have same length"
    h_xz = entropyd(np.c_[x, z], base)
    h_yz = entropyd(np.c_[y, z], base)
    h_xyz = entropyd(np.c_[x, y, z], base)
    return h_xz + h_yz - h_xyz - entropyd(z, base)


def centropyd(x, y, base=2):
    """ Discrete entropy of X conditioned on Y, computed as
        entropyd of the column-stacked (x, y) minus entropyd of y.
    """
    joint = np.c_[x, y]
    joint_entropy = entropyd(joint, base)
    return joint_entropy - entropyd(y, base)


def tcd(xs, base=2):
    """ Sum of the per-column discrete entropies of `xs` minus the discrete entropy of `xs`. """
    columns = np.expand_dims(xs, axis=0).T
    per_column = []
    for column in columns:
        per_column.append(entropyd(column, base=base))
    marginal_sum = np.sum(per_column)
    return marginal_sum - entropyd(xs, base)


def ctcd(xs, y, base=2):
    """ Sum of the per-column discrete conditional entropies of `xs` given `y`,
        minus the discrete conditional entropy of `xs` given `y`.
    """
    columns = np.expand_dims(xs, axis=0).T
    per_column = []
    for column in columns:
        per_column.append(centropyd(column, y, base=base))
    conditional_sum = np.sum(per_column)
    return conditional_sum - centropyd(xs, y, base)


def corexd(xs, ys, base=2):
    """ Sum of the discrete mutual information of each column of `xs` with `ys`,
        minus the discrete mutual information of `xs` with `ys`.
    """
    columns = np.expand_dims(xs, axis=0).T
    per_column = []
    for column in columns:
        per_column.append(midd(column, ys, base=base))
    column_sum = np.sum(per_column)
    return column_sum - midd(xs, ys, base)


# MIXED ESTIMATORS
def micd(x, y, k=3, base=2, warning=True):
    """ Mutual information for continuous x and discrete y.
        Computed as |H(x) - sum_y p(y) H(x | y)|. When a value of y has too
        few samples for the k-NN estimate, H(x) is used for that value (and a
        warning is issued if `warning` is truthy).
    """
    assert len(x) == len(y), "Arrays should have same length"
    entropy_x = entropy(x, k, base)

    y_unique, y_count = np.unique(y, return_counts=True, axis=0)
    y_proba = y_count / len(y)

    entropy_x_given_y = 0.
    for idx in range(len(y_unique)):
        yval = y_unique[idx]
        py = y_proba[idx]
        # Rows of x whose y row equals this unique value
        x_given_y = x[(y == yval).all(axis=1)]
        enough_samples = k <= len(x_given_y) - 1
        if enough_samples:
            term = entropy(x_given_y, k, base)
        else:
            if warning:
                warnings.warn("Warning, after conditioning, on y={yval} insufficient data. "
                              "Assuming maximal entropy in this case.".format(yval=yval))
            term = entropy_x
        entropy_x_given_y += py * term
    return abs(entropy_x - entropy_x_given_y)  # units already applied


def midc(x, y, k=3, base=2, warning=True):
    """ Same as micd with the roles of the two arguments swapped. """
    return micd(x=y, y=x, k=k, base=base, warning=warning)


def centropycd(x, y, k=3, base=2, warning=True):
    """ Entropy of continuous x conditioned on discrete y: H(x) - I(x; y).

        `k` and `base` are forwarded to both entropy() and micd(). The previous
        call `entropy(x, base)` put `base` into entropy's `k` slot, so the
        entropy term ignored the caller's `k` and always used log base 2.
    """
    return entropy(x, k, base) - micd(x, y, k, base, warning)


def centropydc(x, y, k=3, base=2, warning=True):
    """ Same as centropycd with the roles of the two arguments swapped. """
    return centropycd(x=y, y=x, k=k, base=base, warning=warning)


def ctcdc(xs, y, k=3, base=2, warning=True):
    """ Sum of centropydc over the columns of `xs` (each given `y`),
        minus centropydc of `xs` given `y`.
    """
    columns = np.expand_dims(xs, axis=0).T
    per_column = []
    for column in columns:
        per_column.append(
            centropydc(column, y, k=k, base=base, warning=warning))
    conditional_sum = np.sum(per_column)
    return conditional_sum - centropydc(xs, y, k, base, warning)


def ctccd(xs, y, k=3, base=2, warning=True):
    """ Same as ctcdc with the roles of the two arguments swapped. """
    return ctcdc(xs=y, y=xs, k=k, base=base, warning=warning)


def corexcd(xs, ys, k=3, base=2, warning=True):
    """ Same as corexdc with the roles of the two arguments swapped. """
    return corexdc(xs=ys, ys=xs, k=k, base=base, warning=warning)


def corexdc(xs, ys, k=3, base=2, warning=True):
    """ tcd of `xs` minus ctcdc of `xs` given `ys`. """
    unconditional = tcd(xs, base)
    conditional = ctcdc(xs, ys, k, base, warning)
    return unconditional - conditional


# UTILITY FUNCTIONS

def add_noise(x, intens=1e-10):
    """ Return `x` plus uniform random noise in [0, intens), drawn from numpy's global RNG. """
    # Small noise to break degeneracy between identical samples
    jitter = np.random.random_sample(x.shape)
    return x + intens * jitter


def query_neighbors(tree, x, k):
    """ Query `tree` for k + 1 neighbours of each row of `x` and return column `k`
        of the first element of the query result.
    """
    result = tree.query(x, k=k + 1)
    distances = result[0]
    return distances[:, k]


def count_neighbors(tree, x, r):
    """ Return `tree.query_radius(x, r, count_only=True)`: a neighbour count per row of `x`. """
    counts = tree.query_radius(x, r, count_only=True)
    return counts


def avgdigamma(points, dvec):
    """ Mean of digamma(n_i), where n_i is the number of points found within
        radius dvec[i] (shrunk by 1e-15) of points[i] in a tree built on `points`.
    """
    marginal_tree = build_tree(points)
    # Shrink the radii slightly before counting
    radii = dvec - 1e-15
    neighbour_counts = count_neighbors(marginal_tree, points, radii)
    return np.mean(digamma(neighbour_counts))


def build_tree(points):
    """ Build a chebyshev-metric neighbour index: a KDTree below 20 features,
        a BallTree from 20 features upward.
    """
    n_features = points.shape[1]
    tree_cls = BallTree if n_features >= 20 else KDTree
    return tree_cls(points, metric='chebyshev')

> Cited from universal_divergence

In [14]:
from universal_divergence import estimate

# Calculating Joint KL Divergence

> ## For One Class Model

Non-symmetric KL Divergence

In [19]:
# Pairwise (non-symmetric) divergence matrix for the one-class latents:
# entry (i, j) compares class i with the union of classes i and j;
# on the diagonal, class i is compared with itself.
oc_gram = np.zeros((10, 10))

for i, x_i in enumerate(latent_dict_train['oc']):
    for j, x_j in enumerate(latent_dict_train['oc']):
        left = latent_dict_train['oc'][x_i]
        right = left if i == j else np.r_[latent_dict_train['oc'][x_i],
                                          latent_dict_train['oc'][x_j]]
        oc_gram[i][j] = skl_estimator(left, right)
        print(i, j, oc_gram[i][j])

0 0 -2.354828129099568
0 1 -1.8498137466923816
0 2 -2.274063666910703
0 3 -2.242290256144239
0 4 -2.1882307533006604
0 5 -1.7123093569054735
0 6 -2.894960748697684
0 7 -1.6864171224097406
0 8 -1.8567664866270666
0 9 -1.6933993284098678
1 0 -2.6618202267996636
1 1 -3.115679273424921
1 2 -2.5970491260162007
1 3 -2.805409003225345
1 4 -2.590596954465307
1 5 -2.4481038387245455
1 6 -2.6492309494601654
1 7 -2.4366167366676796
1 8 -2.452341977601175
1 9 -2.4314755296429027
2 0 -0.9095226750060134
2 1 -0.7739677100067145
2 2 -1.4235522350949072
2 3 -0.8656402623814345
2 4 -1.4454330555553514
2 5 -0.7463445561455211
2 6 -1.4670334321028835
2 7 -0.7440985609887868
2 8 -0.7865783539429099
2 9 -0.7466445176547827
3 0 -2.5039259553289424
3 1 -2.1027065501582327
3 2 -2.4008056479403845
3 3 -2.5023997645982634
3 4 -2.5749480125630986
3 5 -1.8560076321364247
3 6 -2.5176900646839804
3 7 -1.8305902517532102
3 8 -1.8911254106015596
3 9 -1.822694665820784
4 0 -1.3612307402300714
4 1 -1.2283004122765049
4

In [22]:
# Persist the matrix; create the output directory first so the dump does not
# fail when `misc/` does not exist yet.
os.makedirs('misc', exist_ok=True)
joblib.dump(oc_gram, 'misc/oc_gram.pkl')

['misc/oc_gram.pkl']

In [None]:
# Symmetrised version of the one-class matrix: the mean of the estimate in
# both directions between class i and the union of classes i and j.
oc_gram_sys = np.zeros((10, 10))

for i, x_i in enumerate(latent_dict_train['oc']):
    for j, x_j in enumerate(latent_dict_train['oc']):
        left = latent_dict_train['oc'][x_i]
        right = left if i == j else np.r_[latent_dict_train['oc'][x_i],
                                          latent_dict_train['oc'][x_j]]

        oc_gram_sys[i][j] = 0.5 * (skl_estimator(left, right)
                                   + skl_estimator(right, left))

        print(i, j, oc_gram_sys[i][j])

0 0 -2.354828129099568
0 1 8.372350223130455
0 2 0.632486993778488
0 3 2.8342967265786028
0 4 0.7207779485948227
0 5 3.181389318121882
0 6 -0.10480073219150121
0 7 10.520922675282398
0 8 5.23345918853433
0 9 12.780899299240435
1 0 3.278694874573044
1 1 -3.115679273424921
1 2 2.2272192968072693
1 3 1.7735896929705406
1 4 1.8507408675835626
1 5 3.689556755682486
1 6 1.9053755238472616
1 7 11.054650957051862
1 8 6.429171433353918
1 9 16.904121054736237
2 0 3.0740788336782523
2 1 9.907598697391228
2 2 -1.4235522350949072
2 3 4.83690802965049
2 4 -0.2028645714832764
2 5 3.716571587155955
2 6 0.1046149715085074
2 7 11.127540019082039
2 8 6.143370770701506
2 9 17.297589896438282
3 0 2.2007246800976885
3 1 6.863260537629159
3 2 1.3063262944065508
3 3 -2.5023997645982634
3 4 0.7880629328969462
3 5 3.206709653428155
3 6 0.8678840181378191
3 7 10.499469752299891
3 8 6.138952229344589
3 9 15.006995068905713
4 0 2.9758632320241496
4 1 9.212123149389848
4 2 -0.5351465797876966
4 3 3.783267310914543


In [None]:
# Persist the matrix; create the output directory first so the dump does not
# fail when `misc/` does not exist yet.
os.makedirs('misc', exist_ok=True)
joblib.dump(oc_gram_sys, 'misc/oc_gram_sys.pkl')

> For Rec Model

In [None]:
# Pairwise (non-symmetric) divergence matrix for the reconstruction-model
# latents: entry (i, j) compares class i with the union of classes i and j;
# on the diagonal, class i is compared with itself.
rec_gram = np.zeros((10, 10))

for i, x_i in enumerate(latent_dict_train['rec']):
    for j, x_j in enumerate(latent_dict_train['rec']):
        left = latent_dict_train['rec'][x_i]
        right = left if i == j else np.r_[latent_dict_train['rec'][x_i],
                                          latent_dict_train['rec'][x_j]]
        rec_gram[i][j] = skl_estimator(left, right)
        print(i, j, rec_gram[i][j])

In [None]:
# Persist the matrix; create the output directory first so the dump does not
# fail when `misc/` does not exist yet.
os.makedirs('misc', exist_ok=True)
joblib.dump(rec_gram, 'misc/rec_gram.pkl')

In [None]:
# Unfinished scratch cell: the loop header had no body, which is a SyntaxError
# and stops "Run All". A no-op body keeps the cell valid.
# TODO: fill in the intended per-class work or delete this cell.
for i in range(10):
    pass

In [15]:
name_list

['tshirt',
 'trouser',
 'pullover',
 'dress',
 'coat',
 'sandal',
 'shirt',
 'sneaker',
 'bag',
 'boot']

In [None]:
# Symmetrised divergence between (pullover + class i) and (pullover + class j)
# in the reconstruction-model latent space.
rec_gram_sys_ = np.zeros((10, 10))

for i, x_i in enumerate(latent_dict_train['rec']):
    for j, x_j in enumerate(latent_dict_train['rec']):
        left = np.r_[latent_dict_train['rec']['pullover'],
                     latent_dict_train['rec'][x_i]]
        right = np.r_[latent_dict_train['rec']['pullover'],
                      latent_dict_train['rec'][x_j]]
        rec_gram_sys_[i][j] = 0.5 * (skl_estimator(left, right)
                                     + skl_estimator(right, left))

        print(i, j, rec_gram_sys_[i][j])

0 0 -2.250281069595419
0 1 15.824935617870292
0 2 6.131335384522765
0 3 7.865118500694239
0 4 6.801428913329501
0 5 15.369509977835822
0 6 2.214840167942079
0 7 20.705115201901734
0 8 11.321564940618764
0 9 20.575184344212254
1 0 15.824935617870292
1 1 -2.4486417116658665
1 2 13.631926647486072
1 3 10.542556604341224
1 4 12.410020612213536
1 5 23.343266090583885
1 6 12.66060688836089
1 7 28.96559045677866
1 8 18.514331869247336
1 9 28.755787119534908
2 0 6.131335384522765
2 1 13.631926647486072
2 2 8.333680574837545e-05
2 3 7.041352319240344
2 4 2.115061431801671
2 5 10.916015391815506
2 6 2.3789507767714513
2 7 16.4161500474098
2 8 6.818326126430715
2 9 16.50732695055503
3 0 7.865118500694239
3 1 10.542556604341224
3 2 7.041352319240344
3 3 -2.23631878381988
3 4 5.802529730129007
3 5 16.774812288207997
3 6 5.676743697118971
3 7 22.43947845798847
3 8 11.932246505128951
3 9 22.035742919290225
4 0 6.801428913329501
4 1 12.410020612213536
4 2 2.115061431801671
4 3 5.802529730129007
4 4 -2

In [None]:
# Same computation as the previous cell, restricted to rows 8 and 9.
# NOTE(review): the matrix is re-zeroed first, so rows 0-7 end up as zeros
# after this cell — confirm whether the reset is intended.
rec_gram_sys_ = np.zeros((10, 10))

for i, x_i in enumerate(latent_dict_train['rec']):
    if i >= 8:
        for j, x_j in enumerate(latent_dict_train['rec']):
            left = np.r_[latent_dict_train['rec']['pullover'],
                         latent_dict_train['rec'][x_i]]
            right = np.r_[latent_dict_train['rec']['pullover'],
                          latent_dict_train['rec'][x_j]]
            rec_gram_sys_[i][j] = 0.5 * (skl_estimator(left, right)
                                         + skl_estimator(right, left))

            print(i, j, rec_gram_sys_[i][j])

8 0 14.357915409695117
8 1 21.10255600807521
8 2 8.172230218238127
8 3 14.341366345895981
8 4 8.74155004088028
8 5 13.306805792042411


In [23]:
# Symmetrised version of the reconstruction-model matrix: the mean of the
# estimate in both directions between class i and the union of classes i and j.
# The matrix initialised here is the one the loop fills (the cell previously
# zeroed `rec_gram_sys_` but wrote to `rec_gram_sys`, which fails with a
# NameError on a fresh kernel).
rec_gram_sys = np.zeros((10, 10))

for i, x_i in enumerate(latent_dict_train['rec']):
    for j, x_j in enumerate(latent_dict_train['rec']):
        left = latent_dict_train['rec'][x_i]
        if i == j:
            right = left
        else:
            right = np.r_[latent_dict_train['rec'][x_i], latent_dict_train['rec'][x_j]]

        rec_gram_sys[i][j] = 0.5 * (skl_estimator(left, right) + skl_estimator(right, left))

        print(i, j, rec_gram_sys[i][j])

0 0 -2.374685204233279
0 1 9.388758905429942
0 2 4.3290357052324815
0 3 3.2845118312785173
0 4 5.1263364119147266
0 5 7.469907312417979
0 6 2.053953081168619
0 7 11.668403525707204
0 8 5.949233584698067
0 9 13.670346892646844
1 0 7.479061788144726
1 1 -2.765432814088853
1 2 5.703915914330305
1 3 2.8157584627452206
1 4 6.096560815277588
1 5 9.478108448500471
1 6 5.735912597748136
1 7 14.956690648832325
1 8 9.719402270143139
1 9 16.033443470553024
2 0 5.270507654839799
2 1 11.671844714250408
2 2 -2.3990006264736086
2 3 5.766454930991621
2 4 0.30726571764174415
2 5 6.771600542884427
2 6 0.7740232262350699
2 7 11.295032210743338
2 8 5.120180500255494
2 9 13.086691814345226
3 0 4.620254172817776
3 1 7.129297708793364
3 2 5.364664581746643
3 3 -2.318751138820908
3 4 4.471196458475208
3 5 8.590257434434607
3 6 3.8091844822986873
3 7 13.215546444533558
3 8 7.861650120004175
3 9 15.245930860936328
4 0 6.248791034143045
4 1 9.652452675474443
4 2 0.34674793375192925
4 3 4.336442671401848
4 4 -2.1

In [24]:
# Persist the matrix; create the output directory first so the dump does not
# fail when `misc/` does not exist yet.
os.makedirs('misc', exist_ok=True)
joblib.dump(rec_gram_sys, 'misc/rec_gram_sys.pkl')

['misc/rec_gram_sys.pkl']

**Load for extra data**

In [92]:
# One-class latent vectors of the pullover training set.
pullover_train = latent_dict_train['oc']['pullover']

In [66]:
# Evaluation data for label 2 (pullover), encoded with the one-class encoder.
dataset_pullover_test = load_dataset(
    loader_name='fmnist_eval',
    root=root,
    label_eval=(2,),
    test_eval=True,
)
pullover_test = np.array([
    vec.cpu().numpy()
    for vec in oc_encoder.test(False, dataset_pullover_test, device, 1000, 0)
])

In [67]:
# Evaluation data for label 4 (coat), encoded with the one-class encoder.
dataset_coat_test = load_dataset(
    loader_name='fmnist_eval',
    root=root,
    label_eval=(4,),
    test_eval=True,
)
coat_test = np.array([
    vec.cpu().numpy()
    for vec in oc_encoder.test(False, dataset_coat_test, device, 1000, 0)
])

In [68]:
# Evaluation data for label 7 (sneaker), encoded with the one-class encoder.
dataset_sneaker_test = load_dataset(
    loader_name='fmnist_eval',
    root=root,
    label_eval=(7,),
    test_eval=True,
)
sneaker_test = np.array([
    vec.cpu().numpy()
    for vec in oc_encoder.test(False, dataset_sneaker_test, device, 1000, 0)
])

In [80]:
# Row-wise unions of the one-class latents: pullover with coat / sneaker,
# for the training vectors and for the evaluation vectors.
pullover_coat_train = np.r_[
    latent_dict_train['oc']['pullover'],
    latent_dict_train['oc']['coat'],
]
pullover_sneaker_train = np.r_[
    latent_dict_train['oc']['pullover'],
    latent_dict_train['oc']['sneaker'],
]

pullover_coat_test_ = np.r_[pullover_test, coat_test]
pullover_sneaker_test_ = np.r_[pullover_test, sneaker_test]

**For Pullover & Coat**

> Joint Distribution

In [84]:
# Joint case: divergence of the (pullover + coat) training set from the
# (pullover + coat) evaluation set, stored under 'coat'.
dict_pullover_coat = {
    'coat': skl_estimator(pullover_coat_train, pullover_coat_test_),
}

In [90]:
# For every class other than coat and pullover, estimate the divergence of the
# (pullover + coat) training set from (pullover evaluation vectors + that class).
# NOTE(review): `latent_dict_all` is not defined in any cell visible here —
# confirm where it comes from, otherwise this cell fails on a fresh kernel.
for x in name_list:
    if x in ['coat', 'pullover']:
        continue
    
    joint = np.r_[pullover_test, latent_dict_all['oc'][x]]
    kl_ = skl_estimator(pullover_coat_train, joint)
    dict_pullover_coat[x] = kl_
    print(x, kl_)

tshirt 6.678524225035061
trouser 7.185524170868892
dress 6.070237151115829
sandal 7.661263468168078
shirt 3.8714684233428267
sneaker 7.699502890825909
bag 7.41631880409517
boot 7.716410785422828


In [94]:
dict_pullover_coat

{'coat': 7.661263468168078,
 'tshirt': 6.678524225035061,
 'trouser': 7.185524170868892,
 'dress': 6.070237151115829,
 'sandal': 7.661263468168078,
 'shirt': 3.8714684233428267,
 'sneaker': 7.699502890825909,
 'bag': 7.41631880409517,
 'boot': 7.716410785422828}

> Marginal Distribution

In [91]:
# Marginal case: divergence of the pullover training vectors from the coat
# training vectors, stored under 'training divergence'.
dict_pullover_coat_ = {
    'training divergence': skl_estimator(pullover_train,
                                         latent_dict_train['oc']['coat']),
}

In [93]:
# Divergence of the pullover training vectors from each other class.
# For coat the evaluation vectors `coat_test` are used; for every other class
# the vectors come from `latent_dict_all`.
# NOTE(review): `latent_dict_all` is not defined in any cell visible here —
# confirm where it comes from, otherwise this cell fails on a fresh kernel.
for x in name_list:
    if x in ['pullover']:
        continue
        
    if x in ['coat']:
        marginal = coat_test
    else:
        marginal = latent_dict_all['oc'][x]
        
    kl_ = skl_estimator(pullover_train, marginal)
    dict_pullover_coat_[x] = kl_
    print(x, kl_)

tshirt 11.072399005169489
trouser 18.745953300786717
dress 13.76077460303965
coat 7.160040157098573
sandal 21.11867218781776
shirt 4.209448972482385
sneaker 23.227279890195163
bag 21.15262816948465
boot 27.834980624381743


In [97]:
dict_pullover_coat_

{'training divergence': 4.568845462776275,
 'tshirt': 11.072399005169489,
 'trouser': 18.745953300786717,
 'dress': 13.76077460303965,
 'coat': 7.160040157098573,
 'sandal': 21.11867218781776,
 'shirt': 4.209448972482385,
 'sneaker': 23.227279890195163,
 'bag': 21.15262816948465,
 'boot': 27.834980624381743}

**For Pullover & Sneaker**

> Joint Distribution

In [102]:
# Joint case: divergence of the (pullover + sneaker) training set from the
# (pullover + sneaker) evaluation set, stored under 'sneaker'.
dict_pullover_sneaker = {
    'sneaker': skl_estimator(pullover_sneaker_train, pullover_sneaker_test_),
}

In [103]:
# For every class other than sneaker and pullover, estimate the divergence of the
# (pullover + sneaker) training set from (pullover evaluation vectors + that class).
# NOTE(review): `latent_dict_all` is not defined in any cell visible here —
# confirm where it comes from, otherwise this cell fails on a fresh kernel.
for x in name_list:
    if x in ['sneaker', 'pullover']:
        continue
    
    joint = np.r_[pullover_test, latent_dict_all['oc'][x]]
    kl_ = skl_estimator(pullover_sneaker_train, joint)
    dict_pullover_sneaker[x] = kl_
    print(x, kl_)

tshirt 27.749288133239418
trouser 28.80863523884461
dress 27.979325455792644
coat 26.085610547476527
sandal 12.502880735513374
shirt 25.872357051664995
bag 21.306602137031792
boot 14.474244197800164


In [104]:
dict_pullover_sneaker

{'sneaker': 4.945477682473212,
 'tshirt': 27.749288133239418,
 'trouser': 28.80863523884461,
 'dress': 27.979325455792644,
 'coat': 26.085610547476527,
 'sandal': 12.502880735513374,
 'shirt': 25.872357051664995,
 'bag': 21.306602137031792,
 'boot': 14.474244197800164}

> Marginal Distribution

In [111]:
# Marginal case: divergence of the pullover training vectors from the sneaker
# training vectors, stored under 'training divergence'.
dict_pullover_sneaker_ = {
    'training divergence': skl_estimator(pullover_train,
                                         latent_dict_train['oc']['sneaker']),
}

In [114]:
# Divergence of the pullover training vectors from each other class.
# For sneaker the evaluation vectors `sneaker_test` are used; for every other
# class the vectors come from `latent_dict_all`.
# NOTE(review): `latent_dict_all` is not defined in any cell visible here —
# confirm where it comes from, otherwise this cell fails on a fresh kernel.
for x in name_list:
    if x in ['pullover']:
        continue
        
    if x in ['sneaker']:
        marginal = sneaker_test
    else:
        marginal = latent_dict_all['oc'][x]
        
    kl_ = skl_estimator(pullover_train, marginal)
    dict_pullover_sneaker_[x] = kl_
    print(x, kl_)

tshirt 11.072399005169489
trouser 18.745953300786717
dress 13.76077460303965
coat 4.339410330539567
sandal 21.11867218781776
shirt 4.209448972482385
sneaker 24.12124078252928
bag 21.15262816948465
boot 27.834980624381743


In [115]:
dict_pullover_sneaker_

{'training divergence': 23.674082066947456,
 'tshirt': 11.072399005169489,
 'trouser': 18.745953300786717,
 'dress': 13.76077460303965,
 'coat': 4.339410330539567,
 'sandal': 21.11867218781776,
 'shirt': 4.209448972482385,
 'sneaker': 24.12124078252928,
 'bag': 21.15262816948465,
 'boot': 27.834980624381743}

> ## For Reconstruction Model

In [116]:
# From here on `pullover_train` / `pullover_test` hold reconstruction-model
# latents; the names are rebound from the one-class section above.
pullover_train = latent_dict_train['rec']['pullover']

# Evaluation data for label 2 (pullover), encoded with the reconstruction encoder.
dataset_pullover_test = load_dataset(
    loader_name='fmnist_eval',
    root=root,
    label_eval=(2,),
    test_eval=True,
)
pullover_test = np.array([
    vec.cpu().numpy()
    for vec in rec_encoder.test(False, dataset_pullover_test, device, 1000, 0)
])

In [117]:
# Evaluation data for label 4 (coat), encoded with the reconstruction encoder.
dataset_coat_test = load_dataset(
    loader_name='fmnist_eval',
    root=root,
    label_eval=(4,),
    test_eval=True,
)
coat_test = np.array([
    vec.cpu().numpy()
    for vec in rec_encoder.test(False, dataset_coat_test, device, 1000, 0)
])

In [118]:
# Evaluation data for label 7 (sneaker), encoded with the reconstruction encoder.
dataset_sneaker_test = load_dataset(
    loader_name='fmnist_eval',
    root=root,
    label_eval=(7,),
    test_eval=True,
)
sneaker_test = np.array([
    vec.cpu().numpy()
    for vec in rec_encoder.test(False, dataset_sneaker_test, device, 1000, 0)
])

In [119]:
# Training-side joint samples: pullover latents followed by the latents of the
# second class, concatenated along the first axis.
pullover_coat_train = np.concatenate(
    (latent_dict_train['rec']['pullover'], latent_dict_train['rec']['coat']),
    axis=0,
)
pullover_sneaker_train = np.concatenate(
    (latent_dict_train['rec']['pullover'], latent_dict_train['rec']['sneaker']),
    axis=0,
)

# Test-side counterparts, built from the arrays encoded in the cells above.
pullover_coat_test_ = np.concatenate((pullover_test, coat_test), axis=0)
pullover_sneaker_test_ = np.concatenate((pullover_test, sneaker_test), axis=0)

**For Pullover & Coat**

> Joint Distribution

In [120]:
# Start the joint-distribution results with the pullover+coat train vs. test value.
# skl_estimator is defined elsewhere in the notebook (presumably a KL-type
# divergence estimate between two sample sets -- confirm).
dict_pullover_coat = {
    'coat': skl_estimator(pullover_coat_train, pullover_coat_test_),
}

In [121]:
# For every class except the two used to build pullover_coat_train, pair the
# pullover test latents with that class's latents and compare the result with
# the pullover+coat training set.
for x in name_list:
    if x not in ('coat', 'pullover'):
        joint = np.concatenate((pullover_test, latent_dict_all['rec'][x]), axis=0)
        kl_ = skl_estimator(pullover_coat_train, joint)
        dict_pullover_coat[x] = kl_
        print(x, kl_)

tshirt 11.120735874828831
trouser 11.637051307975428
dress 9.935287005951649
sandal 12.333502323614523
shirt 6.703082442223101
sneaker 12.347926201399233
bag 11.785093318871137
boot 12.351785909246662


In [122]:
# Display the joint-distribution values gathered for the pullover/coat setting
# (the 'coat' entry plus one entry per remaining class name).
dict_pullover_coat

{'coat': 5.7802951521432515,
 'tshirt': 11.120735874828831,
 'trouser': 11.637051307975428,
 'dress': 9.935287005951649,
 'sandal': 12.333502323614523,
 'shirt': 6.703082442223101,
 'sneaker': 12.347926201399233,
 'bag': 11.785093318871137,
 'boot': 12.351785909246662}

> Marginal Distribution

In [123]:
# Start the marginal results with the value between the pullover and coat
# entries of latent_dict_train for the reconstruction model.
dict_pullover_coat_ = {
    'training divergence': skl_estimator(pullover_train, latent_dict_train['rec']['coat']),
}

In [124]:
# Compare the pullover training latents with each other class in turn.
# 'coat' uses the separately encoded coat_test array; every other class uses
# its entry in latent_dict_all['rec'].
for x in name_list:
    if x == 'pullover':
        continue

    marginal = coat_test if x == 'coat' else latent_dict_all['rec'][x]

    kl_ = skl_estimator(pullover_train, marginal)
    dict_pullover_coat_[x] = kl_
    print(x, kl_)

tshirt 22.419006960163852
trouser 29.222279733304465
dress 25.72757446330517
coat 13.3593069721264
sandal 42.72670602945978
shirt 8.374302806296916
sneaker 51.56025831856643
bag 27.437054142348256
boot 42.617041903532666


In [125]:
# Display the marginal values gathered for the pullover/coat setting
# ('training divergence' plus one entry per non-pullover class name).
dict_pullover_coat_

{'training divergence': 8.94335837191094,
 'tshirt': 22.419006960163852,
 'trouser': 29.222279733304465,
 'dress': 25.72757446330517,
 'coat': 13.3593069721264,
 'sandal': 42.72670602945978,
 'shirt': 8.374302806296916,
 'sneaker': 51.56025831856643,
 'bag': 27.437054142348256,
 'boot': 42.617041903532666}

**For Pullover & Sneaker**

> Joint Distribution

In [126]:
# Start the joint-distribution results with the pullover+sneaker train vs. test value.
# skl_estimator is defined elsewhere in the notebook (presumably a KL-type
# divergence estimate between two sample sets -- confirm).
dict_pullover_sneaker = {
    'sneaker': skl_estimator(pullover_sneaker_train, pullover_sneaker_test_),
}

In [127]:
# For every class except the two used to build pullover_sneaker_train, pair the
# pullover test latents with that class's latents and compare the result with
# the pullover+sneaker training set.
for x in name_list:
    if x not in ('sneaker', 'pullover'):
        joint = np.concatenate((pullover_test, latent_dict_all['rec'][x]), axis=0)
        kl_ = skl_estimator(pullover_sneaker_train, joint)
        dict_pullover_sneaker[x] = kl_
        print(x, kl_)

tshirt 27.240742390932915
trouser 32.03923934051166
dress 30.509918074422842
coat 28.90765698237408
sandal 11.615368704008327
shirt 25.05988784523028
bag 18.78444655946627
boot 13.841945709052613


In [128]:
# Display the joint-distribution values gathered for the pullover/sneaker setting
# (the 'sneaker' entry plus one entry per remaining class name).
dict_pullover_sneaker

{'sneaker': 5.553674691659444,
 'tshirt': 27.240742390932915,
 'trouser': 32.03923934051166,
 'dress': 30.509918074422842,
 'coat': 28.90765698237408,
 'sandal': 11.615368704008327,
 'shirt': 25.05988784523028,
 'bag': 18.78444655946627,
 'boot': 13.841945709052613}

> Marginal Distribution

In [129]:
# Start the marginal results with the value between the pullover and sneaker
# entries of latent_dict_train for the reconstruction model.
dict_pullover_sneaker_ = {
    'training divergence': skl_estimator(pullover_train, latent_dict_train['rec']['sneaker']),
}

In [130]:
# Compare the pullover training latents with each other class in turn.
# 'sneaker' uses the separately encoded sneaker_test array; every other class
# uses its entry in latent_dict_all['rec'].
for x in name_list:
    if x == 'pullover':
        continue

    marginal = sneaker_test if x == 'sneaker' else latent_dict_all['rec'][x]

    kl_ = skl_estimator(pullover_train, marginal)
    dict_pullover_sneaker_[x] = kl_
    print(x, kl_)

tshirt 22.419006960163852
trouser 29.222279733304465
dress 25.72757446330517
coat 8.561594577078122
sandal 42.72670602945978
shirt 8.374302806296916
sneaker 58.13651305406425
bag 27.437054142348256
boot 42.617041903532666


In [131]:
# Display the marginal values gathered for the pullover/sneaker setting
# ('training divergence' plus one entry per non-pullover class name).
dict_pullover_sneaker_

{'training divergence': 51.440300511933614,
 'tshirt': 22.419006960163852,
 'trouser': 29.222279733304465,
 'dress': 25.72757446330517,
 'coat': 8.561594577078122,
 'sandal': 42.72670602945978,
 'shirt': 8.374302806296916,
 'sneaker': 58.13651305406425,
 'bag': 27.437054142348256,
 'boot': 42.617041903532666}