# Performance Measures

In [175]:
import dgl
import dgl.function as fn
from dgl import DGLGraph
from dgl.data import citation_graph as citegrh

# pytorch
import torch as th
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
from scipy import sparse as sp
from math import log
import pandas as pd

import itertools

from sklearn import metrics as skmetrics

In [137]:
#Loading CORA
# Load the CORA citation dataset through DGL and wrap its arrays as torch tensors.
data = citegrh.load_cora()
features = th.FloatTensor(data.features)
labels = th.LongTensor(data.labels)
# Training-node indicator kept as a uint8 tensor; compute_performance casts it to bool.
mask = th.ByteTensor(data.train_mask)
g = data.graph
# Stand-in for model output: uniform random scores, one row per label and 7 columns
# (presumably one column per CORA class -- TODO confirm), then a row-wise log-softmax.
# NOTE: no random seed is set, so these values differ on every run.
logp = th.rand(size=(labels.shape[0],7))
logp_ls = F.log_softmax(logp,1)

In [138]:
# Display the row-wise log-softmax of the random scores.
logp_ls

tensor([[-1.7600, -1.8982, -1.7362,  ..., -2.0066, -1.7634, -2.3472],
        [-1.7248, -2.1782, -1.9963,  ..., -2.1172, -1.7488, -1.7006],
        [-2.1764, -1.6801, -2.2152,  ..., -1.5862, -1.8226, -2.2140],
        ...,
        [-1.9556, -2.2637, -2.2834,  ..., -1.5916, -1.7782, -2.4438],
        [-1.6784, -1.9707, -1.6544,  ..., -1.9837, -1.9229, -2.4820],
        [-1.8556, -2.1586, -2.0655,  ..., -1.5751, -1.7178, -2.1172]])

In [30]:
# Toy example: draw 5 labels uniformly from {0, 1, 2}, then convert them to a NumPy array.
# NOTE: this rebinds the global `labels`. Code that calls `.numpy()` or `.unique()` on
# `labels` (compute_performance, perm_inv_loss) needs it to be a torch tensor again,
# so the loading cell has to be re-run before those cells.
labels = th.LongTensor(np.random.choice(range(3),size=5,p=[1/3,1/3,1/3]))
labels = labels.numpy()
labels

array([1, 2, 2, 0, 1], dtype=int64)

In [4]:
# One-hot encode the integer labels by selecting rows of the 3x3 identity matrix.
# NOTE(review): the saved output shows every row as class 1, which does not match the
# labels printed by the toy-label cell -- it looks like output from an earlier run.
np.eye(3)[labels]

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]])

In [79]:
# Toy model output: random 5x3 scores, their log-softmax, and hard class predictions.
# NOTE: this rebinds the globals `logp` and `logp_ls` to the 5x3 toy tensors.
logp = th.rand(size=(5,3))
logp_ls = F.log_softmax(logp,1)
print(F.softmax(logp,1))
# Predictions are the per-row argmax of the raw scores.
preds = np.argmax(logp.numpy(),axis=1)
print(preds)

tensor([[0.3540, 0.1981, 0.4479],
        [0.4100, 0.2337, 0.3563],
        [0.3765, 0.4116, 0.2120],
        [0.1791, 0.3970, 0.4239],
        [0.2602, 0.3684, 0.3714]])
[2 0 1 2 2]


In [56]:
# Brute-force permutation-invariant NLL on the toy example: score every reordering of
# the output columns against the labels and keep the smallest loss.
# Permute all output columns rather than only the label values that happen to occur:
# a small random draw can miss a class, which would leave too few columns for nll_loss.
perms = list(itertools.permutations(range(logp_ls.shape[1])))
# `labels` may be a NumPy array at this point; F.nll_loss needs an int64 tensor target.
toy_targets = th.as_tensor(labels, dtype=th.long)
scores = th.stack([F.nll_loss(logp_ls[:, list(p)], toy_targets) for p in perms])
print(scores)
th.min(scores)

tensor([0.9687, 1.3232, 1.0844, 1.2152, 1.2067, 0.9830])


tensor(0.9687)

In [131]:
def rand_score(labels,preds):
    """Return scikit-learn's adjusted Rand index between `labels` and `preds`.

    Note that despite the function name this is the chance-adjusted variant
    (`adjusted_rand_score`), not the plain Rand index.
    """
    ari = skmetrics.adjusted_rand_score(labels_true=labels, labels_pred=preds)
    return ari

def mutual_info_score(labels,preds):
    """Return scikit-learn's adjusted mutual information between `labels` and `preds`.

    The normaliser is requested explicitly as the arithmetic mean
    (`average_method="arithmetic"`).
    """
    ami = skmetrics.adjusted_mutual_info_score(
        labels_true=labels,
        labels_pred=preds,
        average_method="arithmetic",
    )
    return ami

def variation_of_information_score(labels,preds):
    """Variation of information between two labelings, in nats.

    Computed as VI(X, Y) = 2*H(X, Y) - H(X) - H(Y), which is algebraically the
    same as H(X) + H(Y) - 2*I(X, Y). It is 0 when the two labelings induce the
    same partition and grows as they disagree.

    Parameters
    ----------
    labels, preds : array-like of equal length
        Cluster/class assignment of each sample. Anything `np.asarray` accepts
        (lists, NumPy arrays, CPU tensors) works; the values only need to be
        comparable, not contiguous.

    Returns
    -------
    float
        The variation of information, clamped at 0.0 so that floating-point
        rounding can never produce a slightly negative distance. Empty input
        yields 0.0.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of samples.
    """
    labels = np.asarray(labels).ravel()
    preds = np.asarray(preds).ravel()
    if labels.shape != preds.shape:
        raise ValueError(
            f"labels and preds must have the same length, got {labels.size} and {preds.size}"
        )
    n_samples = labels.size
    if n_samples == 0:
        # Two empty labelings are trivially identical.
        return 0.0

    def entropy(counts):
        # Shannon entropy of the empirical distribution; empty cells contribute nothing.
        probs = counts[counts > 0] / n_samples
        return float(-(probs * np.log(probs)).sum())

    # Map arbitrary label values onto 0..k-1 so they can be counted with bincount.
    _, label_ids = np.unique(labels, return_inverse=True)
    _, pred_ids = np.unique(preds, return_inverse=True)
    n_pred_clusters = int(pred_ids.max()) + 1
    # Encode each (label, pred) pair as one integer to count the joint distribution.
    joint_ids = label_ids * n_pred_clusters + pred_ids

    h_labels = entropy(np.bincount(label_ids))
    h_preds = entropy(np.bincount(pred_ids))
    h_joint = entropy(np.bincount(joint_ids))
    return float(max(0.0, 2.0 * h_joint - h_labels - h_preds))

In [135]:
# Show the toy labels and predictions, followed by the three clustering scores for them.
print(labels)
print(preds)
print("")

# Printed in order: adjusted Rand index, adjusted mutual information, variation of information.
print(rand_score(labels,preds))
print(mutual_info_score(labels,preds))
print(variation_of_information_score(labels,preds))

[1 2 2 0 1]
[2 0 1 2 2]

0.2105263157894737
0.25177471661855355
0.6591673732008663


In [176]:
def compute_performance(labels,logits,mask):
    """Score hard predictions against the labels on all, train and test samples.

    Parameters
    ----------
    labels : tensor or array-like, one entry per sample
        Ground-truth class of each sample.
    logits : tensor or array-like, one row per sample
        Per-class scores; the prediction for a sample is the argmax of its row.
    mask : tensor or array-like, one entry per sample
        Truthy for training samples; every other sample counts as test.

    Returns
    -------
    dict
        `{subset: {metric_name: score}}` with subsets "All " (the trailing
        space is part of the key), "Train" and "Test".
    """
    def to_numpy(values):
        # `.numpy()` alone fails for tensors on an accelerator or attached to a graph,
        # and does not exist on NumPy inputs, so normalise every input here.
        if th.is_tensor(values):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    preds = np.argmax(to_numpy(logits), axis=1)
    labels = to_numpy(labels)
    train = to_numpy(mask).astype(bool)
    test = np.invert(train)

    pred_sets = {"All ": preds, "Train": preds[train], "Test": preds[test]}
    label_sets = {"All ": labels, "Train": labels[train], "Test": labels[test]}
    eval_functions = {
        "Rand-Index": rand_score,
        "Mutual Information": mutual_info_score,
        "Variation of Information": variation_of_information_score}
    return {
        subset: {
            name: func(label_sets[subset], subset_preds)
            for name, func in eval_functions.items()
        }
        for subset, subset_preds in pred_sets.items()
    }

def print_performance(labels,logits,mask):
    """Print the scores from `compute_performance`, one block per subset.

    Each block is the subset name on its own line followed by a single line of
    `metric: value |` entries with four decimals.
    """
    for subset_n, subset_scores in compute_performance(labels,logits,mask).items():
        entries = "".join(
            f" {func}: {score:.4f} |" for func, score in subset_scores.items()
        )
        print(f"\n{subset_n}:\n" + entries)

def performance_as_df(labels,logits,mask):
    """Return the scores from `compute_performance` as a DataFrame.

    The nested dict is passed straight to `pd.DataFrame`, so subsets become
    columns and metric names become the index.
    """
    return pd.DataFrame(data=compute_performance(labels,logits,mask))

In [177]:
# Baseline table: score the random log-probabilities against the labels.
# NOTE: needs `labels`, `logp_ls` and `mask` to have matching lengths; the toy-example
# cells rebind `labels`/`logp_ls` to 5-element objects, so re-run the loading cell first.
performance_as_df(labels,logp_ls,mask)

Unnamed: 0,All,Train,Test
Mutual Information,0.00085,-0.020248,0.001165
Rand-Index,0.00044,-0.015048,0.000489
Variation of Information,3.759567,3.506337,3.755451


In [342]:
import itertools
class perm_inv_loss:
    """Negative log-likelihood that ignores how the output columns are named.

    The loss is the minimum of `F.nll_loss` over every reordering of the output
    columns, so a model is not penalised for putting the right groups in
    differently numbered columns.

    Labels are assumed to be the integers 0..k-1 (`F.nll_loss` uses them as
    column indices), where k is the number of distinct labels.
    """

    def __init__(self, labels):
        """Store the target labels (an integer tensor) and set up the permutation cache."""
        self.labels = labels
        self.num_classes = len(labels.unique())
        # Permutation lists have k! entries, so they are built lazily and cached per k.
        self.label_perms = {i: None for i in range(2,self.num_classes+1)}

    def _perms(self, nclasses):
        """Return (and cache) every ordering of the column indices 0..nclasses-1."""
        if self.label_perms.get(nclasses) is None:
            self.label_perms[nclasses] = list(itertools.permutations(range(nclasses)))
        return self.label_perms[nclasses]

    @staticmethod
    def _as_bool_mask(mask):
        """Convert a uint8 mask tensor to bool; uint8 mask indexing is deprecated in torch."""
        if th.is_tensor(mask) and mask.dtype == th.uint8:
            return mask.bool()
        return mask

    @staticmethod
    def _min_nll(logits, targets, perms):
        """Smallest NLL over all column orderings in `perms`, still differentiable."""
        losses = [F.nll_loss(logits[:, list(p)], targets) for p in perms]
        # Stacking avoids seeding the reduction with an infinite leaf tensor.
        return th.stack(losses).min()

    def compute_loss(self,logits,mask):
        """Exact permutation-invariant NLL over the samples selected by `mask`.

        Evaluates all `num_classes!` column orderings, so it is only practical
        for a small number of classes.

        Parameters
        ----------
        logits : tensor with one row per sample and `num_classes` columns
            Log-probabilities, as expected by `F.nll_loss`.
        mask : boolean (or uint8) tensor, one entry per sample
            Selects the samples that contribute to the loss.
        """
        mask = self._as_bool_mask(mask)
        perms = self._perms(self.num_classes)
        return self._min_nll(logits[mask], self.labels[mask], perms)

    def approximate_loss(self,logits,mask,nclasses=3):
        """Cheaper stochastic variant: merge the classes into `nclasses` random groups.

        The original classes are randomly assigned to `nclasses` groups of
        roughly equal size, labels and output columns are merged accordingly,
        and the permutation-invariant NLL is computed over the groups, which
        needs only `nclasses!` orderings. The result depends on the random
        assignment drawn from NumPy's global RNG.

        Raises
        ------
        ValueError
            If `nclasses` is not between 1 and `num_classes`.
        """
        if not 1 <= nclasses <= self.num_classes:
            raise ValueError(
                f"nclasses must be between 1 and {self.num_classes}, got {nclasses}"
            )
        mask = self._as_bool_mask(mask)
        perms = self._perms(nclasses)

        # randomly assign labels to new clusters (trying to roughly achieve equal distribution)
        balanced_groups = [i % nclasses for i in range(self.num_classes)]
        assignments = np.random.choice(balanced_groups,size=self.num_classes,replace=False)
        group_of_class = th.as_tensor(assignments, dtype=th.long)
        new_labels = group_of_class[self.labels]

        # NOTE(review): merged columns are the SUM of the member columns. If `logits`
        # holds log-probabilities, logsumexp would give the log of the merged
        # probability instead -- confirm which one is intended.
        new_logits = th.stack(
            [th.sum(logits[:, group_of_class == k], dim=1) for k in range(nclasses)],
            1,
        )
        return self._min_nll(new_logits[mask], new_labels[mask], perms)

In [319]:
# Exact permutation-invariant loss of the random log-probabilities on the masked samples.
# NOTE: `labels` must be a torch tensor here (perm_inv_loss calls `.unique()` on it), and
# `logp_ls`/`mask` must have one row/entry per label -- re-run the loading cell if the
# toy-example cells were executed in between.
loss1 = perm_inv_loss(labels)
print(labels)
print(loss1.compute_loss(logp_ls,mask))

tensor([2, 5, 4,  ..., 1, 0, 2])
tensor(1.9293, grad_fn=<MinBackward2>)


In [343]:
# Repeat the randomised approximate loss 50 times for each group count from 2 to 6.
# Result layout: {nclasses: {repetition: loss value as a NumPy scalar array}}.
loss1 = perm_inv_loss(labels)
losstests = {i: {j: loss1.approximate_loss(logp_ls,mask,nclasses=i).detach().numpy() for j in range(50)} for i in range(2,7)}

In [339]:
# One column per group count, one row per repetition.
res = pd.DataFrame(losstests).astype(float)
# Coefficient of variation (std / mean) of the approximate loss for each group count.
res.std()/res.mean()

2    0.021068
3    0.020606
4    0.035995
5    0.039213
6    0.014262
dtype: float64