# Test Notebook
This notebook is here to test things (ideas/functions/implementations/etc) out. Each section should be independent of any other section.

In [1]:
import torch

In [8]:
features = torch.rand((4,2))*19
features

tensor([[12.7912, 14.0421],
        [17.1440, 10.2480],
        [ 2.7961, 12.3380],
        [ 2.1286, 14.6698]])

In [9]:
a = torch.linalg.norm(features, ord=2, dim=1).view(-1,1)
a

tensor([[18.9946],
        [19.9735],
        [12.6508],
        [14.8235]])

In [13]:
cos_theta = torch.ones((4, 5))
cos_theta

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])

In [14]:
logits = torch.mul(a, cos_theta)
logits

tensor([[18.9946, 18.9946, 18.9946, 18.9946, 18.9946],
        [19.9735, 19.9735, 19.9735, 19.9735, 19.9735],
        [12.6508, 12.6508, 12.6508, 12.6508, 12.6508],
        [14.8235, 14.8235, 14.8235, 14.8235, 14.8235]])

## Testing fancy array indexing

In [1]:
import numpy as np

In [26]:
arr = np.array([[2,3],[4,5]])
arr

array([[2, 3],
       [4, 5]])

In [27]:
gt = np.array([1, 0])
arr[np.arange(arr.shape[0]), gt]

array([3, 4])

In [19]:
arr[np.arange(arr.shape[0]), gt] = 5
arr

array([[5, 3],
       [4, 5]])

In [32]:
bool_idx = np.full_like(arr, False).astype(bool)
bool_idx[np.arange(arr.shape[0]), gt] = True

In [34]:
bool_idx

array([[False,  True],
       [ True, False]])

## Analyzing Training/Evaluation Scores

In [15]:
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt

In [2]:
training_scores = defaultdict(lambda: defaultdict(dict))
protocol = 2
loss = 'entropic'

file_path = f'experiments/Protocol_{protocol}/{loss}_train_arr.npz'
data = np.load(file_path)
for key in data.keys():  # keys are the data description, e.g., train_loss, val_conf_unk
    training_scores[protocol][loss][key] = data[key]

In [3]:
training_scores

defaultdict(<function __main__.<lambda>()>,
            {2: defaultdict(dict,
                         {'entropic': {'epochs': array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
                                  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
                                  34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
                                  51, 52, 53, 54, 55, 56, 57, 58, 59]),
                           'val_conf_kn': array([0.03918746, 0.04027894, 0.04389453, 0.04733944, 0.05110316,
                                  0.05183748, 0.05764904, 0.05810191, 0.05808123, 0.07409267,
                                  0.07164437, 0.05348416, 0.08372325, 0.09768613, 0.09901144,
                                  0.1096348 , 0.0947654 , 0.12071757, 0.1275288 , 0.13693486,
                                  0.12849972, 0.13774525, 0.13266587, 0.21669345, 0.19207108,
                                  0.1

In [7]:
# loading evaluation data/scores
scores = defaultdict(lambda: defaultdict(dict))
ground_truths = {}

protocol = 2
loss = 'entropic'
alg = 'threshold'

score_file = f"experiments/Protocol_{protocol}/{loss}_{alg}_test_arr_curr.npz"
data = np.load(score_file)



In [9]:
list(data.keys())

['gt', 'logits', 'features', 'scores']

In [12]:
data['features'].shape

(5800, 30)

In [13]:
data['scores'].shape

(5800, 30)

In [14]:
data['gt'].shape

(5800,)

In [17]:
plt.line(data['scores'][:,0])

AttributeError: module 'matplotlib.pyplot' has no attribute 'line'

## EMNIST and Devanagari Data Preprocessing

In [36]:
import torch
from torchvision import transforms
from openset_imagenet.dataset import OSCToyDataset, ImagenetDataset
import pathlib
from torch.utils.data import DataLoader

In [44]:
class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

# simulate cfg file
cfg_dict = {
    'data': {
        'osc_toy_path': '/local/scratch/bergh/OSCToyData/',
        'imagenet_path': '/local/scratch/datasets/ImageNet/ILSVRC2012/', # ILSVRC2012 path
        'train_file': 'protocols/p{}_train.csv',        # relative to data directory
        'val_file':   'protocols/p{}_val.csv'          # relative to data directory
    },
    'protocol': 0,
    'batch_size': 32,
    'workers': 4
}

cfg = dotdict(cfg_dict)
cfg.data = dotdict(cfg.data)

In [45]:
train_tr = transforms.Compose([transforms.ToTensor()])
val_tr = transforms.Compose([transforms.ToTensor()])

# load datasets

train_file = pathlib.Path(cfg.data.train_file.format(cfg.protocol))
val_file = pathlib.Path(cfg.data.val_file.format(cfg.protocol))
print(train_file)
print(val_file)

train_ds = OSCToyDataset(
    csv_file=train_file,
    imagenet_path=cfg.data.osc_toy_path,
    transform=train_tr
)
val_ds = OSCToyDataset(
    csv_file=val_file,
    imagenet_path=cfg.data.osc_toy_path,
    transform=val_tr
)



# create data loader

train_loader = DataLoader(
        train_ds,
        batch_size=cfg.batch_size,
        shuffle=True,
        num_workers=cfg.workers,
        pin_memory=True)

protocols/p0_train.csv
protocols/p0_val.csv


In [46]:
train_ds[0][0].size()

torch.Size([1, 28, 28])

## Finding $s$

In [1]:
import numpy as np
import torch
import torch.nn.functional as F

In [6]:
def round_to_one(d):
    """find smallest number n < 1 that rounds to 1 when rounding to d decimals."""
    numerator = (10**np.arange(1,d+1)).sum()*9 + 5
    denominator = 10**(d+1)
    return numerator/denominator

In [7]:
def test():
    for d in range(10):
        n = round_to_one(d)
        print(f"d={d}, rounded={round(n,d)}, n={n}")

test()

d=0, rounded=0.0, n=0.5
d=1, rounded=1.0, n=0.95
d=2, rounded=1.0, n=0.995
d=3, rounded=1.0, n=0.9995
d=4, rounded=1.0, n=0.99995
d=5, rounded=1.0, n=0.999995
d=6, rounded=1.0, n=0.9999995
d=7, rounded=1.0, n=0.99999995
d=8, rounded=1.0, n=0.999999995
d=9, rounded=1.0, n=0.9999999995


In [8]:
def find_s(d, C, alpha):
    """
    find lower strict bound on upper bound (beta) of logits s.t. the softmax for a class can still reach 1.

    logit_i in [alpha, beta] forall i in {1, ..., C}

    parameters
        d(int): decimals to which round softmax
        C(int): nr of classes, i.e., dimension of softmax
        alpha(float): lower bound of logits.
    """
    rounding_precision = round_to_one(d)
    return np.log(rounding_precision/(1-rounding_precision)) + np.log(C-1) + alpha


In [96]:
C = 151
s = find_s(5, C, 0)/2
print(s)
logits = torch.zeros(C)
logits[0] = s
logits[1:] = -s

8.60835146980369


In [97]:
logits

tensor([ 8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.60

In [98]:
softmax = F.softmax(logits)
softmax

  softmax = F.softmax(logits)


tensor([1.0000e+00, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-

In [99]:
softmax[0]

tensor(1.0000)

In [85]:
3.3333e-08

3.3333e-08

Seeing as in practice the logits are almost exclusively positive, we can use the 

## Looking at evaluation scores

In [13]:
scores = np.load('experiments/Protocol_0/cosface_threshold_test_arr_curr.npz')

In [17]:
[k for k in scores.keys()]

['gt', 'logits', 'features', 'scores', 'angles']

In [18]:
scores['logits']

array([[ 189.6681  ,  122.60147 ,  191.20335 , ...,  194.09506 ,
         191.19868 ,  194.78024 ],
       [ 166.64642 ,   30.251019,  166.52911 , ...,  166.12    ,
         166.52957 ,  165.98073 ],
       [ 292.21848 ,  232.69762 ,  295.4129  , ...,  301.5362  ,
         295.40314 ,  303.01102 ],
       ...,
       [  64.091675, -100.51866 ,   61.923904, ...,   57.49604 ,
          61.93067 ,   56.369328],
       [ 168.76585 ,   -8.219255,  167.91168 , ...,  166.01784 ,
         167.91441 ,  165.50502 ],
       [  79.10963 ,  -91.049225,   77.058914, ...,   72.850945,
          77.06532 ,   71.7762  ]], dtype=float32)