# EMNIST and Devanagari Data Preprocessing

In [36]:
import torch
from torchvision import transforms
from openset_imagenet.dataset import OSCToyDataset, ImagenetDataset
import pathlib
from torch.utils.data import DataLoader

In [44]:
class dotdict(dict):
    """Dictionary whose keys can also be read, written and deleted as attributes.

    Reading a key that is absent yields ``None`` (the behaviour of
    ``dict.get``) rather than raising; deleting an absent key raises
    ``KeyError`` exactly like ``del d[key]``.
    """

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails; fall back to the mapping.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        # Attribute assignment is stored as a regular dictionary item.
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        # Attribute deletion removes the dictionary item of the same name.
        dict.__delitem__(self, name)

# Stand-in for the configuration file: the same keys a real cfg would provide.
cfg_dict = dict(
    data=dict(
        osc_toy_path='/local/scratch/bergh/OSCToyData/',
        imagenet_path='/local/scratch/datasets/ImageNet/ILSVRC2012/',  # ILSVRC2012 path
        train_file='protocols/p{}_train.csv',  # relative to data directory
        val_file='protocols/p{}_val.csv',  # relative to data directory
    ),
    protocol=0,
    batch_size=32,
    workers=4,
)

# Wrap the top level and the nested 'data' section so both support dot access;
# cfg_dict itself is left untouched (its 'data' entry stays a plain dict).
cfg = dotdict({**cfg_dict, 'data': dotdict(cfg_dict['data'])})

In [45]:
# Training and validation use the same pipeline: ToTensor() is the only transform.
train_tr = transforms.Compose([transforms.ToTensor()])
val_tr = transforms.Compose([transforms.ToTensor()])

# Resolve the protocol-specific CSV file names from the templates in the config.
train_file = pathlib.Path(cfg.data.train_file.format(cfg.protocol))
val_file = pathlib.Path(cfg.data.val_file.format(cfg.protocol))
print(train_file, val_file, sep="\n")

# Build the datasets; both read from the OSC toy data directory.
train_ds = OSCToyDataset(
    transform=train_tr,
    csv_file=train_file,
    imagenet_path=cfg.data.osc_toy_path,
)
val_ds = OSCToyDataset(
    transform=val_tr,
    csv_file=val_file,
    imagenet_path=cfg.data.osc_toy_path,
)

# Shuffled loader over the training set, sized by the config values.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=cfg.batch_size,
    num_workers=cfg.workers,
    shuffle=True,
    pin_memory=True,
)

protocols/p0_train.csv
protocols/p0_val.csv


In [46]:
# Size of the first element of the first training item (presumably the image tensor).
train_ds[0][0].size()

torch.Size([1, 28, 28])

# Finding $s$

In [1]:
import numpy as np
import torch
import torch.nn.functional as F

In [6]:
def round_to_one(d):
    """
    find the threshold just below 1 at which rounding to d decimals reaches 1.

    The result is 1 - 5 * 10**-(d+1), i.e. d nines followed by a 5
    (0.95 for d=1, 0.995 for d=2, ...).

    Note that Python's built-in round() rounds exact ties to the nearest even
    digit, so for d=0 the returned 0.5 rounds to 0.0 there, not 1.0.

    parameters
        d(int): decimals to which the value will be rounded; must be >= 0.

    returns
        np.float64: the threshold value. For large d (roughly d >= 16) the
        threshold is no longer distinguishable from 1.0 in double precision.

    raises
        ValueError: if d is negative.
    """
    d = int(d)
    if d < 0:
        raise ValueError(f"d must be a non-negative number of decimals, got {d}")
    # Closed form of 9 * (10 + 10**2 + ... + 10**d) + 5. Exact Python integers
    # are used on purpose: fixed-width NumPy integers overflow silently here
    # once d gets large (d >= 18 with int64).
    denominator = 10 ** (d + 1)
    numerator = denominator - 5
    # Keep returning a NumPy scalar so downstream NumPy arithmetic is unchanged.
    return np.float64(numerator / denominator)

In [7]:
def test():
    """Print round_to_one(d) and its value rounded to d decimals for d = 0..9."""
    # enumerate supplies d while map lazily computes the matching threshold n.
    for d, n in enumerate(map(round_to_one, range(10))):
        print(f"d={d}, rounded={round(n,d)}, n={n}")

test()

d=0, rounded=0.0, n=0.5
d=1, rounded=1.0, n=0.95
d=2, rounded=1.0, n=0.995
d=3, rounded=1.0, n=0.9995
d=4, rounded=1.0, n=0.99995
d=5, rounded=1.0, n=0.999995
d=6, rounded=1.0, n=0.9999995
d=7, rounded=1.0, n=0.99999995
d=8, rounded=1.0, n=0.999999995
d=9, rounded=1.0, n=0.9999999995


In [8]:
def find_s(d, C, alpha):
    """
    compute the value the logit upper bound (beta) has to exceed so that a softmax output can still round to 1.

    All logits are assumed to lie in [alpha, beta]. The softmax of one class is
    largest when its logit is beta and the other C-1 logits are alpha; requiring
    that value to reach the rounding threshold p = round_to_one(d) gives
        beta >= log(p / (1 - p)) + log(C - 1) + alpha

    parameters
        d(int): decimals to which the softmax is rounded
        C(int): nr of classes, i.e., dimension of softmax
        alpha(float): lower bound of logits.
    """
    p = round_to_one(d)
    # log-odds of the threshold probability
    log_odds = np.log(p / (1 - p))
    return log_odds + np.log(C - 1) + alpha


In [96]:
C = 151
# Logits live in the symmetric range [-s, s], so the required spread
# beta - alpha returned by find_s (with alpha = 0) is split in half.
s = find_s(5, C, 0) / 2
print(s)
# Every class starts at the lower bound -s; class 0 is then raised to +s.
logits = torch.full((C,), float(-s))
logits[0] = s

8.60835146980369


In [97]:
# Display the constructed logits: s at index 0 and -s at every other index.
logits

tensor([ 8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084, -8.6084,
        -8.6084, -8.6084, -8.6084, -8.60

In [98]:
# Softmax over the class dimension of the 1-D logits vector. dim is passed
# explicitly because calling F.softmax without it is deprecated and emits a
# warning; for a 1-D tensor dim=0 is the dimension that was chosen implicitly.
softmax = F.softmax(logits, dim=0)
softmax

  softmax = F.softmax(logits)


tensor([1.0000e+00, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08, 3.3333e-08,
        3.3333e-08, 3.3333e-08, 3.3333e-

In [99]:
# Softmax output for index 0, the entry whose logit was set to +s.
softmax[0]

tensor(1.0000)

In [85]:
# The value printed for every entry other than index 0 in the softmax output above.
3.3333e-08

3.3333e-08