In [1]:
# Enable IPython's autoreload extension in mode 2 (reload all modules before
# executing each cell) — presumably so that edits to the local `library`
# package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from torch.nn import functional as F
import torch.nn as nn
import torch.optim as optim

from library import architectures, tools, losses, dataset

import pathlib

import time # EDITTED:

import numpy as np

In [3]:
def _print_split_summary(split_set, size_label, flag_label, examples_label, set_label):
    """Print size, extra-subset flag, three random samples and composition of one split.

    Args:
        split_set: Concatenated dataset exposing ``__len__`` and ``.datasets``
            (a list of subsets, each with ``.dataset.split``,
            ``.dataset.targets`` and ``.indices``).
        size_label: Text placed after ``"# of "`` on the size line.
        flag_label: Question printed before the boolean "more than one subset" flag.
        examples_label: Label of the random-examples line (printed after a tab).
        set_label: Label of the per-subset composition line.
    """
    print(f"# of {size_label}: {len(split_set)}")

    subsets = split_set.datasets
    has_extra_subset = len(subsets) > 1
    print(f"{flag_label} {has_extra_subset}")

    if has_extra_subset:
        # Spot-check three random items of the second subset through its own
        # `.check(i)` hook; the tuple it returns is printed verbatim.
        idxs = np.random.randint(len(subsets[1]), size=3)
        print(f"\t{examples_label}: {[subsets[1].check(i) for i in idxs]}")

    # One (source split name, subset size, sorted unique labels) tuple per subset.
    composition = [
        (
            subdt.dataset.split,
            len(subdt.indices),
            np.unique(subdt.dataset.targets[subdt.indices]).tolist(),
        )
        for subdt in subsets
    ]
    print(f"{set_label}: {composition}")


def check_dataset(split_ratio, train_emnist, val_emnist, test_neg_emnist, test_unkn_emnist):
    """Validate the train/val split ratio and print a summary of every split.

    The realised ratio ``len(train) / (len(train) + len(val))`` is compared to
    ``split_ratio`` with a tolerance of one sample per training subset instead
    of exact float equality, so a correct split of a set whose size is not a
    multiple of the ratio no longer trips the assertion.

    Args:
        split_ratio: Requested fraction of train samples in train + val.
        train_emnist: Training split (concatenated dataset with ``.datasets``).
        val_emnist: Validation split, same structure as ``train_emnist``.
        test_neg_emnist: Test split printed under the "kn-unkn" label.
        test_unkn_emnist: Test split printed under the "unkn-unkn" label.

    Raises:
        AssertionError: If the realised ratio deviates from ``split_ratio`` by
            more than the sample-rounding tolerance.
        ZeroDivisionError: If both the train and validation splits are empty.
    """
    total = len(train_emnist) + len(val_emnist)
    split_ratio_out = len(train_emnist) / total

    # Sizes are whole numbers, so the realised ratio can only match the request
    # to within a sample. Assumes each training subset is split independently
    # (hence one sample of slack per subset) — TODO confirm against `library.dataset`.
    tolerance = max(1, len(train_emnist.datasets)) / total
    assert abs(split_ratio_out - split_ratio) <= tolerance, f"{split_ratio_out} != {split_ratio}"

    _print_split_summary(
        train_emnist, "train", "Train include Negatives?", "Train Neg. examples", "Train set"
    )

    print()

    _print_split_summary(
        val_emnist, "val", "Val include Negatives?", "Val Neg. examples", "Val set"
    )

    print()

    _print_split_summary(
        test_neg_emnist, "kn-unkn test", "Test include Unknowns?", "Test kn-unkn examples", "Test set"
    )

    print()

    _print_split_summary(
        test_unkn_emnist, "unkn-unkn test", "Test include Unknowns?", "Test unkn-unkn examples", "Test set"
    )

In [4]:
# Root directory handed to dataset.EMNIST in the cells below.
# NOTE(review): absolute, cluster-specific path — adjust when running elsewhere.
dataset_root = '/cluster/scratch/khyeongkyun/UZH-MT/data'
# Requested train share of train + val; passed to get_train_set and verified by check_dataset.
split_ratio = 0.8

## Case 1. Dataset call without negatives in training

In [5]:
# Default configuration: no negatives are requested for the train/val split.
emnist = dataset.EMNIST(dataset_root)
train_emnist, val_emnist = emnist.get_train_set(split_ratio)
test_emnist, test_neg_emnist, test_unkn_emnist = emnist.get_test_set()

# Summarise every split except test_emnist, which check_dataset does not take.
check_dataset(
    split_ratio=split_ratio,
    train_emnist=train_emnist,
    val_emnist=val_emnist,
    test_neg_emnist=test_neg_emnist,
    test_unkn_emnist=test_unkn_emnist,
)


# of train: 48000
Train include Negatives? False
Train set: [('mnist', 48000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])]

# of val: 12000
Val include Negatives? False
Val set: [('mnist', 12000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])]

# of kn-unkn test: 18800
Test include Unknowns? True
	Test kn-unkn examples: [(4237, 6, -1), (3957, 5, -1), (360, 1, -1)]
Test set: [('mnist', 10000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 8800, [1, 2, 3, 4, 5, 6, 8, 10, 11, 13, 14])]

# of unkn-unkn test: 18800
Test include Unknowns? True
	Test unkn-unkn examples: [(1799, 18, -1), (7609, 25, -1), (1000, 17, -1)]
Test set: [('mnist', 10000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 8800, [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26])]


## Case 2. Dataset call with negatives in training

In [6]:
# Same as the default call, but negatives are requested for the train/val split.
emnist = dataset.EMNIST(dataset_root)
train_emnist, val_emnist = emnist.get_train_set(split_ratio, include_negatives=True)
test_emnist, test_neg_emnist, test_unkn_emnist = emnist.get_test_set()

# Summarise every split except test_emnist, which check_dataset does not take.
check_dataset(
    split_ratio=split_ratio,
    train_emnist=train_emnist,
    val_emnist=val_emnist,
    test_neg_emnist=test_neg_emnist,
    test_unkn_emnist=test_unkn_emnist,
)

# of train: 90240
Train include Negatives? True
	Train Neg. examples: [(36106, 14, -1), (10267, 11, -1), (19997, 13, -1)]
Train set: [('mnist', 48000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 42240, [1, 2, 3, 4, 5, 6, 8, 10, 11, 13, 14])]

# of val: 22560
Val include Negatives? True
	Val Neg. examples: [(5300, 2, -1), (1908, 6, -1), (10127, 4, -1)]
Val set: [('mnist', 12000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 10560, [1, 2, 3, 4, 5, 6, 8, 10, 11, 13, 14])]

# of kn-unkn test: 18800
Test include Unknowns? True
	Test kn-unkn examples: [(6222, 10, -1), (296, 1, -1), (2063, 3, -1)]
Test set: [('mnist', 10000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 8800, [1, 2, 3, 4, 5, 6, 8, 10, 11, 13, 14])]

# of unkn-unkn test: 18800
Test include Unknowns? True
	Test unkn-unkn examples: [(3187, 19, -1), (4509, 21, -1), (2186, 18, -1)]
Test set: [('mnist', 10000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 8800, [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26])]


## Case 3. Dataset call with negatives in training and a background class

In [7]:
# Negatives are requested for train/val, and both the train and test calls
# enable the background-class option.
emnist = dataset.EMNIST(dataset_root)
train_emnist, val_emnist = emnist.get_train_set(
    split_ratio,
    include_negatives=True,
    has_background_class=True,
)
test_emnist, test_neg_emnist, test_unkn_emnist = emnist.get_test_set(has_background_class=True)

# Summarise every split except test_emnist, which check_dataset does not take.
check_dataset(
    split_ratio=split_ratio,
    train_emnist=train_emnist,
    val_emnist=val_emnist,
    test_neg_emnist=test_neg_emnist,
    test_unkn_emnist=test_unkn_emnist,
)

# of train: 90240
Train include Negatives? True
	Train Neg. examples: [(1688, 8, 10), (32803, 5, 10), (2511, 6, 10)]
Train set: [('mnist', 48000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 42240, [1, 2, 3, 4, 5, 6, 8, 10, 11, 13, 14])]

# of val: 22560
Val include Negatives? True
	Val Neg. examples: [(8499, 8, 10), (3295, 6, 10), (1690, 2, 10)]
Val set: [('mnist', 12000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 10560, [1, 2, 3, 4, 5, 6, 8, 10, 11, 13, 14])]

# of kn-unkn test: 18800
Test include Unknowns? True
	Test kn-unkn examples: [(4627, 6, 10), (550, 1, 10), (8204, 14, 10)]
Test set: [('mnist', 10000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 8800, [1, 2, 3, 4, 5, 6, 8, 10, 11, 13, 14])]

# of unkn-unkn test: 18800
Test include Unknowns? True
	Test unkn-unkn examples: [(2500, 19, 10), (4655, 21, 10), (1826, 18, 10)]
Test set: [('mnist', 10000, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('letters', 8800, [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26])]


# Archive