In [1]:
# codebase from hyperspherical prototype networks, Pascal Mettes, NeurIPS2019
import numpy as np
import argparse
import math
import os
import torch
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms

In [2]:

def parse_args():
    """Build the command-line parser for the classification script and parse ``sys.argv``.

    Returns:
        argparse.Namespace: one attribute per option, named after each
        argument's ``dest`` (e.g. ``data_name``, ``learning_rate``, ``do_decay``).
    """

    def _str2bool(value):
        # ``type=bool`` would call bool() on the raw string, so "--do_decay False"
        # would evaluate to True (any non-empty string is truthy). Parse the text
        # explicitly instead.
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))

    parser = argparse.ArgumentParser(description="classification")
    parser.add_argument("--data_name", dest="data_name", default="cifar100",
                        choices=["cifar100", "cifar10", "cub"], type=str)  # choose the name of the dataset

    # Input / output locations.
    parser.add_argument("--datadir", dest="datadir", default="dat/", type=str)
    parser.add_argument("--resdir", dest="resdir", default="res/", type=str)
    parser.add_argument("--hpnfile", dest="hpnfile", default="", type=str)
    parser.add_argument("--logdir", dest="logdir", default="", type=str)
    parser.add_argument("--loss", dest="loss_name", default="PeBuseLoss", type=str)

    # Network and optimization settings.
    parser.add_argument("-n", dest="network", default="resnet32", type=str)
    parser.add_argument("-r", dest="optimizer", default="sgd", type=str)
    parser.add_argument("-l", dest="learning_rate", default=0.01, type=float)
    parser.add_argument("-m", dest="momentum", default=0.9, type=float)
    parser.add_argument("-c", dest="decay", default=0.0001, type=float)
    parser.add_argument("-s", dest="batch_size", default=128, type=int)
    parser.add_argument("-e", dest="epochs", default=250, type=int)
    parser.add_argument("-p", dest="penalty", default='dim', type=str)  # choose penalty in loss
    parser.add_argument("--mult", dest="mult", default=0.1, type=float)
    parser.add_argument("--curv", dest="curv", default=1.0, type=float)

    parser.add_argument("--seed", dest="seed", default=100, type=int)
    parser.add_argument("--drop1", dest="drop1", default=500, type=int)
    parser.add_argument("--drop2", dest="drop2", default=1000, type=int)
    parser.add_argument("--do_decay", dest="do_decay", default=False, type=_str2bool)
    args = parser.parse_args()
    return args


################################################################################
# General helpers.
################################################################################

#
# Count the number of learnable parameters in a model.
#
def count_parameters(model):
    """Return the total number of elements over all parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        # Parameters with requires_grad=False are not counted.
        if param.requires_grad:
            total += param.numel()
    return total


#
# Get the desired optimizer.
#
def get_optimizer(optimname, params, learning_rate, momentum, decay):
    """Create the ``torch.optim`` optimizer selected by ``optimname``.

    Args:
        optimname: one of "sgd", "adadelta", "adam", "adamW", "rmsprop",
            "asgd", "adamax"; any other value falls back to SGD after
            printing a notice.
        params: parameters handed to the optimizer constructor.
        learning_rate: passed as ``lr``.
        momentum: only forwarded to SGD and RMSprop.
        decay: passed as ``weight_decay``.

    Returns:
        The constructed optimizer instance.
    """
    # optimname -> (class name inside torch.optim, whether momentum is forwarded)
    known = {
        "sgd": ("SGD", True),
        "adadelta": ("Adadelta", False),
        "adam": ("Adam", False),
        "adamW": ("AdamW", False),
        "rmsprop": ("RMSprop", True),
        "asgd": ("ASGD", False),
        "adamax": ("Adamax", False),
    }

    try:
        class_name, uses_momentum = known[optimname]
    except (KeyError, TypeError):
        # Unknown (or unhashable) selector: same SGD fallback as for "sgd".
        print('Your option for the optimizer is not available, I am loading SGD.')
        class_name, uses_momentum = known["sgd"]

    options = {"lr": learning_rate, "weight_decay": decay}
    if uses_momentum:
        options["momentum"] = momentum

    # Resolve the class lazily so only the requested optimizer has to exist.
    return getattr(optim, class_name)(params, **options)


################################################################################
# Standard dataset loaders.
################################################################################
def load_dataset(dataset_name, basedir, batch_size, kwargs):
    """Dispatch to the loader for ``dataset_name``.

    Only 'cifar100' is handled here; every other name raises ``Exception``.
    ``kwargs`` is forwarded to the loader unchanged, e.g.
    {'num_workers': 64, 'pin_memory': True}.

    Returns:
        Whatever ``load_cifar100`` returns for the given arguments.
    """
    # Reject unsupported names first so the supported path reads straight through.
    if dataset_name != 'cifar100':
        raise Exception('Selected dataset is not available.')
    return load_cifar100(basedir, batch_size, kwargs)

        # I guess they are working with a map-style dataset, since they compute its len().


def load_cifar100(basedir, batch_size, kwargs):
    """Build the CIFAR-100 train and test ``DataLoader`` objects.

    Args:
        basedir: directory under which the ``cifar100/`` dataset folder lives
            (with or without a trailing path separator).
        batch_size: batch size used by both loaders.
        kwargs: extra keyword arguments forwarded to both ``DataLoader``
            constructors, e.g. {'num_workers': 64, 'pin_memory': True}.

    Returns:
        tuple: ``(trainloader, testloader)``.
    """
    # Join with os.path so a basedir without a trailing slash still resolves
    # to <basedir>/cifar100/ instead of "<basedir>cifar100/".
    root = os.path.join(basedir, 'cifar100/')

    # Input channels normalization.
    mrgb = [0.507, 0.487, 0.441]
    srgb = [0.267, 0.256, 0.276]
    normalize = transforms.Normalize(mean=mrgb, std=srgb)

    # Train-time pipeline: random augmentation, then tensor conversion and normalization.
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, 4),  # random 32x32 crop with padding 4
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),  # convert the loaded image to a torch image tensor
        normalize,
    ])
    # Test-time pipeline: no augmentation.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # Load train data (downloaded into `root` when not already present).
    trainloader = torch.utils.data.DataLoader(
        datasets.CIFAR100(root=root, train=True,
                          transform=train_transform, download=True),
        batch_size=batch_size, shuffle=True, **kwargs)

    # Labels to torch.
    trainloader.dataset.train_labels = torch.from_numpy(np.array(trainloader.dataset.targets))

    # Load test data (no download flag: relies on the files fetched for the train split).
    # NOTE(review): the test loader shuffles as well; order should not matter for
    # aggregate metrics, so shuffle=False may be preferable - confirm with callers.
    testloader = torch.utils.data.DataLoader(
        datasets.CIFAR100(root=root, train=False,
                          transform=test_transform),
        batch_size=batch_size, shuffle=True, **kwargs)

    # Labels to torch.
    testloader.dataset.test_labels = torch.from_numpy(np.array(testloader.dataset.targets))

    return trainloader, testloader



In [3]:
# Loader configuration.
# Cap the worker count at the number of CPUs: requesting more DataLoader
# workers than the machine has cores triggers a PyTorch warning and can slow
# down or freeze data loading.
kwargs = {'num_workers': min(64, os.cpu_count() or 1), 'pin_memory': True}
batch_size = 128
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
basedir = '/Users/dj/Documents/GitHub/Master_Thesis/Code/HBL_GPU/data/'
data_name = 'cifar100'

def load_cifar100(basedir, batch_size, kwargs):
    """Build the CIFAR-100 train and test ``DataLoader`` objects.

    NOTE(review): this cell re-defines ``load_cifar100``, which is also defined
    in an earlier cell of this notebook; the later definition shadows the
    earlier one. Consider keeping a single definition.

    Args:
        basedir: directory under which the ``cifar100/`` dataset folder lives
            (with or without a trailing path separator).
        batch_size: batch size used by both loaders.
        kwargs: extra keyword arguments forwarded to both ``DataLoader``
            constructors, e.g. {'num_workers': 64, 'pin_memory': True}.

    Returns:
        tuple: ``(trainloader, testloader)``.
    """
    # Join with os.path so a basedir without a trailing slash still resolves
    # to <basedir>/cifar100/ instead of "<basedir>cifar100/".
    root = os.path.join(basedir, 'cifar100/')

    # Input channels normalization.
    mrgb = [0.507, 0.487, 0.441]
    srgb = [0.267, 0.256, 0.276]
    normalize = transforms.Normalize(mean=mrgb, std=srgb)

    # Train-time pipeline: random augmentation, then tensor conversion and normalization.
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, 4),  # random 32x32 crop with padding 4
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),  # convert the loaded image to a torch image tensor
        normalize,
    ])
    # Test-time pipeline: no augmentation.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # Load train data (downloaded into `root` when not already present).
    trainloader = torch.utils.data.DataLoader(
        datasets.CIFAR100(root=root, train=True,
                          transform=train_transform, download=True),
        batch_size=batch_size, shuffle=True, **kwargs)

    # Labels to torch.
    trainloader.dataset.train_labels = torch.from_numpy(np.array(trainloader.dataset.targets))

    # Load test data (no download flag: relies on the files fetched for the train split).
    # NOTE(review): the test loader shuffles as well; order should not matter for
    # aggregate metrics, so shuffle=False may be preferable - confirm with callers.
    testloader = torch.utils.data.DataLoader(
        datasets.CIFAR100(root=root, train=False,
                          transform=test_transform),
        batch_size=batch_size, shuffle=True, **kwargs)

    # Labels to torch.
    testloader.dataset.test_labels = torch.from_numpy(np.array(testloader.dataset.targets))

    return trainloader, testloader


In [4]:
# Loader configuration.
# Cap the worker count at the number of CPUs: requesting more DataLoader
# workers than the machine has cores triggers a PyTorch warning and can slow
# down or freeze data loading.
kwargs = {'num_workers': min(64, os.cpu_count() or 1), 'pin_memory': True}
batch_size = 128
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
basedir = '/Users/dj/Documents/GitHub/Master_Thesis/Code/HBL_GPU/data/'
data_name = 'cifar100'
args = [data_name, basedir, batch_size]

# Build the CIFAR-100 loaders (the dataset files are downloaded on first use).
trainloader, testloader = load_cifar100(basedir, batch_size, kwargs)
# trainloader, testloader = load_dataset('cifar100', basedir, batch_size, kwargs)

Files already downloaded and verified


  cpuset_checked))


In [16]:
# Check what kind of object load_cifar100 returned for the train split.
type(trainloader)

torch.utils.data.dataloader.DataLoader

In [18]:
# Check the type of the dataset object wrapped by the train loader.
type(trainloader.dataset)

torchvision.datasets.cifar.CIFAR100

In [21]:
# Number of samples in the training dataset.
len(trainloader.dataset)

50000

In [6]:
# Inspect the first training sample.
# NOTE(review): the train transform contains RandomCrop / RandomHorizontalFlip,
# so the displayed values presumably differ between runs - confirm.
trainloader.dataset[0]

(tensor([[[-1.8989, -1.8989, -1.8989,  ..., -1.8989, -1.8989, -1.8989],
          [-1.8989, -1.8989, -1.8989,  ..., -1.8989, -1.8989, -1.8989],
          [-1.8989, -1.8989, -1.8989,  ..., -1.8989, -1.8989, -1.8989],
          ...,
          [-1.8989, -0.3714, -0.6798,  ...,  0.5393,  0.5686,  0.5393],
          [-1.8989, -0.5036, -0.7092,  ...,  0.7302,  0.7743,  0.6127],
          [-1.8989, -0.7239, -0.8707,  ...,  0.4658,  0.4218,  0.4511]],
 
         [[-1.9023, -1.9023, -1.9023,  ..., -1.9023, -1.9023, -1.9023],
          [-1.9023, -1.9023, -1.9023,  ..., -1.9023, -1.9023, -1.9023],
          [-1.9023, -1.9023, -1.9023,  ..., -1.9023, -1.9023, -1.9023],
          ...,
          [-1.9023, -0.5390, -0.8913,  ...,  1.2227,  1.2686,  1.2227],
          [-1.9023, -0.6003, -0.8760,  ...,  1.4065,  1.4678,  1.3299],
          [-1.9023, -0.7534, -0.9832,  ...,  1.1614,  1.1461,  1.1920]],
 
         [[-1.5978, -1.5978, -1.5978,  ..., -1.5978, -1.5978, -1.5978],
          [-1.5978, -1.5978,

In [11]:
# Container type of a single dataset item.
type(trainloader.dataset[0])

tuple

In [14]:
# Type of the first entry of a dataset item (the transformed image).
type(trainloader.dataset[0][0])

torch.Tensor

In [15]:
# Type of the second entry of a dataset item (presumably the class label - confirm).
type(trainloader.dataset[0][1])

int

In [10]:
# Shape of the transformed image tensor of the first training sample.
trainloader.dataset[0][0].shape

torch.Size([3, 32, 32])

In [41]:
# Read a single scalar from the image tensor of the test item at index 3:
# position [2][0][0], i.e. channel 2, row 0, column 0 under the C x H x W layout noted below.
(testloader.dataset[3][0])[2][0][0]

# A dataset item is a tuple: the first entry is a torch.Tensor, the second one an int.
# Image shape is torch.Size([3, 32, 32]), i.e. the torch image layout C x H x W.
# The image type is torch.Tensor.

tensor(-0.0491)

In [35]:
# Show the dataset summary: size, root location, split and transform pipeline.
trainloader.dataset

Dataset CIFAR100
    Number of datapoints: 50000
    Root location: /Users/dj/Documents/GitHub/Master_Thesis/Code/HBL_GPU/data/cifar100/
    Split: Train
    StandardTransform
Transform: Compose(
               RandomCrop(size=(32, 32), padding=4)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276])
           )