## Setup

### Imports

In [1]:
import imp
import importlib
import itertools
import json
import logging
import os
import random
import time
from datetime import datetime as dt

import matplotlib.pyplot as plt
%matplotlib inline
# matplotlib.use('agg', force = True)
import numpy as np
import pandas as pd
import torch

import dataset
import net
import proxynca
import utils
from utils import JSONEncoder, json_dumps

### Args Class

In [2]:
class args:
    # Run configuration, used as a plain attribute namespace: the class itself
    # (never an instance) is passed around and mutated by the experiment loop.
    # Deliberately no class docstring: vars(args) is written to the log, and a
    # docstring would change the logged '__doc__' entry.

    # --- data / config file ---
    dataset = 'cars'
    config = 'config1.json'

    # --- model / batching ---
    sz_embedding = 64  # size of the embedding layer attached to the network
    sz_batch = 32  # number of samples per batch
    nb_epochs = 40
    gpu_id = 0
    nb_workers = 4
    with_nmi = True  # switch the NMI computation on or off (turn off for sop)

    # --- loss / optimizer hyper-parameters ---
    scaling_x = 3.0  # scaling factor for the normalized embeddings
    scaling_p = 3.0  # scaling factor for the normalized proxies
    lr_proxynca = 1.0  # learning rate for proxynca

    # --- bookkeeping ---
    # Evaluated once, when the class body runs, so every run started from this
    # class object shares the same timestamped log file name.
    log_filename = '{}-{}'.format(dataset, dt.now().strftime("%Y%m%d-%H%M%S"))
    results_filename = dataset + '-results.csv'
    torch_version = str(torch.__version__)
    edition = 0
    seed = 0

### Seed Everything

In [3]:
def seed_everything(args = args):
    """Seed the Python, NumPy and PyTorch RNGs from ``args.seed``.

    A seed of ``-1`` means "do not seed": nothing is configured and the text
    'not seeded' is printed instead. For any other value cuDNN is switched to
    deterministic mode with benchmarking disabled before the RNGs are seeded.
    """
    seed = args.seed

    # Sentinel value: leave all RNGs and cuDNN settings untouched.
    if seed == -1:
        print('not seeded')
        return

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


### Choose Device

In [4]:
# Make the GPU with index args.gpu_id the current CUDA device for this kernel.
torch.cuda.set_device(args.gpu_id)

### Setup the config

In [5]:
def setup_config(args = args):
    """Load the config named by ``args.config`` and apply the run's overrides.

    The loaded config is patched in place with the embedding/proxy scaling
    factors and the ProxyNCA learning rate taken from ``args``, then returned.
    """
    cfg = utils.load_config(args.config)

    # Loss hyper-parameters come from args rather than from the config file.
    cfg['criterion']['args']['scaling_x'] = args.scaling_x
    cfg['criterion']['args']['scaling_p'] = args.scaling_p

    # Learning rate of the optimizer group that holds the proxies.
    proxynca_opt_args = cfg['opt']['args']['proxynca']
    proxynca_opt_args['lr'] = args.lr_proxynca

    return cfg

### DataLoader

In [6]:
def load_tr(config = None, args = args):
    """Build the training DataLoader for ``args.dataset``.

    Parameters
    ----------
    config : dict, optional
        Config as returned by ``setup_config``. When omitted it is built from
        ``args`` at call time. (It used to be a default argument evaluated
        once at definition time, which froze whatever values ``args`` held
        when the cell was run.)
    args : namespace
        Provides ``dataset``, ``sz_batch`` and ``nb_workers``.

    Returns
    -------
    torch.utils.data.DataLoader
        Shuffled loader over the 'train' classes; the last incomplete batch
        is dropped.
    """
    if config is None:
        config = setup_config(args)

    ds_config = config['dataset'][args.dataset]
    dl_tr = torch.utils.data.DataLoader(
        dataset.load(
            name = args.dataset,
            root = ds_config['root'],
            classes = ds_config['classes']['train'],
            transform = dataset.utils.make_transform(**config['transform_parameters'])
        ),
        batch_size = args.sz_batch,
        shuffle = True,
        num_workers = args.nb_workers,
        drop_last = True,
        pin_memory = True
    )
    return dl_tr

def load_ev(config = None, args = args):
    """Build the evaluation DataLoader for ``args.dataset``.

    Parameters
    ----------
    config : dict, optional
        Config as returned by ``setup_config``. When omitted it is built from
        ``args`` at call time. (It used to be a default argument evaluated
        once at definition time, which froze whatever values ``args`` held
        when the cell was run.)
    args : namespace
        Provides ``dataset``, ``sz_batch`` and ``nb_workers``.

    Returns
    -------
    torch.utils.data.DataLoader
        Unshuffled loader over the 'eval' classes, using the transform built
        with ``is_train = False``; no batch is dropped.
    """
    if config is None:
        config = setup_config(args)

    ds_config = config['dataset'][args.dataset]
    dl_ev = torch.utils.data.DataLoader(
        dataset.load(
            name = args.dataset,
            root = ds_config['root'],
            classes = ds_config['classes']['eval'],
            transform = dataset.utils.make_transform(
                **config['transform_parameters'],
                is_train = False)
        ),
        batch_size = args.sz_batch,
        shuffle = False,
        num_workers = args.nb_workers,
        pin_memory = True
    )
    return dl_ev


### Set up the net

In [7]:
def setup_model(args = args):
    """Create the pretrained network with its embedding layer, on the GPU.

    ``net.embed`` is called for its effect on the network object; its return
    value is not used. The embedding size is ``args.sz_embedding``.
    """
    network = net.bn_inception(pretrained = True)
    net.embed(network, sz_embedding = args.sz_embedding)
    return network.cuda()


In [8]:
def setup_criterion(config = None, args = args, dl_tr = None):
    """Create the ProxyNCA criterion on the GPU.

    Parameters
    ----------
    config : dict, optional
        Config as returned by ``setup_config``; built from ``args`` at call
        time when omitted.
    args : namespace
        Provides ``sz_embedding``.
    dl_tr : DataLoader, optional
        Training loader; only ``dl_tr.dataset.nb_classes()`` is read. Built
        with ``load_tr(config, args)`` at call time when omitted.

    Notes
    -----
    ``config`` and ``dl_tr`` used to be default arguments evaluated at
    definition time, so merely defining this function loaded the config and
    built a data loader, and later changes to ``args`` were ignored.
    """
    if config is None:
        config = setup_config(args)
    if dl_tr is None:
        dl_tr = load_tr(config, args)

    criterion = proxynca.ProxyNCA(
        nb_classes = dl_tr.dataset.nb_classes(),
        sz_embedding = args.sz_embedding,
        **config['criterion']['args']).cuda()
    return criterion

### Set up Optimizer

In [9]:
def setup_opt(config = None, model = None, criterion = None):
    """Create the optimizer with three parameter groups.

    Groups, in order: (1) all model parameters except the embedding layer,
    (2) the embedding layer parameters, (3) the criterion parameters. Each
    group is merged with its options from ``config['opt']['args']``
    ('backbone', 'embedding', 'proxynca'); the 'base' options are passed to
    the optimizer constructor ``config['opt']['type']`` as keyword arguments.

    Parameters
    ----------
    config : dict, optional
        Built with ``setup_config()`` at call time when omitted.
    model : module with an ``embedding_layer`` attribute, optional
        Built with ``setup_model()`` at call time when omitted.
    criterion : module, optional
        Built with ``setup_criterion(config = config)`` at call time when
        omitted.

    Notes
    -----
    The three arguments used to be default arguments evaluated at definition
    time, which created a model and a criterion on the GPU as soon as the
    cell was run and kept them alive afterwards.
    """
    if config is None:
        config = setup_config()
    if model is None:
        model = setup_model()
    if criterion is None:
        criterion = setup_criterion(config = config)

    opt_args = config['opt']['args']

    embedding_params = list(model.embedding_layer.parameters())
    # Filter by identity while walking model.parameters() so the backbone group
    # keeps the model's own parameter order (a set difference yields an
    # arbitrary order).
    embedding_ids = {id(p) for p in embedding_params}
    backbone_params = [p for p in model.parameters() if id(p) not in embedding_ids]

    opt = config['opt']['type'](
        [
            # network parameters, excluding the embedding layer
            {'params': backbone_params, **opt_args['backbone']},
            # embedding parameters
            {'params': embedding_params, **opt_args['embedding']},
            # proxy nca parameters
            {'params': list(criterion.parameters()), **opt_args['proxynca']},
        ],
        **opt_args['base']
    )
    return opt


### Set up scheduler

In [10]:
def setup_scheduler(config = None, opt = None):
    """Create the learning-rate scheduler described by ``config['lr_scheduler']``.

    Parameters
    ----------
    config : dict, optional
        Built with ``setup_config()`` at call time when omitted.
    opt : optimizer, optional
        Built with ``setup_opt(config = config)`` at call time when omitted.

    Returns
    -------
    The object returned by ``config['lr_scheduler']['type'](opt, **args)``.

    Notes
    -----
    Both arguments used to be default arguments evaluated at definition time,
    which built a complete optimizer (and everything it needs) as soon as the
    cell was run.
    """
    if config is None:
        config = setup_config()
    if opt is None:
        opt = setup_opt(config = config)

    scheduler_config = config['lr_scheduler']
    scheduler = scheduler_config['type'](opt, **scheduler_config['args'])
    return scheduler

### Set up logging

In [11]:
def setup_logging(args = args):
    """(Re)configure root logging to write to 'log/<args.log_filename>.log' and the console.

    The function is called once per training run. Handlers left over from a
    previous call are detached and closed first so their log files do not stay
    open, then the ``logging`` module is reloaded so that ``basicConfig``
    starts from a fresh root logger. The 'log' directory is created if it
    does not exist yet.

    Finally the training parameters (``vars(args)``) and the number of epochs
    are logged.
    """
    # Release handlers (and their open files) installed by an earlier call.
    root_logger = logging.getLogger()
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)
        handler.close()

    # importlib.reload replaces the deprecated imp.reload.
    importlib.reload(logging)

    # FileHandler raises if the target directory is missing.
    os.makedirs('log', exist_ok = True)

    logging.basicConfig(
        format = "%(asctime)s %(message)s",
        level = logging.INFO,
        handlers = [
            logging.FileHandler("{0}/{1}.log".format('log', args.log_filename)),
            logging.StreamHandler()
        ]
    )

    logging.info("Training parameters: {}".format(vars(args)))
    logging.info("Training for {} epochs".format(args.nb_epochs))

## Training

In [12]:
def train_and_test(args = args):
    """Train a model for ``args.nb_epochs`` epochs, evaluating after every epoch.

    Everything (seeding, config, data loaders, model, criterion, optimizer,
    scheduler, logging) is rebuilt from ``args`` on each call. The model is
    evaluated once before training (logged only) and once after each epoch.

    Parameters
    ----------
    args : namespace
        Run configuration; see the ``args`` class.

    Returns
    -------
    pandas.DataFrame
        One row per epoch with columns 'epoch', 'r@1', 'r@2', 'r@4', 'r@8'
        and, when ``args.with_nmi`` is set, 'NMI'.

    Notes
    -----
    ``utils.evaluate`` is assumed to return ``(recalls, nmi)`` when
    ``with_nmi`` is set and a plain list of recalls otherwise -- inferred from
    how its result is unpacked here; confirm against ``utils``.
    """
    # set up new parameters
    seed_everything(args)
    config = setup_config(args)
    dl_tr = load_tr(config, args)
    dl_ev = load_ev(config, args)
    model = setup_model(args = args)
    # Reuse the training loader built above instead of constructing a second
    # one from default arguments.
    criterion = setup_criterion(config = config, args = args, dl_tr = dl_tr)
    opt = setup_opt(config = config, model = model, criterion = criterion)
    scheduler = setup_scheduler(config = config, opt = opt)
    setup_logging(args = args)

    columns = ['epoch', 'r@1', 'r@2', 'r@4', 'r@8']
    if args.with_nmi:
        columns.append('NMI')
    # Per-epoch rows are collected here and turned into a DataFrame once at
    # the end, rather than concatenating a one-row frame every epoch.
    rows = []

    losses = []
    t1 = time.time()
    logging.info("**Evaluating initial model.**")
    with torch.no_grad():
        utils.evaluate(model, dl_ev, with_nmi = args.with_nmi)

    for e in range(0, args.nb_epochs):
        # The scheduler advances at the start of every epoch except the first.
        if e != 0:
            scheduler.step()
        time_per_epoch_1 = time.time()
        losses_per_epoch = []
        for x, y, _ in dl_tr:
            opt.zero_grad()
            m = model(x.cuda())
            loss = criterion(m, y.cuda())
            loss.backward()

            losses_per_epoch.append(loss.item())
            opt.step()

        time_per_epoch_2 = time.time()
        # The reported epoch loss is the mean over the last 20 batches only.
        losses.append(np.mean(losses_per_epoch[-20:]))
        logging.info(
            "Epoch: {}, loss: {:.3f}, time (seconds): {:.2f}.".format(
                e,
                losses[-1],
                time_per_epoch_2 - time_per_epoch_1))
        with torch.no_grad():
            logging.info("**Evaluating.**")
            recall = utils.evaluate(model, dl_ev, with_nmi = args.with_nmi)
            # record the results of the current epoch
            if args.with_nmi:
                rows.append([e] + list(recall[0]) + [recall[1]])
            else:
                rows.append([e] + list(recall))
            model.losses = losses
            model.current_epoch = e

    t2 = time.time()
    logging.info("Total training time (minutes): {:.2f}.".format((t2 - t1) / 60))
    return pd.DataFrame(rows, columns = columns)

In [None]:
# Hyper-parameter grid. Every combination is trained once; combinations that
# already have rows in the results CSV (same torch version) are skipped, so
# the cell can be interrupted and re-run to resume.
seeds = [0]
lrs = [1]
scaling_xs = [1.0,3.0,8.0]
scaling_ps = [1.0,3.0,8.0]
sz_embs = [64]
sz_batches = [32,128]
eds = [0]

results = {}
if os.path.exists(args.results_filename):
    # Read the version column back as text so it compares equal to
    # args.torch_version even when the version string looks numeric.
    results_df = pd.read_csv(args.results_filename, dtype = {'torch version': str})
else:
    results_df = pd.DataFrame(columns = ['index','epoch', 'r@1', 'r@2', 'r@4', 'r@8', 'NMI',
                                         'lr','scl_x','scl_p','sz_emb','seed', 'edition',
                                         'batch', 'torch version'])
index = 0

# itertools.product iterates in the same order as the equivalent nested loops
# (lr outermost, batch size innermost).
for lr, scl_x, scl_p, sz_emb, seed, ed, sz_batch in itertools.product(
        lrs, scaling_xs, scaling_ps, sz_embs, seeds, eds, sz_batches):
    args.lr_proxynca = lr
    args.scaling_x = scl_x
    args.scaling_p = scl_p
    # The attribute read by setup_model/setup_criterion is `sz_embedding`;
    # assigning `sz_embeddings` silently left the embedding size unchanged.
    args.sz_embedding = sz_emb
    args.seed = seed
    args.edition = ed
    args.sz_batch = sz_batch

    already_done = (
        (results_df.lr == lr)
        & (results_df.scl_x == scl_x)
        & (results_df.scl_p == scl_p)
        & (results_df.sz_emb == sz_emb)
        & (results_df.seed == seed)
        & (results_df.edition == ed)
        & (results_df['torch version'] == args.torch_version)
        & (results_df['batch'] == sz_batch)
    ).any()

    if already_done:
        print(f'skipped version:{index}. It was already done.')
        index+=1
        continue

    # Continue numbering after the highest run index stored so far.
    if results_df['index'].shape[0] > 0:
        index = results_df['index'].max() + 1
    print(index)
    res_df = train_and_test()
    res_df['lr'] = lr
    res_df['scl_x'] = scl_x
    res_df['scl_p'] = scl_p
    res_df['index'] = index
    res_df['sz_emb'] = sz_emb
    res_df['seed'] = seed
    res_df['edition'] = ed
    res_df['batch'] = sz_batch
    res_df['torch version'] = args.torch_version
    results_df = pd.concat([results_df, res_df])
    # Persist after every run so finished runs survive an interruption.
    results_df.to_csv(args.results_filename, index = False)
    index+=1

0


2020-09-15 23:22:16,192 Training parameters: {'__module__': '__main__', 'dataset': 'cars', 'config': 'config1.json', 'sz_embedding': 64, 'sz_batch': 32, 'nb_epochs': 40, 'gpu_id': 0, 'nb_workers': 4, 'with_nmi': True, 'scaling_x': 1.0, 'scaling_p': 1.0, 'lr_proxynca': 1, 'log_filename': 'cars-20200915-232212', 'results_filename': 'cars-results.csv', 'torch_version': '1.1.0', 'edition': 0, 'seed': 0, '__dict__': <attribute '__dict__' of 'args' objects>, '__weakref__': <attribute '__weakref__' of 'args' objects>, '__doc__': None, 'sz_embeddings': 64}
2020-09-15 23:22:16,193 Training for 40 epochs
2020-09-15 23:22:16,195 **Evaluating initial model.**
2020-09-15 23:23:13,444 NMI: 32.712
2020-09-15 23:23:19,328 R@1 : 28.422
2020-09-15 23:23:19,504 R@2 : 39.282
2020-09-15 23:23:19,680 R@4 : 51.703
2020-09-15 23:23:19,856 R@8 : 64.457
2020-09-15 23:24:11,817 Epoch: 0, loss: 4.028, time (seconds): 51.96.
2020-09-15 23:24:11,818 **Evaluating.**
2020-09-15 23:24:50,596 NMI: 43.347
2020-09-15 23:

2020-09-15 23:58:45,339 Epoch: 25, loss: 3.098, time (seconds): 39.20.
2020-09-15 23:58:45,340 **Evaluating.**
2020-09-15 23:59:23,104 NMI: 48.733
2020-09-15 23:59:28,137 R@1 : 39.159
2020-09-15 23:59:28,313 R@2 : 51.347
2020-09-15 23:59:28,489 R@4 : 63.535
2020-09-15 23:59:28,665 R@8 : 74.038
2020-09-16 00:00:07,881 Epoch: 26, loss: 3.094, time (seconds): 39.21.
2020-09-16 00:00:07,882 **Evaluating.**
2020-09-16 00:00:45,270 NMI: 45.647
2020-09-16 00:00:50,299 R@1 : 36.084
2020-09-16 00:00:50,476 R@2 : 47.928
2020-09-16 00:00:50,653 R@4 : 59.661
2020-09-16 00:00:50,830 R@8 : 70.434
2020-09-16 00:01:30,009 Epoch: 27, loss: 3.127, time (seconds): 39.18.
2020-09-16 00:01:30,010 **Evaluating.**
2020-09-16 00:02:07,619 NMI: 45.410
2020-09-16 00:02:12,702 R@1 : 36.232
2020-09-16 00:02:12,879 R@2 : 47.460
2020-09-16 00:02:13,064 R@4 : 59.070
2020-09-16 00:02:13,240 R@8 : 69.918
2020-09-16 00:02:52,641 Epoch: 28, loss: 3.109, time (seconds): 39.40.
2020-09-16 00:02:52,642 **Evaluating.**
2020

1


2020-09-16 00:19:12,938 Training parameters: {'__module__': '__main__', 'dataset': 'cars', 'config': 'config1.json', 'sz_embedding': 64, 'sz_batch': 128, 'nb_epochs': 40, 'gpu_id': 0, 'nb_workers': 4, 'with_nmi': True, 'scaling_x': 1.0, 'scaling_p': 1.0, 'lr_proxynca': 1, 'log_filename': 'cars-20200915-232212', 'results_filename': 'cars-results.csv', 'torch_version': '1.1.0', 'edition': 0, 'seed': 0, '__dict__': <attribute '__dict__' of 'args' objects>, '__weakref__': <attribute '__weakref__' of 'args' objects>, '__doc__': None, 'sz_embeddings': 64}
2020-09-16 00:19:12,939 Training for 40 epochs
2020-09-16 00:19:12,941 **Evaluating initial model.**
2020-09-16 00:19:53,330 NMI: 32.712
2020-09-16 00:19:59,898 R@1 : 28.422
2020-09-16 00:20:00,102 R@2 : 39.282
2020-09-16 00:20:00,297 R@4 : 51.703
2020-09-16 00:20:00,479 R@8 : 64.457
2020-09-16 00:20:31,769 Epoch: 0, loss: 4.294, time (seconds): 31.29.
2020-09-16 00:20:31,771 **Evaluating.**
2020-09-16 00:21:11,573 NMI: 39.126
2020-09-16 00

2020-09-16 00:50:57,348 Epoch: 25, loss: 3.309, time (seconds): 28.41.
2020-09-16 00:50:57,349 **Evaluating.**
2020-09-16 00:51:34,853 NMI: 54.637
2020-09-16 00:51:40,563 R@1 : 52.011
2020-09-16 00:51:40,748 R@2 : 64.297
2020-09-16 00:51:40,926 R@4 : 74.751
2020-09-16 00:51:41,104 R@8 : 83.298
2020-09-16 00:52:09,850 Epoch: 26, loss: 3.302, time (seconds): 28.74.
2020-09-16 00:52:09,852 **Evaluating.**
2020-09-16 00:52:47,060 NMI: 54.429
2020-09-16 00:52:52,723 R@1 : 51.519
2020-09-16 00:52:52,904 R@2 : 64.150
2020-09-16 00:52:53,089 R@4 : 74.493
2020-09-16 00:52:53,270 R@8 : 83.016
2020-09-16 00:53:22,367 Epoch: 27, loss: 3.282, time (seconds): 29.09.
2020-09-16 00:53:22,368 **Evaluating.**
2020-09-16 00:53:59,608 NMI: 54.636
2020-09-16 00:54:05,205 R@1 : 52.072
2020-09-16 00:54:05,402 R@2 : 64.777
2020-09-16 00:54:05,583 R@4 : 74.886
2020-09-16 00:54:05,763 R@8 : 83.372
2020-09-16 00:54:34,335 Epoch: 28, loss: 3.286, time (seconds): 28.57.
2020-09-16 00:54:34,336 **Evaluating.**
2020

2


2020-09-16 01:08:31,447 Training parameters: {'__module__': '__main__', 'dataset': 'cars', 'config': 'config1.json', 'sz_embedding': 64, 'sz_batch': 32, 'nb_epochs': 40, 'gpu_id': 0, 'nb_workers': 4, 'with_nmi': True, 'scaling_x': 1.0, 'scaling_p': 3.0, 'lr_proxynca': 1, 'log_filename': 'cars-20200915-232212', 'results_filename': 'cars-results.csv', 'torch_version': '1.1.0', 'edition': 0, 'seed': 0, '__dict__': <attribute '__dict__' of 'args' objects>, '__weakref__': <attribute '__weakref__' of 'args' objects>, '__doc__': None, 'sz_embeddings': 64}
2020-09-16 01:08:31,448 Training for 40 epochs
2020-09-16 01:08:31,454 **Evaluating initial model.**
2020-09-16 01:09:11,895 NMI: 32.712
2020-09-16 01:09:18,229 R@1 : 28.422
2020-09-16 01:09:18,411 R@2 : 39.282
2020-09-16 01:09:18,596 R@4 : 51.703
2020-09-16 01:09:18,776 R@8 : 64.457
2020-09-16 01:09:59,677 Epoch: 0, loss: 3.544, time (seconds): 40.90.
2020-09-16 01:09:59,679 **Evaluating.**
2020-09-16 01:10:40,457 NMI: 43.925
2020-09-16 01: