In [1]:
%load_ext autoreload
%autoreload 2
import torch
from torchvision import datasets, transforms
torch.manual_seed(42)
import json
import os.path

from twosample import get_initial_clustering, get_embeddings, merge, utils
from twosample.affinity import average_linkage_affinity, twosample_affinity, fast_twosample_affinity

In [2]:
# Hyperparameters shared by the experiments in this notebook, declared as one
# nested literal so the whole configuration can be read at a glance.
hp = {
    'dataset': 'imagenet-10',
    # Shape of the clustering every experiment starts from.
    'initial_clustering': {
        'num_initial_clusters': 20,
        'initial_cluster_size': 100,
    },
    # Settings for the affinity computation between clusters.
    'affinity': {
        'type': 'averagelinkage',
        'epochs': 50,
        'batch_size': 64,
        'train_ratio': 0.5,
        'log_interval': -1,  # disabled
    },
    'device': torch.device('cuda'),
    # Number of merge steps performed per experiment.
    'desired_num_merges': 7,
}

In [3]:
def run_experiment(hp):
    """Run one merge experiment and persist its outcome as a JSON file.

    The result is stored at
    ``results/3-experiment/<dataset>/<num_initial_clusters>x<initial_cluster_size>_<affinity type>.json``.
    If that file already exists the experiment is skipped, so the file acts
    as a cache marker for completed runs.

    Args:
        hp: Hyperparameter dict. This function reads ``hp['dataset']``,
            ``hp['initial_clustering']['num_initial_clusters']``,
            ``hp['initial_clustering']['initial_cluster_size']``,
            ``hp['affinity']['type']`` and ``hp['desired_num_merges']``, and
            expects a ``'device'`` key (dropped from the saved copy). The dict
            is also forwarded unchanged to the clustering/affinity helpers.

    Returns:
        None. Progress is printed and the result is written to disk.

    Raises:
        ValueError: if ``hp['affinity']['type']`` is not one of
            ``'averagelinkage'``, ``'twosample'`` or ``'fast_twosample'`` when
            a merge step needs an affinity.
    """
    experiment_directory = 'results/3-experiment/{}'.format(hp['dataset'])
    utils.create_directory_if_not_exists(experiment_directory)

    experiment_filename = experiment_directory + '/{}x{}_{}.json'.format(
        hp['initial_clustering']['num_initial_clusters'],
        hp['initial_clustering']['initial_cluster_size'],
        hp['affinity']['type'])

    if os.path.isfile(experiment_filename):
        print('Experiment already exists at:', experiment_filename)
        return

    print('Running experiment with hyperparameters:\n{}'.format(hp))
    result = {}
    # The image dataset is handed to the two-sample affinities below; the
    # average-linkage affinity works on embeddings instead.
    transform_chain = [transforms.Resize(256),
                       transforms.CenterCrop(224),
                       transforms.ToTensor()]
    dataset = datasets.ImageFolder('./datasets/' + hp['dataset'],
                                   transform=transforms.Compose(transform_chain))

    current_clustering = get_initial_clustering(hp)
    result['initial_clustering'] = current_clustering

    affinity_type = hp['affinity']['type']
    if affinity_type == 'averagelinkage':
        # Embeddings are computed once and reused for every merge step.
        embeddings = get_embeddings(hp)

    result['merges'] = []
    for merge_index in range(hp['desired_num_merges']):
        print('Merge index:', merge_index)
        if affinity_type == 'averagelinkage':
            affinities = average_linkage_affinity(current_clustering, embeddings, hp)
        elif affinity_type == 'twosample':
            affinities = twosample_affinity(current_clustering, dataset, hp)
        elif affinity_type == 'fast_twosample':
            affinities = fast_twosample_affinity(current_clustering, dataset, hp)
        else:
            # Fail with a clear message instead of an unbound `affinities`.
            raise ValueError('Unknown affinity type: {!r}'.format(affinity_type))
        current_clustering, cluster_a_index, cluster_b_index = merge(current_clustering, affinities)
        result['merges'].append({'current_clustering': current_clustering,
                                 'cluster_a_index': cluster_a_index,
                                 'cluster_b_index': cluster_b_index,
                                 'affinities': affinities.tolist()})

    print('Writing experiment results to', experiment_filename)
    experiment = {'hp': hp.copy(), 'result': result}
    del experiment['hp']['device'] # cannot be serialized

    # Write to a temporary file and move it into place only after the dump
    # succeeded. A partially written result file would otherwise make the
    # "already exists" check above skip this experiment on every later run.
    temporary_filename = experiment_filename + '.tmp'
    try:
        with open(temporary_filename, 'w') as experiment_file:
            json.dump(experiment, experiment_file, indent=4)
        os.replace(temporary_filename, experiment_filename)
    finally:
        # Only present here if the dump or the move failed.
        if os.path.isfile(temporary_filename):
            os.remove(temporary_filename)
    torch.cuda.empty_cache()

In [None]:
# Sweep every dataset / affinity-type combination. Each run receives its own
# hyperparameter dict derived from `hp`, so the shared `hp` configuration is
# not modified by this cell and re-running cells stays predictable.
num_datasets = 10
affinity_types = ['averagelinkage', 'twosample', 'fast_twosample']
for dataset_index in range(num_datasets):
    for affinity_type in affinity_types:
        # dict(base, key=...) keeps the key order of `base`, so the printed
        # and saved hyperparameters look the same as before.
        run_hp = dict(hp,
                      dataset='imagenet-{}'.format(dataset_index),
                      affinity=dict(hp['affinity'], type=affinity_type))
        run_experiment(run_hp)

Running experiment with hyperparameters:
{'dataset': 'imagenet-0', 'initial_clustering': {'num_initial_clusters': 20, 'initial_cluster_size': 100}, 'affinity': {'type': 'averagelinkage', 'epochs': 50, 'batch_size': 64, 'train_ratio': 0.5, 'log_interval': -1}, 'device': device(type='cuda'), 'desired_num_merges': 7}
Generate embeddings for imagenet-0
Architecture: vgg16
=> loading checkpoint 'deepcluster/pretrained/vgg16/checkpoint.pth.tar'
=> loaded checkpoint 'deepcluster/pretrained/vgg16/checkpoint.pth.tar' (epoch 425)
Load dataset: 0.10 s
Compute features
0 / 204	Time: 3.168 (3.168)
5 / 204	Time: 0.171 (0.675)
10 / 204	Time: 0.319 (0.461)
15 / 204	Time: 0.173 (0.384)
20 / 204	Time: 0.174 (0.359)
25 / 204	Time: 0.174 (0.325)
30 / 204	Time: 0.324 (0.308)
35 / 204	Time: 0.175 (0.292)
40 / 204	Time: 0.174 (0.278)
45 / 204	Time: 0.172 (0.270)
50 / 204	Time: 0.262 (0.273)
55 / 204	Time: 0.349 (0.268)
60 / 204	Time: 0.175 (0.262)
65 / 204	Time: 0.174 (0.260)
70 / 204	Time: 0.214 (0.260)
75 



Epoch: 1, Train loss: 7.46, mean arithmetic mean: 0.56
Epoch: 2, Train loss: 4.47, mean arithmetic mean: 0.70
Epoch: 3, Train loss: 3.74, mean arithmetic mean: 0.81
Epoch: 4, Train loss: 3.37, mean arithmetic mean: 0.82
Epoch: 5, Train loss: 3.24, mean arithmetic mean: 0.75
Epoch: 6, Train loss: 3.32, mean arithmetic mean: 0.83
Epoch: 7, Train loss: 2.79, mean arithmetic mean: 0.82
Epoch: 8, Train loss: 2.75, mean arithmetic mean: 0.79
Epoch: 9, Train loss: 2.43, mean arithmetic mean: 0.85
Epoch: 10, Train loss: 2.26, mean arithmetic mean: 0.82
Epoch: 11, Train loss: 2.24, mean arithmetic mean: 0.84
