### Set GPU

In [1]:
import os
# Expose only physical GPU 3 to this process. This runs before torch is
# imported further down, so the restriction is in place when CUDA is first used.
os.environ.update(CUDA_VISIBLE_DEVICES="3")

### Set Dataset Name

In [2]:
# Dataset whose self-label checkpoint is evaluated. Keep exactly one assignment
# active; the value selects the results folder and config files in a later cell,
# which only recognises the four names listed here.
dataset_name = 'CIFAR10'
# dataset_name = 'CIFAR100'
# dataset_name = 'MNIST'
# dataset_name = 'TINYIMAGENET'

### Run All Now

In [3]:
# from models.resnet_stl import resnet18
import torch
import numpy as np
from tqdm import tqdm

from models.resnet_cifar import resnet18
from utils.memory import MemoryBank
from utils.train_utils import simclr_train
from utils.utils import fill_memory_bank
from utils.config import create_config
from utils.common_config import get_model, get_train_dataset, get_val_transformations, get_train_dataloader
from utils.evaluate_utils import hungarian_evaluate2

In [4]:
# Per-dataset settings:
#   (results sub-folder, self-label experiment config, pycls config file)
# NOTE(review): CIFAR100 points at the `cifar20` self-label config — presumably
# the 20-superclass variant; confirm this is intended.
_DATASET_SETTINGS = {
    'CIFAR10': (
        'cifar-10/',
        './configs/selflabel/selflabel_cifar10.yml',
        'configs/CIFAR10_RESNET18.yaml',
    ),
    'CIFAR100': (
        'cifar-100/',
        './configs/selflabel/selflabel_cifar20.yml',
        'configs/CIFAR100_RESNET18.yaml',
    ),
    'MNIST': (
        'mnist/',
        './configs/selflabel/selflabel_mnist.yml',
        'configs/MNIST_RESNET18.yaml',
    ),
    'TINYIMAGENET': (
        'tinyimagenet/',
        './configs/selflabel/selflabel_tinyimagenet.yml',
        'configs/TINYIMAGENET_RESNET18.yaml',
    ),
}

# Fail fast on an unknown name instead of leaving `config_exp_path` / `cfg_path`
# undefined and silently pointing `path_to_model` at the wrong folder.
if dataset_name not in _DATASET_SETTINGS:
    raise ValueError(
        f"Unsupported dataset_name {dataset_name!r}; "
        f"expected one of {sorted(_DATASET_SETTINGS)}"
    )

_results_subfolder, config_exp_path, cfg_path = _DATASET_SETTINGS[dataset_name]
output_folder = '../results/' + _results_subfolder

path_to_model = output_folder + 'selflabel/model.pth.tar'

# Load onto the CPU so the checkpoint does not depend on the GPU index it was
# saved from; the weights are copied into the model and moved to the GPU later.
temp = torch.load(path_to_model, map_location='cpu')

In [5]:
import argparse

# Build the experiment configuration `p` from the environment config file and
# the dataset-specific experiment config chosen earlier (`config_exp_path`).
# `p` is later indexed like a mapping (e.g. p['num_heads']).
config_env_path = './configs/env.yml'
p = create_config(config_env_path, config_exp_path)

In [6]:
# Instantiate the network described by `p`, restore the loaded checkpoint
# weights, then move it to the GPU and switch it to inference mode.
model = get_model(p)
model.load_state_dict(temp)
model.cuda()
model.eval()

# Training split built with the validation-time transformations and without
# the augmented-dataset wrapper.
# NOTE(review): `train_dataloader` is reassigned by the pycls data cell further
# down, so the loader created here is not the one used for prediction.
train_dataset = get_train_dataset(
    p,
    get_val_transformations(p),
    split='train',
    to_augmented_dataset=False,
)
train_dataloader = get_train_dataloader(p, train_dataset)

### Change the batch size if you run into an out-of-memory error

In [7]:
from pycls.datasets.data import Data
from pycls.config import cfg
# Merge the dataset-specific pycls config and build the data helper from it.
cfg.merge_from_file(cfg_path)
data_obj = Data(cfg)

# Training split, loaded over every sample index 0..train_size-1.
# NOTE(review): the preprocessing logged by this cell includes RandomResizedCrop;
# if that applies to this split, predictions are made on randomly cropped
# images — confirm this is intended.
train_data, train_size = data_obj.getDataset(
    save_dir=cfg.DATASET.ROOT_DIR,
    isTrain=True,
    isDownload=True,
)
# dtype=np.ndarray produces an object-dtype index array; kept as-is because it
# is presumably the format the pycls loader expects — TODO confirm.
trainSet = np.array(list(range(train_size)), dtype=np.ndarray)
# Lower batch_size here if the GPU runs out of memory.
train_dataloader = data_obj.getSequentialDataLoader(
    indexes=trainSet,
    batch_size=256,
    data=train_data,
)

# Test split and its loader, using the batch size and seed from the pycls config.
test_data, test_size = data_obj.getDataset(
    save_dir=cfg.DATASET.ROOT_DIR,
    isTrain=False,
    isDownload=True,
)
test_dataloader = data_obj.getTestLoader(
    data=test_data,
    test_batch_size=cfg.TRAIN.BATCH_SIZE,
    seed_id=cfg.RNG_SEED,
)

Preprocess Operations Selected ==>  [RandomResizedCrop(size=(32, 32), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR), ToTensor(), Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.2435, 0.2616])]
Files already downloaded and verified
Files already downloaded and verified


In [8]:
import torch.nn.functional as F

@torch.no_grad()
def get_predictions(p, dataloader, model, return_features=False):
    """Run `model` over `dataloader` and collect per-head cluster predictions.

    Args:
        p: Config mapping; only ``p['num_heads']`` is read here.
        dataloader: Iterable yielding ``(images, labels)`` batches. Images are
            moved to the GPU; labels are concatenated as yielded.
        model: Network called as ``model(images, forward_pass=...)``. With
            ``forward_pass='default'`` it must return one score tensor per
            head (argmax is taken over dim 1).
        return_features: When True, also return the features produced for
            every sample.

    Returns:
        A list with one ``{'predictions': ..., 'targets': ...}`` dict per head,
        where ``predictions`` is a CPU tensor of argmax indices and ``targets``
        is the concatenation of the labels yielded by the dataloader. When
        ``return_features`` is True, returns ``(that_list, features)`` with
        ``features`` a CPU tensor concatenated over batches.
    """
    model.eval()
    predictions = [[] for _ in range(p['num_heads'])]
    targets = []
    features = []

    for images, lbl in tqdm(dataloader, desc="Extracting Self Label Predictions"):
        images = images.cuda()
        if return_features:
            # The previous implementation preallocated a zero tensor (via an
            # un-imported helper) and never filled it. Collect the real
            # features instead.
            # NOTE(review): assumes the model supports forward_pass='return_all'
            # and returns a dict with 'features' and 'output' keys, as the
            # upstream SCAN clustering model does — confirm for this project.
            res = model(images, forward_pass='return_all')
            output = res['output']
            features.append(res['features'].cpu())
        else:
            output = model(images, forward_pass='default')
        for i, output_i in enumerate(output):
            predictions[i].append(torch.argmax(output_i, dim=1))
        targets.append(lbl)

    predictions = [torch.cat(pred_, dim=0).cpu() for pred_ in predictions]
    targets = torch.cat(targets, dim=0)

    # Every head shares the same targets tensor.
    out = [{'predictions': pred_, 'targets': targets} for pred_ in predictions]

    if return_features:
        return out, torch.cat(features, dim=0)
    return out

In [13]:
# Per-head predictions for the training loader; the result is a list with one
# {'predictions', 'targets'} dict per head.
predictions = get_predictions(p, train_dataloader, model)

Extracting Self Label Predictions: 100%|██████████| 196/196 [00:17<00:00, 11.00it/s]


In [14]:
# Evaluate the predictions against the ground-truth targets.
# NOTE(review): the leading 0 is presumably the index of the head to score — confirm.
# The confusion matrix is disabled, so the file name is only a placeholder.
clustering_stats = hungarian_evaluate2(
    0,
    predictions,
    class_names=train_data.classes,
    compute_confusion_matrix=False,
    confusion_matrix_file='confusion_matrix.png',
)

In [15]:
# Display the metrics returned by hungarian_evaluate2 (ACC, ARI, NMI and the
# Hungarian cluster-to-class matching, per the output below).
clustering_stats

{'ACC': 0.64748,
 'ARI': 0.3833661016526684,
 'NMI': 0.4597771161905145,
 'hungarian_match': [(0, 5),
  (1, 7),
  (2, 6),
  (3, 8),
  (4, 2),
  (5, 9),
  (6, 3),
  (7, 4),
  (8, 0),
  (9, 1)]}

In [20]:
# Persist head 0's cluster assignments next to the other results for this
# dataset. These are the raw argmax cluster ids from get_predictions; the
# Hungarian matching in `clustering_stats` is not applied to them.
# os.path.join avoids the doubled '/' the f-string produced, since
# `output_folder` already ends with a separator.
np.save(
    os.path.join(output_folder, f'{dataset_name}_SCAN_cluster_ids.npy'),
    predictions[0]['predictions'],
)