### Set GPU

In [1]:
import os
# Set CUDA_VISIBLE_DEVICES to "2" to restrict which GPUs CUDA exposes to this process.
# This is done here, before torch is imported further down in the notebook.
os.environ['CUDA_VISIBLE_DEVICES'] = "2"

### Set Dataset Name

In [2]:
# Dataset to extract features for. Names handled by the path-selection cell further down:
# 'CIFAR10', 'CIFAR100', 'MNIST', 'TINYIMAGENET', 'IMBALANCED_CIFAR10'.
dataset_name = 'IMBALANCED_CIFAR10'

### Run all remaining cells

In [3]:
# from models.resnet_stl import resnet18
import torch
import numpy as np
from tqdm import tqdm


from collections import OrderedDict

from models.resnet_cifar import resnet18
from utils.memory import MemoryBank
from utils.train_utils import simclr_train
from utils.utils import fill_memory_bank
from utils.config import create_config
from utils.common_config import get_model

In [4]:
# Per-dataset settings, keyed by dataset name:
#   (results sub-folder, SimCLR pretext config, pycls config).
DATASET_SETTINGS = {
    'CIFAR10': (
        'cifar-10/',
        './configs/pretext/simclr_cifar10.yml',
        'configs/CIFAR10_RESNET18.yaml',
    ),
    'CIFAR100': (
        'cifar-20/',
        './configs/pretext/simclr_cifar20.yml',
        'configs/CIFAR100_RESNET18.yaml',
    ),
    'MNIST': (
        'mnist/',
        './configs/pretext/simclr_mnist.yml',
        'configs/MNIST_RESNET18.yaml',
    ),
    'TINYIMAGENET': (
        'tinyimagenet/',
        './configs/pretext/simclr_tinyimagenet.yml',
        'configs/TINYIMAGENET_RESNET18.yaml',
    ),
    # The imbalanced variant has its own pretext config but reuses the CIFAR10 pycls config.
    'IMBALANCED_CIFAR10': (
        'imbalanced-cifar-10/',
        './configs/pretext/simclr_cifar10_im.yml',
        'configs/CIFAR10_RESNET18.yaml',
    ),
}

# Fail early on an unknown name instead of leaving config_exp_path / cfg_path
# undefined and trying to load a checkpoint from the bare results folder.
if dataset_name not in DATASET_SETTINGS:
    raise ValueError(
        f"Unsupported dataset_name {dataset_name!r}; "
        f"expected one of {sorted(DATASET_SETTINGS)}"
    )

results_subdir, config_exp_path, cfg_path = DATASET_SETTINGS[dataset_name]
output_folder = '../results/' + results_subdir

path_to_model = output_folder + 'pretext/model.pth.tar'

# Deserialize onto the CPU so the checkpoint loads regardless of which GPU index
# it was saved from; the model is moved to the GPU later, at extraction time.
temp = torch.load(path_to_model, map_location='cpu')

In [5]:
import argparse

# Build the run configuration `p` from the environment config and the dataset-specific
# pretext config selected earlier; `p` is handed to get_model in the next cell.
config_env_path = './configs/env.yml'
p = create_config(config_env_path, config_exp_path)

In [6]:
# Build the network described by `p` and load the checkpoint read earlier (`temp`) into it.
model = get_model(p)
model.load_state_dict(temp)
# Switch to evaluation mode; the trailing semicolon suppresses the cell's echoed output.
model.eval();

In [7]:
from pycls.datasets.data import Data
from pycls.config import cfg
# Merge the pycls settings for the selected dataset from its YAML file into the global cfg.
cfg.merge_from_file(cfg_path)
# Override the dataset name after merging: IMBALANCED_CIFAR10 reuses the CIFAR10 YAML,
# so the name has to be set explicitly here.
cfg.DATASET.NAME = dataset_name
data_obj = Data(cfg)

In [8]:
# Load the training data (isTrain=True, downloading if needed) from the configured root
# directory one level up; also returns the number of samples.
# NOTE(review): the logged preprocessing for this call includes RandomCrop, so the
# extracted features may differ between runs — confirm this is intended.
train_data, train_size = data_obj.getDataset(save_dir=f'../{cfg.DATASET.ROOT_DIR}', isTrain=True, isDownload=True)

Preprocess Operations Selected ==>  [RandomCrop(size=(32, 32), padding=4), ToTensor(), Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.2435, 0.2616])]
Files already downloaded and verified
Train Mode: Contain 13996 images


In [9]:
# The train/val partitioning below is disabled; every index 0..train_size-1 is used instead.
# trainSet_path, valSet_path = data_obj.makeTVSets(val_split_ratio=cfg.DATASET.VAL_RATIO, data=train_data,\
#                                  seed_id=cfg.RNG_SEED, save_dir='exp')
# trainSet, valSet = data_obj.loadTVPartitions(trainSetPath=trainSet_path, valSetPath=valSet_path)
# dtype=np.ndarray is kept as-is (NumPy stores the indices as generic objects);
# presumably this matches what the pycls loaders expect — TODO confirm.
trainSet = np.array(list(range(train_size)), dtype=np.ndarray)

In [10]:
@torch.no_grad()
def get_representation(clf_model, idx_set, dataset):
    """Run `clf_model` over the selected samples and stack its outputs.

    Relies on the notebook-level `data_obj` (to build a sequential data loader)
    and `cfg` (for the batch size), so both must be defined before calling.
    The model's train/eval mode is left unchanged; call `.eval()` beforehand
    if evaluation-mode behaviour is wanted.

    Args:
        clf_model: Model to evaluate; it is moved to the GPU as a side effect.
        idx_set: Indices of `dataset` to process, passed to
            `data_obj.getSequentialDataLoader`.
        dataset: Dataset the indices refer to; each batch from the loader is
            unpacked as `(inputs, _)` and the second element is ignored.

    Returns:
        numpy.ndarray: The per-batch model outputs concatenated along axis 0.
    """
    clf_model.cuda()

    # TRAIN.BATCH_SIZE divided across NUM_GPUS, truncated to an integer.
    batch_size = int(cfg.TRAIN.BATCH_SIZE/cfg.NUM_GPUS)
    loader = data_obj.getSequentialDataLoader(indexes=idx_set, batch_size=batch_size, data=dataset)

    print(f"len(dataLoader): {len(loader)}")

    features = []
    # Gradients are already disabled by the decorator, so no inner no_grad block is needed.
    for x, _ in tqdm(loader, desc="Extracting Representations"):
        # Move the batch to the GPU as float32 before the forward pass.
        x = x.cuda().float()
        features.append(clf_model(x).cpu().numpy())

    return np.concatenate(features, axis=0)

In [11]:
# Extract representations for every index in trainSet using the loaded model.
features = get_representation(model, trainSet, train_data)

Extracting Representations:   1%|          | 1/110 [00:00<00:21,  5.14it/s]

len(dataLoader): 110


Extracting Representations: 100%|██████████| 110/110 [00:05<00:00, 20.28it/s]


In [12]:
# Display the shape of the extracted feature array as a sanity check.
features.shape

(13996, 128)

In [13]:
# Store the features in the dataset's results folder; the file name records the
# size of the second feature axis. os.path.join avoids the doubled separator,
# since output_folder already ends with '/'.
features_path = os.path.join(output_folder, f'{dataset_name}_features{features.shape[1]}.npy')
np.save(features_path, features)