In [2]:
import cv2
import numpy as np
from scipy.spatial.distance import pdist

import torch
import torchvision.models as models
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

%load_ext autoreload
%autoreload 2
from utils.data import get_data_loaders
from utils.train_eval import train, train_curriculum
from utils.misc import get_features, cluster_features

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# NOTE(review): not referenced by any cell shown in this notebook; presumably
# the dimensionality of the extracted feature vectors — confirm or remove.
FEAT_DIM = 512

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Dataset selection: either "cifar10" or "flowers102".
dataset = "cifar10"

# DataLoader settings; this dict is also handed to `train_curriculum` later on.
data_config = {
    "batch_size": 64,
    "num_workers": 2,
}

# With `return_dataset=True` the helper returns three values: the loaders,
# the number of classes, and the underlying dataset objects.
# The third value is deliberately NOT named `datasets`: that name is already
# bound to the `torchvision.datasets` module by the import cell, and rebinding
# it to a tuple would silently shadow the module for the rest of the session.
loaders, num_classes, dataset_splits = get_data_loaders(
    dataset=dataset,
    batch_size=data_config["batch_size"],
    num_workers=data_config["num_workers"],
    return_dataset=True,
)

train_loader, test_loader = loaders
train_dataset, test_dataset = dataset_splits

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Sanity check: report how many mini-batches each loader yields per epoch.
num_train_batches = len(train_loader)
num_test_batches = len(test_loader)
print(f"Number of training batches: {num_train_batches}")
print(f"Number of testing batches: {num_test_batches}")

Number of training batches: 782
Number of testing batches: 157


In [None]:
# which model to use for feature extraction?
# An ImageNet-pretrained VGG16 is loaded and only its `features` sub-module is
# handed to the extraction helper.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; switch once the minimum supported torchvision version is known.
vgg16 = models.vgg16(pretrained=True)
extractor = vgg16.features

# features for all the samples in the train dataloader
# NOTE(review): presumably `get_features` moves the extractor and batches to
# `device` and disables gradients — confirm in utils.misc.
feats = get_features(extractor, train_loader, device)



In [None]:
"""
NOTE: If there are too many samples, training KMeans can take a 
lot of time. To prevent that from happening, you can limit the number
of samples being used to train the KMeans using this cell. 
E.g., uncomment the last line to only use the first 1000 features.
"""
features = feats
# features = feats[:1000]

In [None]:
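# NOTE: disabled experiment. `get_pairwise_distance` is not imported by the
# import cell of this notebook, so bring it into scope before re-enabling.
# dists = get_pairwise_distance(feats)
# print(dists.shape)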

In [8]:
# Number of clusters to split the input samples into using KMeans
# (`num_clusters` is also embedded in the TensorBoard log directory name later).
num_clusters = 5
# `c_labels` is iterated with `enumerate` in the next cell, where each entry is
# used as the cluster key for the sample at that position.
# NOTE(review): the recorded output of this cell is
# "NameError: name 'KMeans' is not defined". This cell never references KMeans
# itself, so the error presumably originates inside `cluster_features`
# (utils.misc) — verify that module imports KMeans.
# NOTE(review): no random seed is passed here; if the helper does not fix one,
# cluster assignments may differ between runs — confirm.
c_labels = cluster_features(features, num_clusters=num_clusters)

NameError: name 'KMeans' is not defined

In [None]:
# Group sample positions by the cluster each one was assigned to:
# clustered_data[cluster_label] -> list of positions within `c_labels`.
from collections import defaultdict

clustered_data = defaultdict(list)

for sample_pos, cluster_label in enumerate(c_labels):
    members = clustered_data[cluster_label]
    members.append(sample_pos)

In [12]:
# How big are the different clusters?
c_size = []

for l in clustered_data.keys():
    c_size.append((l, len(clustered_data[l])))

# sort by the number of samples in the cluster
c_size = sorted(c_size, key=lambda x: x[1], reverse=True)
print(c_size)

[(1, 17686), (0, 11627), (3, 11611), (4, 4729), (2, 4347)]


In [25]:
# The network to be trained: a ResNet-18 without pretrained weights.
model = models.resnet18(pretrained=False)

# Replace the final fully connected layer so its output size matches the
# number of classes in the selected dataset.
fc_input = model.fc.in_features
model.fc = nn.Linear(fc_input, num_classes)

# Move the model to the target device BEFORE building the optimizer, as the
# PyTorch optimizer documentation recommends, so the optimizer is constructed
# over parameters that already live where they will be trained.
model = model.to(device)

learning_rate = 1e-3

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [26]:
model = model.to(device)

# How many epochs to train for (per cluster)?
num_epochs = 10

# We experimented with two different ways to present data to the model:
#   1. Present clusters in increasing order of cluster size (mode: S2L)
#   2. Present clusters in decreasing order of cluster size (mode: L2S)
mode = "L2S"

# NOTE(review): this call originally passed `c_idx`, a name that no cell in
# this notebook defines, so it raised NameError on a fresh kernel. `c_size`
# (the (cluster_label, sample_count) list sorted by size, built above) is the
# only cluster ordering this notebook creates and is assumed to be the intended
# argument — confirm against the signature of `train_curriculum`.
train_curriculum(model, dataset, train_dataset, clustered_data, c_size, mode, num_epochs, data_config)

Cluster 1 (35.37% data) done. Test Acc: 56.960
Cluster 0 (23.25% data) done. Test Acc: 53.510
Cluster 3 (23.22% data) done. Test Acc: 54.800
Cluster 4 (9.46% data) done. Test Acc: 49.780
Cluster 2 (8.69% data) done. Test Acc: 38.860


In [22]:
# Residual training: fine-tune the model on the entire dataset for a few epochs.

num_epochs_res = 2

# TensorBoard log directory; the run name encodes the dataset, curriculum mode,
# epochs per cluster, number of clusters, and number of residual epochs.
log_dir = f"./logs/{dataset}_vgg16_{mode}_{num_epochs}_c{num_clusters}_residual{num_epochs_res}"
writer = SummaryWriter(log_dir)

# Settings handed to `train`: the optimizer and loss built earlier, plus two
# logging frequencies.
# NOTE(review): the log_freq_* values are presumably counted in iterations —
# confirm in utils.train_eval.
config = {
    "opt": optimizer,
    "crit": criterion,
    "log_freq_test": 250,
    "log_freq_tr": 150,
}

try:
    train(model, train_loader, test_loader, num_epochs_res, config, device, writer)
finally:
    # Always flush and release the event file, even if training is interrupted,
    # so the scalars logged so far are not lost.
    writer.close()