## Gaussian Pyramid Levels

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import random

import cv2
import numpy as np
import matplotlib.pyplot as plt
from online_triplet_loss.losses import *
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import datasets, transforms
import torch.nn as nn
from torch.nn import functional as F
from torch import linalg
from scipy.spatial.distance import pdist
from sklearn.cluster import KMeans
from copy import deepcopy

from collections import defaultdict
from tqdm import tqdm


from torch.utils.data import DataLoader
from torchvision import models
import torch.optim as optim

## Learning

In [3]:
class CustomResNet(nn.Module):
    """ResNet-18 feature extractor followed by a linear classification head.

    The backbone is ``torchvision.models.resnet18`` built without pretrained
    weights and with its last child module removed; a fresh
    ``nn.Linear(512, num_classes)`` head is applied to the backbone output.

    Args:
        num_classes: Number of logits produced by the linear head.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Keep every child of resnet18 except the last one, so the backbone
        # ends with the features that feed the 512-wide linear head below.
        modules = list(models.resnet18(weights=None).children())[:-1]
        self.model = nn.Sequential(*modules)
        self.linear = nn.Linear(512, num_classes)

    def forward(self, x):
        """Compute the logits and the embedding for a batch.

        Args:
            x: Batch of inputs accepted by the backbone.

        Returns:
            A tuple ``(logits, embed)``: ``embed`` is the backbone output
            flattened to ``(batch, 512)`` and ``logits`` is
            ``(batch, num_classes)``.
        """
        # Flatten from dim 1 onwards instead of calling a bare ``squeeze()``:
        # squeeze would also remove the batch dimension for a single-sample
        # batch, yielding (512,) embeddings and (num_classes,) logits that no
        # longer line up with the per-sample labels.
        embed = torch.flatten(self.model(x), start_dim=1)
        logits = self.linear(embed)
        return logits, embed

In [4]:
class PetDataset(Dataset):
    """Dataset that pairs image files with integer labels.

    Args:
        flist: Sequence of image file paths.
        transform: Callable applied to each loaded (RGB) PIL image.
        labels: One integer label per entry of ``flist``.

    Raises:
        AssertionError: If ``flist`` and ``labels`` differ in length.
    """

    def __init__(self, flist, transform, labels):
        # Validate first so a mismatch fails before any state is stored.
        assert len(flist) == len(labels), "flist and labels must have the same length"

        self.flist = flist
        self.transform = transform
        # Stored as int64 so the labels can be used directly as class targets.
        self.labels = np.array(labels).astype("int64")

    def __len__(self):
        """Return the number of samples."""
        return len(self.flist)

    def __getitem__(self, index):
        """Load, convert and transform one image.

        Returns:
            A tuple ``(img, label)`` where ``img`` is the output of
            ``transform`` and ``label`` is the int64 label at ``index``.
        """
        sample = self.flist[index]

        # ``Image.open`` is lazy and keeps the file handle open, so read the
        # image inside a context manager. Converting to RGB guarantees three
        # channels for grayscale, palette and RGBA files before the transform.
        with Image.open(sample) as raw:
            img = self.transform(raw.convert("RGB"))

        return img, self.labels[index]

In [5]:
# cluster the features
def cluster_features(features, num_clusters, random_state=None):
    """Cluster feature vectors with k-means.

    Args:
        features: Feature matrix passed to ``KMeans.fit`` (one row per sample).
        num_clusters: Number of clusters to fit.
        random_state: Optional seed forwarded to ``KMeans`` so a run can be
            reproduced. ``None`` (the default) keeps the unseeded behaviour.

    Returns:
        The fitted estimator's ``labels_``: one cluster index per row of
        ``features``.
    """
    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state)
    kmeans.fit(features)
    return kmeans.labels_


def get_cluster_info(labels, num_clusters=None):
    """Print the number of empty clusters and plot the cluster sizes.

    Args:
        labels: Non-negative integer cluster indices, one per sample.
        num_clusters: Total number of clusters that were requested. When
            omitted, the range ``0 .. max(labels)`` is used instead, which can
            only reveal empty clusters below the largest index observed.

    Returns:
        None. The summary is printed and the bar chart is shown.
    """
    cluster_ids, cluster_sizes = np.unique(np.asarray(labels), return_counts=True)

    if num_clusters is None:
        num_clusters = int(cluster_ids.max()) + 1 if cluster_ids.size else 0

    # Counting only the labels that occur can never find an empty cluster, so
    # compare the occupied indices against the full index range instead.
    occupied = set(cluster_ids.tolist())
    empty = sum(1 for c in range(num_clusters) if c not in occupied)

    print(f"Number of empty clusters: {empty}")
    plt.bar(cluster_ids, cluster_sizes)
    # Label the axes before showing: anything set after ``plt.show()`` would
    # not appear on the figure that was displayed.
    plt.xlabel("Cluster index")
    plt.ylabel("Cluster size")
    plt.show()

    return

In [6]:
# Preprocessing applied to every image: resize to a fixed 224x224, convert to
# a tensor, then normalise each of the three channels. The mean/std values are
# the ones commonly published as ImageNet channel statistics.
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
]
transform = transforms.Compose(preprocessing_steps)

In [7]:
# use the augmented images
src = "pet/train/output"
# Keep regular, non-hidden files only: os.listdir also returns sub-directories
# (e.g. Jupyter's .ipynb_checkpoints) and dotfiles, which cannot be opened as
# images later on.
flist_full = [
    os.path.join(src, f)
    for f in sorted(os.listdir(src))
    if not f.startswith(".") and os.path.isfile(os.path.join(src, f))
]

**Training procedure:**


1. Once, before training: create random labels, then build a dataset and dataloader from them.
2. Run one epoch of forward and backward passes while collecting the feature vectors, then cluster those features with k-means.
3. Use the cluster assignments as the new labels and rebuild the dataset and dataloader.
4. Repeat from step 2 for each remaining epoch.


In [8]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [9]:
# Width of the embedding; matches the 512 inputs of the linear head in
# CustomResNet. Not referenced by the other cells shown in this notebook.
FEAT_DIM = 512

# Number of logits of the classifier head.
NUM_CLASSES = 200

# k-means produces one cluster per classifier output, so the cluster indices
# can be used directly as class labels.
NUM_CLUSTERS = NUM_CLASSES

In [10]:
# Network, loss and optimiser used by the training loop: cross-entropy over
# the NUM_CLASSES logits, optimised with Adam.
LEARNING_RATE = 0.005

model = CustomResNet(num_classes=NUM_CLASSES)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [24]:
# Number of batches to train on before re-clustering, and the batch size.
CLUSTER_EVERY = 250
BATCH_SIZE = 32

# False: train on a random subset of CLUSTER_EVERY * BATCH_SIZE files.
# True: train on every file and re-cluster once per full pass.
use_full_dataset = False

if use_full_dataset:
    flist = flist_full
    # Ceiling integer division: CLUSTER_EVERY is used as a batch count (modulo
    # test and loss average), so it must be an int and must include the final
    # partial batch. Plain "/" would produce a float here.
    CLUSTER_EVERY = (len(flist_full) + BATCH_SIZE - 1) // BATCH_SIZE
else:
    flist = random.sample(flist_full, CLUSTER_EVERY*BATCH_SIZE)

In [25]:
# One pseudo-label per file, drawn uniformly from [0, NUM_CLASSES); these are
# placeholders until the first k-means pass replaces them.
num_samples = len(flist)
labels = np.random.randint(NUM_CLASSES, size=num_samples)

# Dataset and shuffled loader built from the initial labels.
NUM_WORKERS = 4
train_dataset = PetDataset(flist, transform, labels)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

In [None]:
def _loader_with_known_order(dataset):
    """Build a shuffled loader over ``dataset`` and report its visiting order.

    Returns:
        A tuple ``(loader, order)`` where ``order[i]`` is the dataset index of
        the i-th sample the loader yields. The order is fixed for this loader,
        so a fresh loader is built for every epoch.
    """
    order = np.random.permutation(len(dataset))
    loader = DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        sampler=order.tolist(),
        num_workers=NUM_WORKERS,
    )
    return loader, order


running_loss = 0.0
num_epochs = 5

model = model.to(device)
model.train()

# Embeddings are collected in the order the loader yields them, while labels
# are attached to files by position in ``flist``. With ``shuffle=True`` that
# order is unknown and the cluster assignments would land on the wrong files,
# so use a loader whose shuffled order is recorded.
train_loader, sample_order = _loader_with_known_order(train_dataset)

for epoch in range(num_epochs):

    running_loss = 0.0
    batches_seen = 0

    # accumulate embeddings to cluster them later (joined once after the epoch)
    embed_chunks = []
    for idx, (x, y) in enumerate(tqdm(train_loader)):

        images_ = x.to(device)
        labels_ = y.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs, batch_embeds = model(images_)

        # Keep a detached CPU copy of the embeddings for clustering
        embed_chunks.append(batch_embeds.detach().cpu().numpy())

        loss = criterion(outputs, labels_)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_seen += 1

        if (idx+1) % CLUSTER_EVERY == 0:
            break

    # Average over the batches that actually ran, which may differ from
    # CLUSTER_EVERY when the loader is exhausted first.
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / max(batches_seen, 1):.4f}')

    # perform the clustering
    embeds = np.concatenate(embed_chunks, axis=0)
    assignments = cluster_features(embeds, num_clusters=NUM_CLUSTERS)

    get_cluster_info(assignments)

    # Map each assignment back to the file it was computed from. Files not
    # reached this epoch (early break) keep their previous label.
    seen = sample_order[:len(embeds)]
    labels = train_dataset.labels.copy()
    labels[seen] = assignments

    # NOTE(review): k-means cluster indices are not guaranteed to keep the same
    # meaning from one fit to the next, while the linear head keeps its
    # weights. Confirm whether the head should be re-initialised here.

    # re-define the dataset and a loader with a freshly recorded order
    train_dataset = PetDataset(flist, transform, labels)
    train_loader, sample_order = _loader_with_known_order(train_dataset)

100%|█████████▉| 249/250 [00:19<00:00, 12.45it/s]


Epoch [1/5], Loss: 5.3079


In [23]:
# Persist the trained weights (state dict only, not the whole module).
# NOTE(review): the filename says "full", while the configuration cell shown in
# this notebook sets use_full_dataset = False. Confirm the name matches the run.
checkpoint_path = "pet_resnet_dc_e5_c200_full.pth"
torch.save(model.state_dict(), checkpoint_path)