# The classic SimCLR with ResNet50 on CIFAR-10

In [1]:
!pip install pynvml

Collecting pynvml
  Downloading pynvml-11.5.3-py3-none-any.whl.metadata (8.8 kB)
Downloading pynvml-11.5.3-py3-none-any.whl (53 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynvml
Successfully installed pynvml-11.5.3


## Imports

In [42]:
import json
import os
import tempfile
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet50
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms, datasets, models

import psutil
from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetUtilizationRates
from pynvml import NVMLError

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, normalized_mutual_info_score
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize

import numpy as np
from tqdm.notebook import tqdm

## Constants

In [45]:
# Hyper-parameters and runtime configuration shared by the cells below.
BATCH_SIZE = 256  # samples per batch for the DataLoaders
EPOCHS = 100  # number of passes of the contrastive training loop
LEARNING_RATE = 1e-3  # learning rate handed to the Adam optimizer
TEMPERATURE = 0.5  # default temperature of nt_xent_loss
NUM_WORKERS = 4  # worker processes per DataLoader
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Metrics

In [32]:
# GPU monitoring
# NVML is only usable when an NVIDIA driver is present. DEVICE falls back to
# the CPU in that case, so the monitoring must degrade gracefully as well
# instead of aborting the whole cell.
try:
  nvmlInit()
  handle = nvmlDeviceGetHandleByIndex(0) # GPU with index 0
except NVMLError:
  handle = None

def gpu_utilization():
  """Return the current utilization of GPU 0 in percent.

  Returns:
    The `gpu` field of the NVML utilization rates for the device handle
    obtained above, or None when NVML could not be initialised (for
    example on a CPU-only machine).
  """
  if handle is None:
    return None
  return nvmlDeviceGetUtilizationRates(handle).gpu


# memory usage monitoring
def memory_usage():
  """Return the resident set size (RSS) of the current process in MB."""
  bytes_per_mb = 1024 ** 2
  rss_bytes = psutil.Process(os.getpid()).memory_info().rss
  return rss_bytes / bytes_per_mb


# model size monitoring
def model_size(model):
  """Return the size in MB of the model's serialized state_dict.

  The state_dict is written with torch.save into a private temporary
  directory, so an existing "temp.p" in the working directory is never
  overwritten or deleted, and the scratch file is removed even if
  saving or measuring fails.

  Args:
    model: object exposing `state_dict()` (e.g. a torch.nn.Module).

  Returns:
    File size of the saved state_dict in MB (bytes / 1024**2).
  """
  with tempfile.TemporaryDirectory() as scratch_dir:
    # Keep the original basename so the serialized archive is unchanged.
    path = os.path.join(scratch_dir, "temp.p")
    torch.save(model.state_dict(), path)
    return os.path.getsize(path) / (1024 ** 2) # return in MB


# contrastive loss
def nt_xent_loss(z_i, z_j, temperature=TEMPERATURE):
  """NT-Xent (normalized temperature-scaled cross entropy) loss.

  Row k of `z_i` and row k of `z_j` form a positive pair; every other row
  of the concatenated batch acts as a negative. The loss is averaged over
  all 2N anchors (both views), as in the SimCLR formulation, and is
  evaluated through a log-softmax so that small temperatures do not
  overflow an explicit `exp`.

  Args:
    z_i: projections of the first view, one row per sample.
      The dot product is used as similarity, so rows are assumed to be
      L2-normalized by the caller (the SimCLR model in this notebook
      normalizes its projections).
    z_j: projections of the second view, same shape as `z_i`.
    temperature: scaling applied to the similarities.

  Returns:
    Scalar tensor with the mean loss over the 2N anchors.
  """
  batch_size = z_i.size(0)
  z = torch.cat((z_i, z_j), dim=0)
  logits = torch.matmul(z, z.T) / temperature

  # An anchor must never be contrasted with itself: drop the diagonal.
  # The mask is created on the inputs' device rather than a global one.
  self_mask = torch.eye(2 * batch_size, dtype=torch.bool, device=z.device)
  logits = logits.masked_fill(self_mask, float("-inf"))

  # The positive for row k sits batch_size rows further (wrapping around).
  targets = (torch.arange(2 * batch_size, device=z.device) + batch_size) % (2 * batch_size)

  # cross_entropy == mean over rows of -log(exp(pos) / sum_{k != i} exp(sim_ik))
  return nn.functional.cross_entropy(logits, targets)


# model metrics
# One independent, initially empty list per tracked quantity; the cells
# below append measurements to these lists.
metrics = {
    metric_name: []
    for metric_name in (
        "linear_evaluation_accuracy",
        "contrastive_loss",
        "nmi",
        "memory_usage_MB",
        "model_size_MB",
        "inference_time_sec",
        "training_time_per_epoch_sec",
        "gpu_utilization_percent",
    )
}

## CIFAR-10 loading and transformation

In [33]:
class SimCLRTransform:
  """Callable that turns one image into two independently augmented views.

  The pipeline applies a random resized crop, a random horizontal flip,
  colour jitter (with probability 0.8), random grayscale (with probability
  0.2), tensor conversion and per-channel normalization.

  Args:
    size: output size handed to RandomResizedCrop.
  """

  def __init__(self, size=32):
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)
    # presumably the CIFAR-10 per-channel mean / std — verify if the dataset changes
    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.2023, 0.1994, 0.2010)

    augmentations = [
        transforms.RandomResizedCrop(size=size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply([color_jitter], p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
    self.transform = transforms.Compose(augmentations)

  def __call__(self, x):
    # The pipeline is random, so two calls give two different views.
    first_view = self.transform(x)
    second_view = self.transform(x)
    return first_view, second_view

# Deterministic preprocessing without augmentation: tensor conversion followed
# by the same normalization constants that SimCLRTransform uses.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])

In [34]:
# Contrastive training set: SimCLRTransform makes every sample a pair of
# augmented views, so batches from train_loader have the form ((x_i, x_j), label).
train = datasets.CIFAR10(root='./data', train=True, transform=SimCLRTransform(), download=True)
# drop_last=True discards the final incomplete batch, so every training batch
# has exactly BATCH_SIZE samples.
train_loader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)

# Test split with the plain, un-augmented test_transform (one tensor per image).
test = torchvision.datasets.CIFAR10(root='./data', train=False, transform=test_transform, download=True)
test_loader = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

Files already downloaded and verified
Files already downloaded and verified


## SimCLR

In [35]:
class SimCLR(nn.Module):
  """Encoder plus MLP projection head used for contrastive training.

  Args:
    base_model: callable building the backbone; it is invoked as
      `base_model(pretrained=False, num_classes=output_dim)` and must
      return a module with an `fc` layer exposing `in_features`.
    output_dim: dimensionality of the projected embedding `z`.

  NOTE(review): recent torchvision releases deprecate the `pretrained=`
  keyword in favour of `weights=` — confirm against the installed version.
  """

  def __init__(self, base_model, output_dim):
    super().__init__()
    backbone = base_model(pretrained=False, num_classes=output_dim)
    feature_dim = backbone.fc.in_features

    # The classification layer is not needed: replace it with a pass-through
    # so the encoder outputs the features that used to feed `fc`.
    backbone.fc = nn.Identity()
    self.encoder = backbone

    # projection head: Linear -> ReLU -> Linear
    self.projector = nn.Sequential(
        nn.Linear(feature_dim, feature_dim),
        nn.ReLU(),
        nn.Linear(feature_dim, output_dim),
    )

  def forward(self, x):
    """Return `(h, z)`: encoder features and the L2-normalized projection."""
    features = self.encoder(x)
    projection = nn.functional.normalize(self.projector(features), dim=1)
    return features, projection

In [36]:
# ResNet-50 encoder with a 128-dimensional projection, placed on DEVICE.
model = SimCLR(resnet50, output_dim=128).to(DEVICE)
# Adam optimizes every parameter of the model (encoder and projection head).
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)



## Training

In [46]:
# Contrastive pre-training: for every batch both augmented views are passed
# through the model and the NT-Xent loss between their projections is minimized.
epoch_bar = tqdm(range(EPOCHS), desc="Epochs", position=0)

for epoch in epoch_bar:
  model.train()
  epoch_loss = 0
  start_time = time.time()

  batch_bar = tqdm(train_loader, desc=f"Training Epoch {epoch+1}", leave=False, position=1)

  # Each batch is ((view_1, view_2), labels); the labels are not used here.
  for (x_i, x_j), _ in batch_bar:
    x_i = x_i.to(DEVICE)
    x_j = x_j.to(DEVICE)

    optimizer.zero_grad()
    # model(x) returns (h, z); only the projection z enters the loss.
    _, z_i = model(x_i)
    _, z_j = model(x_j)
    loss = nt_xent_loss(z_i, z_j, TEMPERATURE)
    loss.backward()
    optimizer.step()
    epoch_loss += loss.item()

  # save metrics
  # One entry per epoch: mean batch loss, wall-clock time, and the GPU
  # utilization / process memory / serialized model size sampled at epoch end.
  training_time = time.time() - start_time
  metrics["contrastive_loss"].append(epoch_loss / len(train_loader))
  metrics["training_time_per_epoch_sec"].append(training_time)
  metrics["gpu_utilization_percent"].append(gpu_utilization())
  metrics["memory_usage_MB"].append(memory_usage())
  metrics["model_size_MB"].append(model_size(model))

  # Show the latest numbers next to the epoch progress bar.
  epoch_bar.set_postfix({
        "Loss": f"{epoch_loss / len(train_loader):.4f}",
        "Time": f"{training_time:.2f}s",
        "GPU Util": f"{metrics['gpu_utilization_percent'][-1]}%"
    })

Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Training Epoch 1:   0%|          | 0/195 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x78eaa028fd90>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x78eaa028fd90>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Training Epoch 2:   0%|          | 0/195 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x78eaa028fd90>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x78eaa028fd90>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

KeyboardInterrupt: 

## Evaluation

In [None]:
def get_features(loader, model):
  """Run the model over a loader and collect encoder features and labels.

  The model is switched to eval mode and gradients are disabled. Each batch
  is moved to DEVICE, and the first output of `model(x)` (the encoder
  features `h`) is gathered on the CPU as a NumPy array.

  Args:
    loader: iterable yielding `(x, y)` batches of tensors.
    model: module returning a pair whose first element is the feature tensor.

  Returns:
    Tuple `(features, labels, inference_time)`: the concatenated feature
    array, the concatenated label array, and the wall-clock seconds spent
    iterating over the loader.
  """
  model.eval()
  feature_chunks, label_chunks = [], []
  started_at = time.time()
  with torch.no_grad():
    for images, targets in loader:
      encoded, _ = model(images.to(DEVICE))
      feature_chunks.append(encoded.cpu().numpy())
      label_chunks.append(targets.numpy())
  elapsed = time.time() - started_at
  return np.concatenate(feature_chunks), np.concatenate(label_chunks), elapsed

In [None]:
# feature extraction for linear classification
# The contrastive `train` dataset yields a pair of augmented views per image,
# which get_features cannot consume (it expects one tensor per batch).
# Evaluate on a plain, un-augmented copy of the training split instead.
train_eval = datasets.CIFAR10(root='./data', train=True, transform=test_transform, download=True)

# linear evaluation
# Freeze the encoder; only the scikit-learn classifier below is fitted.
for param in model.encoder.parameters():
  param.requires_grad = False

train_loader_linear = DataLoader(train_eval, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader_linear = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Extract the features once and reuse the measured time, instead of running a
# second full pass over both splits just for timing.
train_features_linear, train_labels_linear, train_inference_time = get_features(train_loader_linear, model)
test_features_linear, test_labels_linear, test_inference_time = get_features(test_loader_linear, model)
metrics["inference_time_sec"].append(train_inference_time + test_inference_time)

clf = LogisticRegression(max_iter=1000)
clf_start = time.time()
clf.fit(train_features_linear, train_labels_linear)
clf_time = time.time() - clf_start

pred = clf.predict(test_features_linear)
accuracy = accuracy_score(test_labels_linear, pred)
# The key must match the one declared in the `metrics` dictionary.
metrics["linear_evaluation_accuracy"].append(accuracy)

In [None]:
# NMI measures how well an unsupervised clustering of the learned features
# agrees with the ground-truth classes. Scoring the supervised classifier's
# predictions would merely restate the linear-evaluation accuracy, so the test
# features are clustered with k-means (one cluster per class) instead.
test_features_normalized = normalize(test_features_linear)
kmeans = KMeans(n_clusters=len(np.unique(test_labels_linear)), n_init=10, random_state=0)
cluster_ids = kmeans.fit_predict(test_features_normalized)
nmi = normalized_mutual_info_score(test_labels_linear, cluster_ids)
metrics["nmi"].append(nmi)

In [None]:
# Report both evaluation metrics with four decimal places.
print(f"Linear Evaluation Accuracy: {accuracy:.4f}, NMI: {nmi:.4f}")

In [None]:
# Persist every collected metric list as indented JSON in the working directory.
with open("metrics.json", "w") as f:
    json.dump(metrics, f, indent=4)

print("Saved in metrics.json")