<a href="https://colab.research.google.com/github/dnzambuli/Deep-learning-4-CV/blob/master/Contrastive_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Objective
The goal of this lab is to understand how contrastive learning works by implementing SimCLR,
a self-supervised learning framework. You will:

1. Apply data augmentation techniques to create positive pairs.
2. Train a neural network using contrastive loss.
3. Fine-tune the pretrained model on a classification task.
4. Evaluate the effectiveness of contrastive learning.

## Libraries
🐍 Python (>=3.7)

🔦 PyTorch

🔦 👁 torchvision

🤔 NumPy

〽 Matplotlib

🧑 scikit-learn

# Step 1: Load and Preprocess the Dataset

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
# Augmentation pipeline applied to every training image before it is
# converted to a tensor.
train_transform = transforms.Compose([
    # Random crop, resized to 32x32
    transforms.RandomResizedCrop(size=32),
    transforms.RandomHorizontalFlip(),
    # Colour jitter is applied with probability 0.8
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.8, contrast=0.8,
                                saturation=0.8, hue=0.2)],
        p=0.8,
    ),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
])

# CIFAR-10 training split, downloaded into ./data when it is not there yet
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=train_transform,
)
# Shuffled mini-batches of 256 images, loaded by 4 worker processes
train_loader = DataLoader(
    train_dataset,
    batch_size=256,
    shuffle=True,
    num_workers=4,
)


100%|██████████| 170M/170M [00:54<00:00, 3.11MB/s]


# Step 2: Define the SimCLR Model

In [2]:
import torch.nn as nn
import torchvision.models as models
class SimCLR(nn.Module):
  """SimCLR network: a torchvision backbone followed by an MLP projection head.

  Args:
    base_model: Name of a constructor in ``torchvision.models`` whose network
      exposes its classification layer as a linear ``fc`` attribute
      (e.g. ``'resnet18'`` or ``'resnet50'``). The backbone is built without
      pretrained weights.
    projection_dim: Size of the embedding returned by the projection head.

  Raises:
    ValueError: If ``base_model`` does not name a callable in
      ``torchvision.models`` or the network has no linear ``fc`` layer.
  """

  def __init__(self, base_model='resnet18', projection_dim=128):
    super(SimCLR, self).__init__()
    # Look the backbone up by name instead of always building resnet18
    factory = getattr(models, base_model, None) if isinstance(base_model, str) else None
    if not callable(factory):
      raise ValueError(f"Unknown torchvision backbone: {base_model!r}")
    # No arguments -> no pretrained weights (avoids the deprecated `pretrained` kwarg)
    self.encoder = factory()

    head = getattr(self.encoder, 'fc', None)
    if not isinstance(head, nn.Linear):
      raise ValueError(
          f"Backbone {base_model!r} has no linear 'fc' layer to replace")
    # Width of the encoder output, read from the layer that is being removed
    feature_dim = head.in_features
    self.encoder.fc = nn.Identity() # Remove classification head

    # Projection head
    self.projection_head = nn.Sequential(
        nn.Linear(feature_dim, feature_dim),
        nn.ReLU(),
        nn.Linear(feature_dim, projection_dim)
        )

  def forward(self, x):
    """Encode a batch ``x`` and return its projected embeddings."""
    features = self.encoder(x)
    return self.projection_head(features)

# Step 3: Define the Contrastive Loss (NT-Xent Loss)

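The **Normalized Temperature-scaled Cross-Entropy (NT-Xent) loss** is used for contrastive
learning: it pulls the embeddings of two augmented views of the same image together while
pushing apart the embeddings of different images in the batch.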

In [3]:
import torch.nn.functional as F
def contrastive_loss(out_1, out_2, temperature=0.5):
  """Temperature-scaled cross-entropy loss between two batches of embeddings.

  Row ``i`` of ``out_1`` and row ``i`` of ``out_2`` are treated as the positive
  pair; every other row of ``out_2`` acts as a negative for row ``i``.

  Args:
    out_1: 2-D tensor of embeddings for the first view, one row per sample.
    out_2: 2-D tensor of embeddings for the second view, same shape as ``out_1``.
    temperature: Divisor applied to the cosine similarities.

  Returns:
    Scalar tensor with the mean cross-entropy over the batch, on the same
    device as the inputs.
  """
  # Normalize embeddings so the dot products below are cosine similarities
  out_1 = F.normalize(out_1, dim=1)
  out_2 = F.normalize(out_2, dim=1)

  # The matching view sits on the diagonal of the similarity matrix.
  # Labels are created on the inputs' device so the loss also runs on CPU.
  batch_size = out_1.shape[0]
  labels = torch.arange(batch_size, device=out_1.device)

  similarity_matrix = torch.mm(out_1, out_2.T) / temperature
  return F.cross_entropy(similarity_matrix, labels)

# Step 4: Train the SimCLR Model
Train the model using contrastive loss.

In [None]:
import torch.optim as optim
# Initialize model and optimizer
# Fall back to the CPU when CUDA is unavailable ("t4" is not a torch device type)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimCLR().to(device)
optimizer = optim.Adam(model.parameters(), lr=3e-4)
# Training loop
num_epochs = 100
to_pil = transforms.ToPILImage()  # built once instead of once per image
model.train()
for epoch in range(num_epochs):
  for (images, _) in train_loader:
    # Generate two augmented views.
    # train_transform operates on PIL images, so the batch coming from the
    # loader is augmented as-is and only the finished views are moved to the
    # device.
    # NOTE: train_loader already applied train_transform once (see the dataset
    # definition), so each view is an augmentation of an augmented image.
    images_1 = torch.stack([train_transform(to_pil(image)) for image in images]).to(device)
    images_2 = torch.stack([train_transform(to_pil(image)) for image in images]).to(device)

    # Forward pass
    out_1 = model(images_1)
    out_2 = model(images_2)

    # Compute loss
    loss = contrastive_loss(out_1, out_2)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  # Reports the loss of the last batch of the epoch
  print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")



Epoch [1/100], Loss: 3.6999
Epoch [2/100], Loss: 3.6923


# Step 5: Transfer Learning and Evaluation
Fine-tune the pretrained model for classification.

In [None]:
# CIFAR-10 test split: images are only converted to tensors, no augmentation
test_transform = transforms.Compose([transforms.ToTensor()])
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=test_transform,
)
# Fixed order, batches of 256, loaded by 4 worker processes
test_loader = DataLoader(
    test_dataset,
    batch_size=256,
    shuffle=False,
    num_workers=4,
)
# Define classifier
class Classifier(nn.Module):
  """Linear classification layer on top of an existing encoder.

  The encoder is taken from ``base_model`` by reference, not copied, so
  training this classifier also updates ``base_model.encoder``.

  Args:
    base_model: Object exposing an ``encoder`` module that maps a batch of
      inputs to feature vectors.
    feature_dim: Width of the feature vectors produced by the encoder.
    num_classes: Number of output classes.
  """

  def __init__(self, base_model, feature_dim=512, num_classes=10):
    super(Classifier, self).__init__()
    self.encoder = base_model.encoder # Use pretrained encoder
    self.fc = nn.Linear(feature_dim, num_classes)

  def forward(self, x):
    """Return class logits for the batch ``x``."""
    return self.fc(self.encoder(x))
# Build the classifier around the SimCLR encoder and set up supervised training
classifier = Classifier(model).to(device)
optimizer = optim.Adam(classifier.parameters(), lr=3e-4)
criterion = nn.CrossEntropyLoss()

# Fine-tune on the labelled training set
num_epochs = 10
for epoch in range(num_epochs):
  classifier.train()
  for images, labels in train_loader:
    images = images.to(device)
    labels = labels.to(device)

    outputs = classifier(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # Loss of the last batch of the epoch
  print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Measure top-1 accuracy on the test set
classifier.eval()
correct = 0
total = 0
with torch.no_grad():
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    outputs = classifier(images)
    # Index of the largest logit is the predicted class
    predicted = outputs.max(dim=1).indices
    total += labels.size(0)
    correct += int((predicted == labels).sum())
accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

# Step 6: Analysis and Interpretation
1. Compare the test accuracy of the classifier when trained:
   - From scratch
   - With contrastive learning pretraining
2. Visualize learned embeddings using t-SNE.

In [None]:
from sklearn.manifold import TSNE
import numpy as np
import matplotlib.pyplot as plt
# Collect encoder features and the matching labels for the whole test set
classifier.eval()
embeddings, labels_list = [], []
with torch.no_grad():
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    features = classifier.encoder(images)
    embeddings.append(features.cpu().numpy())
    labels_list.append(labels.cpu().numpy())

# Merge the per-batch arrays along the sample axis
embeddings = np.concatenate(embeddings)
labels_list = np.concatenate(labels_list)

# Project the features to 2-D with t-SNE (fixed seed)
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
embeddings_2d = tsne.fit_transform(embeddings)

# Scatter plot coloured by class label
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    embeddings_2d[:, 0],
    embeddings_2d[:, 1],
    c=labels_list,
    cmap='tab10',
    alpha=0.7,
)
handles, legend_labels = scatter.legend_elements()
ax.legend(handles, legend_labels, title="Classes")
ax.set_title("t-SNE Visualization of Embeddings")
plt.show()