<a href="https://colab.research.google.com/github/igor531205/applied_machine_learning_tasks/blob/main/hw7_torch_techniques.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing libraries

In [None]:
from google.colab import drive
from google.colab.patches import cv2_imshow

# Mount Google Drive at /content/drive; later cells read the teacher checkpoint
# from, and save the student checkpoint to, /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Standard libraries

In [None]:
import numpy as np
import pandas as pd
import random
from sklearn.metrics import confusion_matrix, classification_report

PyTorch libraries

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import Compose, ToTensor, Resize
from torchvision.models import convnext_large, efficientnet_v2_m

Download dataset CIFAR10

In [None]:
# CIFAR10 class names; a name's position in the list is used as its integer label
classes = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]

# Arguments common to both splits: same storage folder, download on demand,
# and images converted to tensors.
cifar10_kwargs = {"root": "data", "download": True, "transform": ToTensor()}

# Training split
training_data = datasets.CIFAR10(train=True, **cifar10_kwargs)

# Test split
test_data = datasets.CIFAR10(train=False, **cifar10_kwargs)

Files already downloaded and verified
Files already downloaded and verified


Create data loaders

In [None]:
# Number of samples per batch
batch_size = 512

# Build the loaders with the sample order left fixed (shuffle=False is the
# DataLoader default, spelled out here): the pseudo-labels generated later are
# matched to training samples purely by position.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

# Peek at the first test batch to check the tensor shapes and the label dtype
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([512, 3, 32, 32])
Shape of y: torch.Size([512]) torch.int64


Select device for training

In [None]:
# Pick the training device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


Creating Model ConvNeXt_Large

In [None]:
# Student: ConvNeXt_Large with its default pretrained weights
model = convnext_large(weights='DEFAULT')
# Swap the final classifier layer for one with a single output per CIFAR10 class
head_in_features = model.classifier[2].in_features
model.classifier[2] = nn.Linear(head_in_features, len(classes))
# Move the whole network to the selected device and show its structure
model = model.to(device)
print(model)

ConvNeXt(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 192, kernel_size=(4, 4), stride=(4, 4))
      (1): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
    )
    (1): Sequential(
      (0): CNBlock(
        (block): Sequential(
          (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
          (1): Permute()
          (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
          (3): Linear(in_features=192, out_features=768, bias=True)
          (4): GELU(approximate='none')
          (5): Linear(in_features=768, out_features=192, bias=True)
          (6): Permute()
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): CNBlock(
        (block): Sequential(
          (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
          (1): Permute()
          (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
          (3): Linear(

Loss function and optimizer for training the model

In [None]:
# Cross-entropy loss; the training loop below passes it to test() for evaluation
loss_fn = nn.CrossEntropyLoss()
# Plain SGD over all student parameters with a learning rate of 1e-4
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-4)

Iterative augmentation

In [None]:
# Augmentation pipeline: random horizontal flip, random 32x32 crop after
# 4-pixel padding, then conversion to a tensor
augmentation_transforms = Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    ToTensor(),
])

# Training split again (already on disk, so no download), now with augmentation
augmented_training_data = datasets.CIFAR10(
    root="data",
    train=True,
    download=False,
    transform=augmentation_transforms,
)
# The order stays fixed (shuffle=False is the default, spelled out here) so that
# position-indexed pseudo-labels line up with the samples of each batch.
augmented_train_dataloader = DataLoader(
    dataset=augmented_training_data,
    batch_size=batch_size,
    shuffle=False,
)

Iterative layer unfreezing

In [None]:
# Iterative unfreezing function
def iterative_unfreeze(model, optimizer, current_epoch, total_epochs, lr=1e-4):
    """Gradually unfreeze the model, starting at the output head.

    The fraction ``current_epoch / total_epochs`` of the parameter tensors is
    made trainable, counted from the END of ``model.parameters()``. PyTorch
    yields parameters in registration order, so for a backbone-then-classifier
    model the head is trained first and earlier layers join in later epochs.
    Counting from the start instead would keep a newly initialised head frozen
    until the final epoch.

    Args:
        model: Module whose parameters are frozen/unfrozen in place.
        optimizer: Previous optimizer. Unused; kept so existing callers that
            pass it positionally keep working.
        current_epoch: 1-based index of the epoch about to run.
        total_epochs: Total number of epochs; all parameters are trainable
            once ``current_epoch == total_epochs``.
        lr: Learning rate of the rebuilt SGD optimizer.

    Returns:
        A new ``torch.optim.SGD`` holding only the trainable parameters.
    """
    params = list(model.parameters())
    num_trainable = int(len(params) * (current_epoch / total_epochs))
    # Always keep at least one tensor trainable (an optimizer cannot be built
    # from an empty parameter list) and never exceed what the model has.
    num_trainable = max(1, min(len(params), num_trainable))
    first_trainable = len(params) - num_trainable
    for index, param in enumerate(params):
        param.requires_grad = index >= first_trainable
    # Rebuild the optimizer so it only tracks the currently trainable tensors
    return torch.optim.SGD([p for p in params if p.requires_grad], lr=lr)

Pseudo-labeling

In [None]:
# Pseudo-labeling function
def pseudo_labeling(dataloader, model):
    """Predict a hard class label for every sample served by ``dataloader``.

    The model is switched to evaluation mode and run without gradient
    tracking. The labels that come with each batch are ignored.

    Args:
        dataloader: Iterable of ``(inputs, labels)`` batches.
        model: Network whose arg-max prediction over dim 1 becomes the label.

    Returns:
        A flat list of predicted class indices, in the order the samples were
        served.
    """
    model.eval()
    predicted = []
    with torch.no_grad():
        for inputs, _ in dataloader:
            logits = model(inputs.to(device))
            batch_labels = logits.argmax(dim=1).cpu().numpy()
            predicted.extend(batch_labels)
    return predicted

Loading the teacher model from the previous homework

In [None]:
# Rebuild the teacher architecture without pretrained weights; they are
# restored from the checkpoint below.
teacher_model = convnext_large(weights=None)
# Same head replacement as for the student: one output per CIFAR10 class
teacher_model.classifier[2] = nn.Linear(teacher_model.classifier[2].in_features, len(classes))
# map_location="cpu" lets a checkpoint that was saved from a GPU load on any
# runtime (cpu / mps / cuda); the model is moved to the target device afterwards.
state_dict = torch.load(
    "/content/drive/MyDrive/model_c.pth",
    map_location="cpu",
    weights_only=True,
)
teacher_model.load_state_dict(state_dict)
# Single transfer to the selected device (assigned, so the cell does not dump
# the full module repr as its output)
teacher_model = teacher_model.to(device)

Distillation

In [None]:
# Distillation
# Put the teacher in evaluation mode: it is only used to produce predictions,
# and the optimizers in this notebook are built from the student's parameters only.
teacher_model.eval()
def distillation_loss(student_preds, teacher_preds, labels, temperature=2.0):
    """Knowledge-distillation loss: softened KL term plus hard-label cross-entropy.

    Args:
        student_preds: Student logits; the class dimension is dim 1.
        teacher_preds: Teacher logits with the same layout. They are treated
            as constants: detached here so no gradient reaches the teacher
            even if the caller computed them with autograd enabled.
        labels: Integer class targets for the cross-entropy term.
        temperature: Softening temperature applied to both sets of logits.

    Returns:
        Scalar tensor ``KL(teacher || student) * temperature**2 + CE``. The
        ``temperature**2`` factor rescales the soft-target term after the
        logits were divided by ``temperature``.
    """
    soft_targets = nn.functional.softmax(teacher_preds.detach() / temperature, dim=1)
    student_log_probs = nn.functional.log_softmax(student_preds / temperature, dim=1)
    # kl_div expects log-probabilities as input and probabilities as target
    kd_loss = nn.functional.kl_div(student_log_probs, soft_targets, reduction='batchmean')
    ce_loss = nn.functional.cross_entropy(student_preds, labels)
    return kd_loss * temperature ** 2 + ce_loss

Function for training the model

In [None]:
# Training function
def train(dataloader, model, teacher_model, optimizer, current_epoch, total_epochs, pseudo_labels):
    """Run one distillation epoch of the student against a fixed teacher.

    Args:
        dataloader: Yields ``(X, y)`` training batches. When ``pseudo_labels``
            is given, it must serve samples in the same order in which the
            pseudo-labels were generated (i.e. not shuffled).
        model: Student network; updated in place.
        teacher_model: Teacher network; only used for inference.
        optimizer: Passed through to ``iterative_unfreeze``, which returns the
            optimizer actually used for this epoch.
        current_epoch: 1-based index of this epoch (drives the unfreezing).
        total_epochs: Total number of epochs (drives the unfreezing).
        pseudo_labels: Optional flat sequence of class indices, one per
            sample, that replaces the dataset labels; ``None`` keeps them.
    """
    # Total number of samples in train dataset
    size = len(dataloader.dataset)
    # Student in training mode, teacher in evaluation mode
    model.train()
    teacher_model.eval()

    # Choose which parameters are trainable this epoch and get a matching optimizer
    optimizer = iterative_unfreeze(model, optimizer, current_epoch, total_epochs)

    # Position of the current batch's first sample within pseudo_labels. Tracking
    # it from the real batch sizes avoids relying on the notebook-global batch_size.
    offset = 0
    # Train loop
    for batch, (X, y) in enumerate(dataloader):
        # Replace labels with pseudo-labels if provided
        if pseudo_labels is not None:
            y = torch.tensor(pseudo_labels[offset: offset + len(X)])
        offset += len(X)

        # Move data and labels to selected device
        X, y = X.to(device), y.to(device)

        # The teacher only supplies targets: skip autograd for its forward pass
        # so no graph is kept and no gradients accumulate on its parameters.
        with torch.no_grad():
            teacher_preds = teacher_model(X)
        student_preds = model(X)

        # Calculate loss
        loss = distillation_loss(student_preds, teacher_preds, y)

        # Backpropagation: compute gradients, update weights, reset gradients
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Print loss every 10 batches
        if batch % 10 == 0:
            loss_value, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

Function for testing the model

In [None]:
# Testing function
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Args:
        dataloader: Yields ``(X, y)`` evaluation batches.
        model: Network to evaluate; switched to evaluation mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.

    The printed loss is the mean of the per-batch losses; the accuracy is the
    number of correct predictions divided by the dataset size.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)

    model.eval()
    loss_total = 0
    n_correct = 0
    # No gradients are needed for evaluation
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            # Accumulate the batch loss and the number of correct predictions
            loss_total += loss_fn(logits, targets).item()
            hits = logits.argmax(1) == targets
            n_correct += hits.type(torch.float).sum().item()

    mean_loss = loss_total / n_batches
    accuracy = n_correct / n_samples
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {mean_loss:>8f} \n")

Train model

In [None]:
# Number of epochs
epochs = 5
# Hard labels predicted by the teacher on the un-augmented training loader;
# train() matches them to the augmented batches by position.
pseudo_labels = pseudo_labeling(train_dataloader, teacher_model)
# Training loop: one distillation epoch followed by an evaluation on the test set
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(
        dataloader=augmented_train_dataloader,
        model=model,
        teacher_model=teacher_model,
        optimizer=optimizer,
        current_epoch=t + 1,
        total_epochs=epochs,
        pseudo_labels=pseudo_labels,
    )
    test(dataloader=test_dataloader, model=model, loss_fn=loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 1.881706  [  512/50000]
loss: 1.839807  [ 5632/50000]
loss: 1.618406  [10752/50000]
loss: 1.489320  [15872/50000]
loss: 1.469306  [20992/50000]
loss: 1.303429  [26112/50000]
loss: 1.357556  [31232/50000]
loss: 1.227699  [36352/50000]
loss: 1.329663  [41472/50000]
loss: 1.211839  [46592/50000]
Test Error: 
 Accuracy: 76.2%, Avg loss: 0.688784 

Epoch 2
-------------------------------
loss: 1.166590  [  512/50000]
loss: 1.272633  [ 5632/50000]
loss: 1.253030  [10752/50000]
loss: 1.148722  [15872/50000]
loss: 1.133692  [20992/50000]
loss: 1.150875  [26112/50000]
loss: 1.168610  [31232/50000]
loss: 1.088743  [36352/50000]
loss: 1.209793  [41472/50000]
loss: 1.084303  [46592/50000]
Test Error: 
 Accuracy: 77.3%, Avg loss: 0.655015 

Epoch 3
-------------------------------
loss: 1.130071  [  512/50000]
loss: 1.195657  [ 5632/50000]
loss: 1.132865  [10752/50000]
loss: 1.063567  [15872/50000]
loss: 1.097794  [20992/50000]
loss: 1.084312  [26112/500

In [None]:
# Persist only the student's weights (its state dict) to Google Drive
student_state = model.state_dict()
torch.save(student_state, "/content/drive/MyDrive/model_student.pth")
print("Saved PyTorch Model State to model_student.pth")

Saved PyTorch Model State to model_student.pth


Evaluate the model

In [None]:
# Pick one test sample at random (image tensor and integer label)
random_index = random.randint(0, len(test_data) - 1)
x, y = test_data[random_index]

# Evaluation mode, no gradient tracking
model.eval()
with torch.no_grad():
    # Add a batch dimension and move the image to the selected device
    x = x.unsqueeze(0).to(device)
    pred = model(x)

# Translate the predicted and the true class index into class names
predicted = classes[pred[0].argmax(0)]
actual = classes[y]
print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "bird", Actual: "bird"


In [None]:
all_preds = []
all_labels = []

# Set evaluation mode here instead of relying on an earlier cell having done it
model.eval()
# No gradients are needed for inference
with torch.no_grad():
    for X, y in test_dataloader:
        # Move data to selected device
        X, y = X.to(device), y.to(device)
        # Prediction
        pred = model(X)
        # Collect predicted and true labels as CPU numpy values
        all_preds.extend(pred.argmax(1).cpu().numpy())
        all_labels.extend(y.cpu().numpy())

# Confusion matrix (called as confusion_matrix(true, predicted)), labelled with class names
cm = confusion_matrix(all_labels, all_preds)
cm_df = pd.DataFrame(cm, index=classes, columns=classes)
cm_df

Unnamed: 0,airplane,automobile,bird,cat,deer,dog,frog,horse,ship,truck
airplane,782,27,28,9,8,2,3,11,88,42
automobile,7,925,0,0,0,0,4,1,4,59
bird,42,2,733,17,68,38,68,24,5,3
cat,7,14,44,575,47,167,65,57,13,11
deer,13,1,43,32,628,9,39,225,7,3
dog,7,3,24,99,33,734,9,83,4,4
frog,10,3,28,34,31,9,882,1,1,1
horse,11,1,25,13,40,66,1,839,0,4
ship,49,25,4,4,1,2,1,2,899,13
truck,15,102,0,2,0,0,1,4,24,852


In [None]:
# Per-class precision / recall / F1 computed from the labels collected above
report = classification_report(all_labels, all_preds, target_names=classes)
print("\nClassification Report:")
print(report)


Classification Report:
              precision    recall  f1-score   support

    airplane       0.83      0.78      0.80      1000
  automobile       0.84      0.93      0.88      1000
        bird       0.79      0.73      0.76      1000
         cat       0.73      0.57      0.64      1000
        deer       0.73      0.63      0.68      1000
         dog       0.71      0.73      0.72      1000
        frog       0.82      0.88      0.85      1000
       horse       0.67      0.84      0.75      1000
        ship       0.86      0.90      0.88      1000
       truck       0.86      0.85      0.86      1000

    accuracy                           0.78     10000
   macro avg       0.79      0.78      0.78     10000
weighted avg       0.79      0.78      0.78     10000



In [None]:
# Flush and unmount Google Drive now that the student checkpoint has been saved to it
drive.flush_and_unmount()