In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [2]:
import wandb
wandb.login(key="21cc29b0dd7a42c84bee84eb732730a65e762855")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mvanshitamahajan1401[0m ([33msolo_yolo[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [24]:
def finish_run():
    if wandb.run is not None:
        wandb.finish()

In [4]:
import warnings
warnings.filterwarnings("ignore")

base_path = "/kaggle/input/cv-a2-dataset/datasets/CV Assignment 2/Q1"
train_data = torch.load(base_path +"/train_data.pt")
train_labels = torch.load(base_path  + "/train_labels.pt")
test_data = torch.load(base_path +  "/test_data.pt")
test_labels = torch.load(base_path +  "/test_labels.pt")

print(f"Train Data Shape: {train_data.shape}") 
print(f"Train Labels Shape: {train_labels.shape}") 
print(f"Test Data Shape: {test_data.shape}")
print(f"Test Labels Shape: {test_labels.shape}")

Train Data Shape: torch.Size([50000, 3, 36, 36])
Train Labels Shape: torch.Size([50000])
Test Data Shape: torch.Size([10000, 3, 36, 36])
Test Labels Shape: torch.Size([10000])


In [2]:
def get_resnet18(num_classes, pretrained=False):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None)
    model.fc = nn.Linear(512, num_classes)  # Modify FC layer
    return model

In [None]:
model_details = get_resnet18(num_classes=10, pretrained=False)
x = torch. rand (1, 3, 36, 36)
for name, layer in model_details. named_children():
    x = layer(x) if 'fc' not in name else x
    print(f"{name}: {x.shape}")

conv1: torch.Size([1, 64, 18, 18])
bn1: torch.Size([1, 64, 18, 18])
relu: torch.Size([1, 64, 18, 18])
maxpool: torch.Size([1, 64, 9, 9])
layer1: torch.Size([1, 64, 9, 9])
layer2: torch.Size([1, 128, 5, 5])
layer3: torch.Size([1, 256, 3, 3])
layer4: torch.Size([1, 512, 2, 2])
avgpool: torch.Size([1, 512, 1, 1])
fc: torch.Size([1, 512, 1, 1])


In [6]:
class CustomDataset(Dataset):
    def __init__(self, data, labels, image_size, transform=None):
        self.data = data
        self.labels = labels
        self.image_size = image_size

        # Default transform if none is provided
        self.transform = transform if transform else transforms.Compose([
            transforms.Resize((self.image_size, self.image_size)),  # Resize dynamically
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img, label = self.data[idx], self.labels[idx]
        img = img.float() / 255.0  # Normalize to [0,1] range

        # Apply transformation
        img = self.transform(img)

        return img, label

# Function to get data loaders for train, validation, and test
def get_dataloaders(batch_size=64, val_split=0.2, image_size = 36):
    # Split the dataset into training and validation sets (80% train, 20% validation)
    train, val, train_labels_1, val_labels_1 = train_test_split(train_data, train_labels, test_size=val_split, random_state=42)

    # Create dataset instances for train, validation, and test
    train_dataset = CustomDataset(train, train_labels_1, image_size)
    val_dataset = CustomDataset(val, val_labels_1, image_size)
    test_dataset = CustomDataset(test_data, test_labels, image_size)
    
    # Create data loaders for train, validation, and test
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader, test_loader

# Training Function with Early Stopping
def train_model(model, train_loader, val_loader, test_loader, epochs=10, lr=0.001, patience=3, model_name = "Scratch Resnet18", image_size = 36):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_losses, val_losses = [], []
    
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    best_model_wts = None
    
    for epoch in range(epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0
        
        # Training loop
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()
            total += labels.size(0)
        
        train_acc = correct / total
        
        # Validation loop
        model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                val_correct += (outputs.argmax(1) == labels).sum().item()
                val_total += labels.size(0)
        
        val_acc = val_correct / val_total
        avg_val_loss = val_loss / len(val_loader)
        avg_train_loss = total_loss / len(train_loader)

        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)
        
        print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f}, Train Accuracy = {train_acc:.4f}, Validation Loss = {avg_val_loss:.4f}, Validation Accuracy = {val_acc:.4f}")
        
        # Early stopping logic
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_wts = model.state_dict()
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
        
        if epochs_without_improvement >= patience:
            print(f"Early stopping at epoch {epoch+1} due to no improvement in validation loss.")
            break
    
    # Load the best model weights after early stopping
    if best_model_wts is not None:
        model.load_state_dict(best_model_wts)
     
    run_name = f"Model: {model_name}, Image size: {image_size}"
    wandb.init(project="CV A2 Resnet", name=run_name, reinit=True)

    for epoch, (train_loss, val_loss) in enumerate(zip(train_losses, val_losses), start=1):
        wandb.log({"epoch": epoch, "train_loss": train_loss, "val_loss": val_loss})

    return train_losses, val_losses

In [68]:
from sklearn.metrics import f1_score, confusion_matrix

def evaluate_model(model, test_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Set model to evaluation mode

    correct, total = 0, 0
    all_preds, all_labels = [], []  # Store predictions and labels for F1-score & confusion matrix

    with torch.no_grad():  # Disable gradient computation
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predictions = outputs.argmax(dim=1)  # Get class with highest probability
            
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
            
            all_preds.extend(predictions.cpu().numpy())  # Move to CPU for sklearn
            all_labels.extend(labels.cpu().numpy())

    accuracy = correct / total
    f1 = f1_score(all_labels, all_preds, average="weighted")  # Weighted F1-score
    conf_matrix = confusion_matrix(all_labels, all_preds)  # Renamed variable

    return accuracy, f1, conf_matrix

In [7]:
num_classes = len(torch.unique(train_labels)) 
num_classes
train_loader_1, val_loader_1, test_loader_1 = get_dataloaders(image_size = 36)
train_loader_2, val_loader_2, test_loader_2 = get_dataloaders(batch_size = 32, image_size = 224)

## 1.1 Baseline

### ResNet from scratch

In [8]:
model_scratch_1 =  get_resnet18(num_classes, pretrained=False)
train_model(model_scratch_1, train_loader_1, val_loader_1, test_loader_1, epochs=20, lr=0.001, patience=3, model_name = "Scratch Resnet18", image_size = 36)

Epoch 1: Train Loss = 1.3604, Train Accuracy = 0.5110, Validation Loss = 1.0987, Validation Accuracy = 0.6081
Epoch 2: Train Loss = 0.9648, Train Accuracy = 0.6597, Validation Loss = 0.9106, Validation Accuracy = 0.6756
Epoch 3: Train Loss = 0.7869, Train Accuracy = 0.7251, Validation Loss = 0.8799, Validation Accuracy = 0.6948
Epoch 4: Train Loss = 0.6511, Train Accuracy = 0.7738, Validation Loss = 0.7773, Validation Accuracy = 0.7357
Epoch 5: Train Loss = 0.5349, Train Accuracy = 0.8128, Validation Loss = 0.7616, Validation Accuracy = 0.7467
Epoch 6: Train Loss = 0.4276, Train Accuracy = 0.8501, Validation Loss = 0.7363, Validation Accuracy = 0.7662
Epoch 7: Train Loss = 0.3225, Train Accuracy = 0.8877, Validation Loss = 0.8419, Validation Accuracy = 0.7486
Epoch 8: Train Loss = 0.2435, Train Accuracy = 0.9144, Validation Loss = 0.9004, Validation Accuracy = 0.7526
Epoch 9: Train Loss = 0.1879, Train Accuracy = 0.9334, Validation Loss = 0.9725, Validation Accuracy = 0.7449
Early stop

([1.3604263772964478,
  0.9648204377174378,
  0.7869316023826599,
  0.6510714854717254,
  0.5349326236486435,
  0.42756436805725095,
  0.3225167368412018,
  0.24351552265286447,
  0.18787686287164687],
 [1.0987286692971636,
  0.9105733631143145,
  0.8799411515901043,
  0.7772657554240743,
  0.7615754985885256,
  0.7362740119551398,
  0.8418579595104144,
  0.9003843824574902,
  0.9725105084811047])

In [11]:
accuracy, f1, confusion_matrix = evaluate_model(model_scratch_1, test_loader_1)
print(f"Accuracy: {accuracy}, f1: {f1}")

Accuracy: 0.74, f1: 0.7369257932876512


In [14]:
confusion_matrix

array([[793,  32,  28,   8,  18,   4,   8,  16,  31,  62],
       [ 14, 876,   5,   4,   8,   3,   3,   3,   6,  78],
       [ 55,  11, 668,  50,  75,  45,  45,  24,  12,  15],
       [ 30,  22,  62, 490,  82, 159,  67,  52,  12,  24],
       [ 15,   7,  57,  38, 724,  35,  36,  73,   7,   8],
       [ 18,  10,  49, 143,  57, 608,  17,  83,   5,  10],
       [  4,  19,  40,  41,  47,  27, 794,  15,   4,   9],
       [ 15,   6,  22,  19,  53,  19,   5, 845,   2,  14],
       [ 92,  60,   8,   8,   4,   4,   3,   7, 747,  67],
       [ 27,  74,   5,   4,   8,   3,   2,  13,   9, 855]])

### Pre trained Resnet on ImageNet

In [17]:
model_pretrained_1 = get_resnet18(num_classes, pretrained=True)
train_model(model_pretrained_1, train_loader_1, val_loader_1, test_loader_1, epochs=20, lr=0.001, patience=3, model_name = "Pretrained Resnet18", image_size = 36)

Epoch 1: Train Loss = 0.8930, Train Accuracy = 0.6997, Validation Loss = 0.7280, Validation Accuracy = 0.7565
Epoch 2: Train Loss = 0.6007, Train Accuracy = 0.7976, Validation Loss = 0.9266, Validation Accuracy = 0.6956
Epoch 3: Train Loss = 0.4961, Train Accuracy = 0.8354, Validation Loss = 0.6822, Validation Accuracy = 0.7761
Epoch 4: Train Loss = 0.3823, Train Accuracy = 0.8701, Validation Loss = 0.6365, Validation Accuracy = 0.7993
Epoch 5: Train Loss = 0.2761, Train Accuracy = 0.9058, Validation Loss = 0.6115, Validation Accuracy = 0.8104
Epoch 6: Train Loss = 0.2252, Train Accuracy = 0.9239, Validation Loss = 0.5833, Validation Accuracy = 0.8202
Epoch 7: Train Loss = 0.1904, Train Accuracy = 0.9347, Validation Loss = 0.6706, Validation Accuracy = 0.8130
Epoch 8: Train Loss = 0.1542, Train Accuracy = 0.9469, Validation Loss = 0.7479, Validation Accuracy = 0.7944
Epoch 9: Train Loss = 0.1372, Train Accuracy = 0.9545, Validation Loss = 0.6834, Validation Accuracy = 0.8035
Early stop

([0.8929656528949738,
  0.6006718823909759,
  0.4960545566082001,
  0.3823473824739456,
  0.2760554719746113,
  0.22518192130327225,
  0.19035413164794446,
  0.15420716393887995,
  0.1371852827847004],
 [0.7280047198010099,
  0.9266072583805983,
  0.6821850641707706,
  0.6364684030888187,
  0.6115278063496207,
  0.5832732796289359,
  0.6706070731020277,
  0.7478745361422278,
  0.6834482523096594])

In [22]:
accuracy, f1, confusion_matrix = evaluate_model(model_pretrained_1, test_loader_1)
print(f"Accuracy: {accuracy}, f1: {f1}")

TypeError: 'numpy.ndarray' object is not callable

In [21]:
confusion_matrix

array([[793,  32,  28,   8,  18,   4,   8,  16,  31,  62],
       [ 14, 876,   5,   4,   8,   3,   3,   3,   6,  78],
       [ 55,  11, 668,  50,  75,  45,  45,  24,  12,  15],
       [ 30,  22,  62, 490,  82, 159,  67,  52,  12,  24],
       [ 15,   7,  57,  38, 724,  35,  36,  73,   7,   8],
       [ 18,  10,  49, 143,  57, 608,  17,  83,   5,  10],
       [  4,  19,  40,  41,  47,  27, 794,  15,   4,   9],
       [ 15,   6,  22,  19,  53,  19,   5, 845,   2,  14],
       [ 92,  60,   8,   8,   4,   4,   3,   7, 747,  67],
       [ 27,  74,   5,   4,   8,   3,   2,  13,   9, 855]])

In [25]:
finish_run()

0,1
epoch,▁▂▃▄▅▅▆▇█
train_loss,█▅▄▃▂▂▁▁▁
val_loss,▄█▃▂▂▁▃▄▃

0,1
epoch,9.0
train_loss,0.13719
val_loss,0.68345


# 1.2 Resized images

### Resnet from scratch

In [12]:
model_scratch_2 = get_resnet18(num_classes=num_classes, pretrained=False)
train_losses, val_losses = train_model(model_scratch_2, train_loader_2, val_loader_2, test_loader_2, epochs=20, lr=0.001, patience=3, model_name = "Scratch Resnet18", image_size = 224)

Epoch 1: Train Loss = 1.4950, Train Accuracy = 0.4551, Validation Loss = 1.3145, Validation Accuracy = 0.5361
Epoch 2: Train Loss = 0.9378, Train Accuracy = 0.6669, Validation Loss = 0.8914, Validation Accuracy = 0.6853
Epoch 3: Train Loss = 0.7121, Train Accuracy = 0.7518, Validation Loss = 0.7191, Validation Accuracy = 0.7538
Epoch 4: Train Loss = 0.5722, Train Accuracy = 0.8012, Validation Loss = 0.6134, Validation Accuracy = 0.7897
Epoch 5: Train Loss = 0.4605, Train Accuracy = 0.8404, Validation Loss = 0.5933, Validation Accuracy = 0.8005
Epoch 6: Train Loss = 0.3608, Train Accuracy = 0.8734, Validation Loss = 0.6274, Validation Accuracy = 0.7934
Epoch 7: Train Loss = 0.2698, Train Accuracy = 0.9045, Validation Loss = 0.6284, Validation Accuracy = 0.8123
Epoch 8: Train Loss = 0.1964, Train Accuracy = 0.9310, Validation Loss = 0.5868, Validation Accuracy = 0.8246
Epoch 9: Train Loss = 0.1391, Train Accuracy = 0.9509, Validation Loss = 0.7615, Validation Accuracy = 0.8072
Epoch 10: 

In [15]:
accuracy, f1, confusion_matrix = evaluate_model(model_scratch_2, test_loader_2)
print(f"Accuracy: {accuracy}, f1: {f1}")

Accuracy: 0.825, f1: 0.8233035370627926


In [16]:
confusion_matrix

array([[852,  11,  27,   5,   9,   2,   4,  11,  63,  16],
       [ 10, 942,   1,   3,   2,   2,   1,   0,  16,  23],
       [ 60,   4, 737,  31,  47,  29,  47,  35,   8,   2],
       [ 29,  11,  38, 667,  40, 101,  55,  44,  11,   4],
       [  5,   3,  44,  34, 846,  11,  26,  28,   3,   0],
       [  9,   0,  36, 150,  43, 682,  25,  48,   4,   3],
       [  7,   3,  39,  22,  25,  10, 882,   8,   3,   1],
       [  7,   2,  11,  22,  46,  16,   1, 894,   1,   0],
       [ 30,  13,   4,   7,   4,   2,   3,   4, 923,  10],
       [ 18,  97,   1,   7,   3,   2,   4,  14,  29, 825]])

### Pre trained resnet

In [13]:
model_pretrained_2 = get_resnet18(num_classes=num_classes, pretrained=True)

train_model(model_pretrained_2, train_loader_2, val_loader_2, test_loader_2, epochs=10, lr=0.001, patience=3, model_name = "Pretrained Resnet18", image_size = 224)

Epoch 1: Train Loss = 0.7481, Train Accuracy = 0.7459, Validation Loss = 0.6435, Validation Accuracy = 0.7783
Epoch 2: Train Loss = 0.4436, Train Accuracy = 0.8490, Validation Loss = 0.5349, Validation Accuracy = 0.8181
Epoch 3: Train Loss = 0.3322, Train Accuracy = 0.8847, Validation Loss = 0.3911, Validation Accuracy = 0.8716
Epoch 4: Train Loss = 0.2457, Train Accuracy = 0.9151, Validation Loss = 0.3677, Validation Accuracy = 0.8793
Epoch 5: Train Loss = 0.1816, Train Accuracy = 0.9375, Validation Loss = 0.4107, Validation Accuracy = 0.8746
Epoch 6: Train Loss = 0.1334, Train Accuracy = 0.9547, Validation Loss = 0.4230, Validation Accuracy = 0.8742
Epoch 7: Train Loss = 0.1103, Train Accuracy = 0.9622, Validation Loss = 0.4657, Validation Accuracy = 0.8728
Early stopping at epoch 7 due to no improvement in validation loss.


([0.7481457582950592,
  0.4436212016582489,
  0.3321911883711815,
  0.2456640499457717,
  0.18159257866963743,
  0.13338503484800457,
  0.11027054528999142],
 [0.6435292354597452,
  0.5348895857223688,
  0.39114145239511616,
  0.36769532583677733,
  0.4107146911180248,
  0.42295592366316065,
  0.4657470988103757])

In [15]:
accuracy, f1, confusion_matrix = evaluate_model(model_pretrained_2, test_loader_2)
print(f"Accuracy: {accuracy}, f1: {f1}")

Accuracy: 0.8727, f1: 0.8721663722870442


In [16]:
confusion_matrix

array([[878,  10,  28,  11,   5,   2,   4,  10,  40,  12],
       [  0, 986,   0,   0,   0,   0,   0,   1,   7,   6],
       [ 42,   7, 803,  32,  23,  25,  29,  33,   5,   1],
       [ 13,  22,  27, 807,  15,  45,  32,  21,   9,   9],
       [  8,   6,  29,  39, 825,  18,  30,  44,   0,   1],
       [  8,  13,  15, 141,  10, 767,   8,  28,   6,   4],
       [  5,   4,  29,  18,   5,   4, 926,   3,   2,   4],
       [  5,   7,   2,  19,  11,  19,   1, 935,   0,   1],
       [ 29,  12,   7,   1,   0,   0,   2,   3, 934,  12],
       [ 10, 109,   2,   0,   0,   0,   0,   3,  10, 866]])

## 1.3 Modifying architecture

In [27]:
def get_resnet18(num_classes, pretrained=False, variant=1):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None)

    if variant == 1:
        # Reduce kernel size & remove maxpool
        model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        model.maxpool = nn.Identity()

    elif variant == 2:
        # Reduce kernel size without removing maxpool
        model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # model.maxpool = nn.Identity()

    elif variant == 3:
        # Reduce kernel size & increase stride, remove maxpool
        model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        model.maxpool = nn.Identity()

    # elif variant == 4:
    #     # make kernel size 7
    #     model.conv1 = nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, bias=False)
    #     # model.maxpool = nn.Identity()
    
    # Modify the final fully connected layer
    model.fc = nn.Linear(512, num_classes)

    return model

In [28]:
# models trained from scratch
scratch_variant_1 = get_resnet18(num_classes, pretrained=False, variant=1)
scratch_variant_2 = get_resnet18(num_classes, pretrained=False, variant=2)
scratch_variant_3 = get_resnet18(num_classes, pretrained=False, variant=3)
scratch_variant_4 = get_resnet18(num_classes, pretrained=False, variant=4)
# pretrained models
pretrained_variant_1 = get_resnet18(num_classes, pretrained=True, variant=1)
pretrained_variant_2 = get_resnet18(num_classes, pretrained=True, variant=2)
pretrained_variant_3 = get_resnet18(num_classes, pretrained=True, variant=3)
pretrained_variant_4 = get_resnet18(num_classes, pretrained=True, variant=4)

### Variant 1: no maxpool, kernel size = 3, stride = 1

In [29]:
train_losses, val_losses = train_model(scratch_variant_1, train_loader_1, val_loader_1, test_loader_1, epochs=20, lr=0.001, patience=3, model_name = "Scratch variant 1", image_size = 36)

Epoch 1: Train Loss = 1.3189, Train Accuracy = 0.5201, Validation Loss = 1.0268, Validation Accuracy = 0.6281
Epoch 2: Train Loss = 0.8350, Train Accuracy = 0.7037, Validation Loss = 0.7682, Validation Accuracy = 0.7261
Epoch 3: Train Loss = 0.6327, Train Accuracy = 0.7780, Validation Loss = 0.7200, Validation Accuracy = 0.7504
Epoch 4: Train Loss = 0.5022, Train Accuracy = 0.8257, Validation Loss = 0.6051, Validation Accuracy = 0.7962
Epoch 5: Train Loss = 0.3871, Train Accuracy = 0.8631, Validation Loss = 0.7468, Validation Accuracy = 0.7717
Epoch 6: Train Loss = 0.2989, Train Accuracy = 0.8947, Validation Loss = 0.6623, Validation Accuracy = 0.8008
Epoch 7: Train Loss = 0.2121, Train Accuracy = 0.9255, Validation Loss = 0.6495, Validation Accuracy = 0.8075
Early stopping at epoch 7 due to no improvement in validation loss.


In [34]:
accuracy, f1, confusion_matrix = evaluate_model(scratch_variant_1, test_loader_1)
print(f"Accuracy: {accuracy}, f1: {f1}")

Accuracy: 0.8065, f1: 0.8078684140857917


In [35]:
confusion_matrix

array([[858,  11,  68,   9,   4,   3,   5,   6,  24,  12],
       [ 22, 922,   5,   1,   1,   3,   9,   1,  16,  20],
       [ 27,   1, 844,  43,  24,  14,  34,  11,   1,   1],
       [ 23,   5, 104, 696,  13,  92,  40,  19,   4,   4],
       [ 13,   3, 150,  62, 644,  27,  67,  29,   5,   0],
       [  7,   0,  91, 161,  13, 672,  10,  43,   3,   0],
       [  7,   2,  57,  38,   4,  14, 872,   2,   4,   0],
       [ 11,   2,  57,  32,  22,  26,  12, 831,   2,   5],
       [ 49,  17,  16,   7,   1,   1,   3,   1, 894,  11],
       [ 32,  70,  14,   3,   0,   9,   8,  15,  17, 832]])

In [36]:
finish_run()

0,1
epoch,▁▂▃▅▆▇█
train_loss,█▅▄▃▂▂▁
val_loss,█▄▃▁▃▂▂

0,1
epoch,7.0
train_loss,0.21214
val_loss,0.64951


In [37]:
train_losses, val_losses = train_model(pretrained_variant_1, train_loader_1, val_loader_1, test_loader_1, epochs=20, lr=0.001, patience=3, model_name = "Pretrained variant 1", image_size = 36)

Epoch 1: Train Loss = 0.8593, Train Accuracy = 0.7000, Validation Loss = 0.6376, Validation Accuracy = 0.7846
Epoch 2: Train Loss = 0.4875, Train Accuracy = 0.8327, Validation Loss = 0.5163, Validation Accuracy = 0.8230
Epoch 3: Train Loss = 0.3389, Train Accuracy = 0.8824, Validation Loss = 0.4469, Validation Accuracy = 0.8538
Epoch 4: Train Loss = 0.2349, Train Accuracy = 0.9197, Validation Loss = 0.4675, Validation Accuracy = 0.8476
Epoch 5: Train Loss = 0.1705, Train Accuracy = 0.9409, Validation Loss = 0.4814, Validation Accuracy = 0.8494
Epoch 6: Train Loss = 0.1220, Train Accuracy = 0.9571, Validation Loss = 0.6545, Validation Accuracy = 0.8351
Early stopping at epoch 6 due to no improvement in validation loss.


In [41]:
accuracy, f1, conf_matrix = evaluate_model(pretrained_variant_1, test_loader_1)
print(f"Accuracy: {accuracy}, f1: {f1}")

Accuracy: 0.8391, f1: 0.835976417834876


In [43]:
conf_matrix

array([[888,   4,  15,   4,  17,   8,   0,   6,  29,  29],
       [ 10, 915,   1,   1,   1,   1,   1,   0,  11,  59],
       [ 64,   2, 724,  22,  63,  77,  19,  16,   8,   5],
       [ 21,   4,  36, 486,  83, 293,  31,  28,  11,   7],
       [ 17,   2,  30,  13, 888,  26,  13,   7,   3,   1],
       [  3,   1,  18,  30,  27, 899,   4,  14,   2,   2],
       [ 13,   0,  26,  11,  24,  33, 870,   6,   9,   8],
       [  7,   0,   8,   5,  80,  36,   1, 859,   3,   1],
       [ 38,   9,   6,   1,   2,   5,   3,   2, 917,  17],
       [ 10,  24,   0,   2,   2,   5,   0,   3,   9, 945]])

In [44]:
finish_run()

0,1
epoch,▁▂▄▅▇█
train_loss,█▄▃▂▁▁
val_loss,▇▃▁▂▂█

0,1
epoch,6.0
train_loss,0.12204
val_loss,0.65449


### Variant 2: maxpool, kernel size = 3, stride = 1

In [45]:
train_losses, val_losses = train_model(scratch_variant_2, train_loader_1, val_loader_1, test_loader_1, epochs=20, lr=0.001, patience=3, model_name = "Scratch variant 2", image_size = 36)

Epoch 1: Train Loss = 1.2665, Train Accuracy = 0.5427, Validation Loss = 0.9964, Validation Accuracy = 0.6467
Epoch 2: Train Loss = 0.8493, Train Accuracy = 0.7023, Validation Loss = 0.8229, Validation Accuracy = 0.7205
Epoch 3: Train Loss = 0.6616, Train Accuracy = 0.7705, Validation Loss = 0.7603, Validation Accuracy = 0.7424
Epoch 4: Train Loss = 0.5331, Train Accuracy = 0.8121, Validation Loss = 0.6643, Validation Accuracy = 0.7728
Epoch 5: Train Loss = 0.4084, Train Accuracy = 0.8584, Validation Loss = 0.6388, Validation Accuracy = 0.7906
Epoch 6: Train Loss = 0.3078, Train Accuracy = 0.8925, Validation Loss = 0.6489, Validation Accuracy = 0.7924
Epoch 7: Train Loss = 0.2237, Train Accuracy = 0.9217, Validation Loss = 0.7297, Validation Accuracy = 0.7915
Epoch 8: Train Loss = 0.1676, Train Accuracy = 0.9406, Validation Loss = 0.7812, Validation Accuracy = 0.7855
Early stopping at epoch 8 due to no improvement in validation loss.


In [57]:
accuracy, f1, confusion_matrix = evaluate_model(scratch_variant_2, test_loader_1)
print(f"Accuracy: {accuracy}, f1: {f1}")

Accuracy: 0.7866, f1: 0.7851912744939368


In [47]:
confusion_matrix

array([[799,  19,  62,  18,  18,   3,   5,   5,  51,  20],
       [  5, 923,   5,   5,   2,   1,   7,   2,  18,  32],
       [ 43,   1, 815,  26,  48,  17,  31,  10,   7,   2],
       [ 11,   2, 119, 661,  80,  36,  60,  17,   8,   6],
       [ 18,   3,  91,  31, 794,   4,  25,  29,   5,   0],
       [  6,   2, 130, 249,  48, 480,  35,  37,  11,   2],
       [  6,   4,  40,  42,  30,   1, 865,   3,   6,   3],
       [  7,   3,  56,  38,  40,  10,   5, 829,   2,  10],
       [ 31,  27,  17,   9,   5,   2,   3,   0, 893,  13],
       [ 24,  95,  21,   6,   2,   1,   9,  14,  21, 807]])

In [48]:
finish_run()

0,1
epoch,▁▂▃▄▅▆▇█
train_loss,█▅▄▃▃▂▁▁
val_loss,█▅▃▁▁▁▃▄

0,1
epoch,8.0
train_loss,0.1676
val_loss,0.7812


In [49]:
train_losses, val_losses = train_model(pretrained_variant_2, train_loader_1, val_loader_1, test_loader_1, epochs=20, lr=0.001, patience=3, model_name = "Pretrained variant 2", image_size = 36)

Epoch 1: Train Loss = 1.0640, Train Accuracy = 0.6298, Validation Loss = 0.7979, Validation Accuracy = 0.7286
Epoch 2: Train Loss = 0.6395, Train Accuracy = 0.7804, Validation Loss = 0.6554, Validation Accuracy = 0.7794
Epoch 3: Train Loss = 0.4658, Train Accuracy = 0.8410, Validation Loss = 0.6239, Validation Accuracy = 0.7991
Epoch 4: Train Loss = 0.3421, Train Accuracy = 0.8820, Validation Loss = 0.6186, Validation Accuracy = 0.8025
Epoch 5: Train Loss = 0.2549, Train Accuracy = 0.9115, Validation Loss = 0.6172, Validation Accuracy = 0.8150
Epoch 6: Train Loss = 0.1839, Train Accuracy = 0.9375, Validation Loss = 0.7341, Validation Accuracy = 0.8057
Epoch 7: Train Loss = 0.1436, Train Accuracy = 0.9519, Validation Loss = 0.7120, Validation Accuracy = 0.8118
Epoch 8: Train Loss = 0.1195, Train Accuracy = 0.9596, Validation Loss = 0.7535, Validation Accuracy = 0.8107
Early stopping at epoch 8 due to no improvement in validation loss.


In [52]:
accuracy, f1, confusion_matrix = evaluate_model(pretrained_variant_2, test_loader_1)
print(f"Accuracy: {accuracy}, f1: {f1}")

Accuracy: 0.8108, f1: 0.8098750816949611


In [53]:
confusion_matrix

array([[869,   8,  48,   4,  12,   1,   5,  11,  31,  11],
       [ 12, 918,  11,   0,   0,   1,   5,   3,  30,  20],
       [ 42,   2, 821,  25,  34,  24,  27,  20,   2,   3],
       [ 22,   5, 108, 612,  62,  90,  42,  41,  11,   7],
       [  9,   2,  63,  33, 786,  11,  19,  74,   2,   1],
       [ 17,   2,  80, 139,  38, 647,  21,  51,   1,   4],
       [ 12,   3,  61,  22,  19,  11, 844,  16,   6,   6],
       [ 12,   0,  37,  13,  22,   9,   1, 901,   1,   4],
       [ 43,   9,  25,   4,  14,   8,   7,   3, 882,   5],
       [ 27,  86,   8,   8,   1,   6,   3,   6,  27, 828]])

In [54]:
finish_run()

0,1
epoch,▁▂▃▄▅▆▇█
train_loss,█▅▄▃▂▁▁▁
val_loss,█▂▁▁▁▆▅▆

0,1
epoch,8.0
train_loss,0.11952
val_loss,0.75353


In [64]:
finish_run()

0,1
epoch,▁▂▃▅▆▇█
train_loss,█▅▄▃▂▁▁
val_loss,█▅▂▁▂▁▂

0,1
epoch,7.0
train_loss,0.21788
val_loss,0.75483


# Variant 3: no maxpool, kernel size = 3, stride = 2

In [58]:
train_losses, val_losses = train_model(scratch_variant_3, train_loader_1, val_loader_1, test_loader_1, epochs=20, lr=0.001, patience=3, model_name = "Scratch variant 3", image_size = 36)

Epoch 1: Train Loss = 1.3404, Train Accuracy = 0.5136, Validation Loss = 1.0653, Validation Accuracy = 0.6227
Epoch 2: Train Loss = 0.8943, Train Accuracy = 0.6834, Validation Loss = 0.8887, Validation Accuracy = 0.6955
Epoch 3: Train Loss = 0.6805, Train Accuracy = 0.7621, Validation Loss = 0.7497, Validation Accuracy = 0.7383
Epoch 4: Train Loss = 0.5332, Train Accuracy = 0.8135, Validation Loss = 0.6830, Validation Accuracy = 0.7692
Epoch 5: Train Loss = 0.3990, Train Accuracy = 0.8610, Validation Loss = 0.7366, Validation Accuracy = 0.7594
Epoch 6: Train Loss = 0.2971, Train Accuracy = 0.8952, Validation Loss = 0.6943, Validation Accuracy = 0.7872
Epoch 7: Train Loss = 0.2179, Train Accuracy = 0.9237, Validation Loss = 0.7548, Validation Accuracy = 0.7853
Early stopping at epoch 7 due to no improvement in validation loss.


In [61]:
accuracy, f1, confusion_matrix = evaluate_model(scratch_variant_3, test_loader_1)
print(f"Accuracy: {accuracy}, f1: {f1}")

Accuracy: 0.7822, f1: 0.7812596049746289


In [62]:
confusion_matrix

array([[843,   4,  49,  11,  27,   4,  12,   7,  35,   8],
       [ 26, 827,   6,   6,   6,   3,  10,   3,  55,  58],
       [ 44,   3, 656,  45,  88,  63,  73,  16,   6,   6],
       [ 34,   2,  50, 565,  74, 151,  82,  28,  10,   4],
       [ 11,   1,  20,  34, 825,  35,  32,  35,   7,   0],
       [ 15,   2,  25, 122,  47, 722,  36,  26,   2,   3],
       [  4,   1,  15,  41,  36,  13, 877,   3,   9,   1],
       [ 18,   1,   8,  31,  59,  61,   4, 808,   4,   6],
       [ 57,   5,   7,   3,   8,   4,   3,   3, 895,  15],
       [ 65,  41,   7,   8,   3,   6,  10,  22,  34, 804]])

In [65]:
finish_run()

In [66]:
train_losses, val_losses = train_model(pretrained_variant_3, train_loader_1, val_loader_1, test_loader_1, epochs=20, lr=0.001, patience=3, model_name = "Pretrained variant 3", image_size = 36)

Epoch 1: Train Loss = 0.1684, Train Accuracy = 0.9437, Validation Loss = 0.6669, Validation Accuracy = 0.8200
Epoch 2: Train Loss = 0.1259, Train Accuracy = 0.9588, Validation Loss = 0.7140, Validation Accuracy = 0.8172
Epoch 3: Train Loss = 0.1061, Train Accuracy = 0.9641, Validation Loss = 0.7369, Validation Accuracy = 0.8196
Epoch 4: Train Loss = 0.0848, Train Accuracy = 0.9707, Validation Loss = 0.6924, Validation Accuracy = 0.8250
Early stopping at epoch 4 due to no improvement in validation loss.


In [69]:
accuracy, f1, confusion_matrix = evaluate_model(pretrained_variant_3, test_loader_1)
print(f"Accuracy: {accuracy}, f1: {f1}")

Accuracy: 0.8197, f1: 0.8195491458886749


In [70]:
confusion_matrix

array([[856,  14,  35,  13,   5,   4,   6,   3,  39,  25],
       [  9, 926,   4,   2,   1,   1,   1,   2,   6,  48],
       [ 63,   5, 743,  58,  46,  20,  45,   9,   6,   5],
       [ 26,   7,  41, 726,  49,  59,  50,  21,  12,   9],
       [ 18,   3,  50,  54, 798,  10,  32,  28,   6,   1],
       [ 18,   3,  40, 231,  32, 626,  16,  27,   4,   3],
       [ 11,   8,  31,  40,  18,  10, 867,   1,   9,   5],
       [ 27,   2,  14,  44,  28,  23,   1, 851,   2,   8],
       [ 41,  24,  10,   7,   3,   1,   2,   0, 890,  22],
       [ 21,  42,   3,   5,   1,   1,   2,   5,   6, 914]])

### Observations

1. Pretrained models perform better than those trained from scratch.
2. Image sise 224 gives better results than image size 36.
3. Less kernel size gives better results
4. Maxpool removal gives better results

### Randomly initialising the first layer

In [75]:
import torch.nn.init as init
import torchvision.models as models
import torch.nn as nn

def get_resnet18(num_classes, pretrained=False):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None)
    
    # Randomly initialize only the first layer (conv1)
    init.kaiming_normal_(model.conv1.weight, mode='fan_out', nonlinearity='relu')

    model.fc = nn.Linear(512, num_classes)  # Modify FC layer
    return model

#### *Prompt: Modify this code to load model and randomly initialise first layer*

In [76]:
finish_run()

0,1
epoch,▁▃▆█
train_loss,█▄▃▁
val_loss,▁▆█▄

0,1
epoch,4.0
train_loss,0.08483
val_loss,0.69238


In [77]:
random_init_model = get_resnet18(num_classes, pretrained=False)
train_model(random_init_model, train_loader_1, val_loader_1, test_loader_1, epochs=20, lr=0.001, patience=3, model_name = "Random initialised model", image_size = 36)

Epoch 1: Train Loss = 1.3561, Train Accuracy = 0.5126, Validation Loss = 1.3047, Validation Accuracy = 0.5516
Epoch 2: Train Loss = 0.9754, Train Accuracy = 0.6573, Validation Loss = 1.0035, Validation Accuracy = 0.6533
Epoch 3: Train Loss = 0.7934, Train Accuracy = 0.7217, Validation Loss = 0.8466, Validation Accuracy = 0.7043
Epoch 4: Train Loss = 0.6487, Train Accuracy = 0.7717, Validation Loss = 0.7801, Validation Accuracy = 0.7345
Epoch 5: Train Loss = 0.5331, Train Accuracy = 0.8152, Validation Loss = 0.9734, Validation Accuracy = 0.6872
Epoch 6: Train Loss = 0.4174, Train Accuracy = 0.8529, Validation Loss = 0.7876, Validation Accuracy = 0.7475
Epoch 7: Train Loss = 0.3208, Train Accuracy = 0.8866, Validation Loss = 0.8337, Validation Accuracy = 0.7562
Early stopping at epoch 7 due to no improvement in validation loss.


([1.3560553646087647,
  0.9753717681884766,
  0.7933772093772888,
  0.6487215874671936,
  0.5331039210319519,
  0.41737567377090456,
  0.3208009243011475],
 [1.3047101615340846,
  1.0035328895423063,
  0.8466236249656435,
  0.7801486701722358,
  0.9734292846576423,
  0.7876081544502526,
  0.8337333246021513])

## Observations & Explanations

1. **Pretrained models perform better**  
   - They already learned general patterns from ImageNet, making training more efficient and preventing overfitting on smaller datasets.  

2. **Image size 224 is better than 36**  
   - ResNet was originally designed for 224×224 images, so its architecture is optimized for this resolution. Using smaller images (36×36) leads to excessive downsampling in early layers, losing crucial spatial details.  

3. **Smaller kernels improve performance**  
   - Smaller kernels (e.g., 3x3 instead of 7x7) focus on finer details, improve feature extraction, and introduce more non-linearity, leading to better generalization.  

4. **Removing max pooling helps**  
   - Max pooling reduces spatial resolution too quickly for small images, causing information loss. Removing it allows deeper layers to retain more useful features.
