In [1]:
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
import time
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import pickle
import pandas as pd
from glob import glob

# --- Download and Prepare TinyImageNet ---
print("Downloading TinyImageNet...")
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
!unzip -q tiny-imagenet-200.zip

# --- Prepare Validation Data Directory Structure ---
print("Preparing validation data directory structure...")
val_data = pd.read_csv('./tiny-imagenet-200/val/val_annotations.txt', sep='\t', header=None, names=['File', 'Class', 'X', 'Y', 'H', 'W'])

val_images_dir = './tiny-imagenet-200/val/images'
val_dest_dir = './tiny-imagenet-200/val/'

for index, row in val_data.iterrows():
    class_dir = os.path.join(val_dest_dir, row['Class'])
    if not os.path.exists(class_dir):
        os.makedirs(class_dir)
    image_path = os.path.join(val_images_dir, row['File'])
    dest_path = os.path.join(class_dir, row['File'])
    os.rename(image_path, dest_path)

# --- Configuration ---
DATA_DIR = "./tiny-imagenet-200"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")



Downloading TinyImageNet...
--2025-10-17 15:57:48--  http://cs231n.stanford.edu/tiny-imagenet-200.zip
Resolving cs231n.stanford.edu (cs231n.stanford.edu)... 171.64.64.64
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.64.64|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cs231n.stanford.edu/tiny-imagenet-200.zip [following]
--2025-10-17 15:57:48--  https://cs231n.stanford.edu/tiny-imagenet-200.zip
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.64.64|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 248100043 (237M) [application/zip]
Saving to: ‘tiny-imagenet-200.zip’


2025-10-17 15:58:21 (7.47 MB/s) - ‘tiny-imagenet-200.zip’ saved [248100043/248100043]

Preparing validation data directory structure...


## Configuration and Data Loading

In [1]:
import os
import shutil

print("Fixing validation directory structure...")

# Delete the old val/images directory when it is still present.
val_images_dir = './tiny-imagenet-200/val/images'
if os.path.exists(val_images_dir):
    print(f"Removing {val_images_dir}...")
    shutil.rmtree(val_images_dir)

# Give every *.JPEG file under train/ and val/ a lower-case .jpeg name.
print("Renaming .JPEG files to .jpeg...")

for split in ('train', 'val'):
    split_dir = os.path.join('./tiny-imagenet-200', split)
    if not os.path.exists(split_dir):
        # Nothing to rename (and nothing reported) for a missing split.
        continue
    # Visit the split directory and all of its sub-directories.
    for root, _dirs, files in os.walk(split_dir):
        for filename in (name for name in files if name.endswith('.JPEG')):
            os.rename(
                os.path.join(root, filename),
                os.path.join(root, filename.replace('.JPEG', '.jpeg')),
            )
    print(f"Completed renaming in {split} directory")

print("Fix complete! Now proceed with Checkpoint 1")

Fixing validation directory structure...
Removing ./tiny-imagenet-200/val/images...
Renaming .JPEG files to .jpeg...
Completed renaming in train directory
Completed renaming in val directory
Fix complete! Now proceed with Checkpoint 1


In [10]:
# --- Configuration ---
# Everything a reader might want to tune lives here.
DATA_DIR = "./tiny-imagenet-200"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_CLASSES = 200  # TinyImageNet has 200 classes
BATCH_SIZE = 256 if torch.cuda.is_available() else 32  # smaller batches when running on CPU
NUM_WORKERS = 2  # Adjusted for Colab
EPOCHS = 50
WEIGHT_DECAY = 0.0005
MOMENTUM = 0.9
LABEL_SMOOTHING = 0.1
print(f"Using device: {DEVICE}")
print(f"Batch Size: {BATCH_SIZE}, Workers: {NUM_WORKERS}, Epochs: {EPOCHS}")

# --- Data Augmentation and Loading ---
# Per-channel normalisation applied as the last step of both pipelines.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: random crop/flip/rotation/colour jitter, then tensor + normalise.
train_pipeline = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    normalize,
])

# Validation pipeline: deterministic resize + centre crop, then tensor + normalise.
val_pipeline = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

data_transforms = {'train': train_pipeline, 'val': val_pipeline}

print("\nLoading datasets...")
# One ImageFolder per split, each reading from DATA_DIR/<split>.
image_datasets = {}
for split in ('train', 'val'):
    image_datasets[split] = datasets.ImageFolder(os.path.join(DATA_DIR, split), data_transforms[split])

# Only the training loader shuffles its samples.
dataloaders = {}
for split in ('train', 'val'):
    dataloaders[split] = DataLoader(image_datasets[split], batch_size=BATCH_SIZE, shuffle=(split == 'train'),
                                    num_workers=NUM_WORKERS, pin_memory=True)

dataset_sizes = {}
for split in ('train', 'val'):
    dataset_sizes[split] = len(image_datasets[split])

classes = image_datasets['train'].classes
print(f"Train dataset size: {dataset_sizes['train']}, Validation dataset size: {dataset_sizes['val']}")
print(f"Number of classes: {len(classes)}")

Using device: cuda
Batch Size: 256, Workers: 2, Epochs: 50

Loading datasets...
Train dataset size: 100000, Validation dataset size: 10000
Number of classes: 200


## Model Setup and Loss/Optimizer


In [11]:
# --- Model Setup (ResNet-50) ---
print("\nSetting up ResNet-50 model...")
# Built without pretrained weights (weights=None) and placed on DEVICE.
model = models.resnet50(weights=None, num_classes=NUM_CLASSES).to(DEVICE)

# Loss function: cross-entropy with label smoothing taken from the config.
criterion = nn.CrossEntropyLoss(label_smoothing=LABEL_SMOOTHING)

# Optimizer: SGD with Nesterov momentum and weight decay.
sgd_settings = dict(lr=1e-3, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY, nesterov=True)
optimizer = optim.SGD(model.parameters(), **sgd_settings)

print("Model setup complete!")
# Count every element of every parameter tensor.
param_total = 0
for param in model.parameters():
    param_total += param.numel()
print(f"Total parameters: {param_total:,}")


Setting up ResNet-50 model...
Model setup complete!
Total parameters: 23,917,832


## LR Finder Class Definition

In [12]:
# ==============================================================================
# LR Finder Implementation
# ==============================================================================
class LRFinder:
    """Learning-rate range test.

    Trains for a limited number of mini-batches while multiplying the learning
    rate by a constant factor after every step, and records the smoothed loss
    and accuracy observed at each learning rate in ``self.history``.

    The model and optimizer state present at construction time are
    snapshotted so that ``reset()`` can undo the weight updates made by the
    sweep.
    """

    def __init__(self, model, optimizer, criterion, device):
        """Store the training objects and snapshot their state for ``reset()``.

        Args:
            model: network to train during the sweep.
            optimizer: optimizer whose param-group learning rates are swept.
            criterion: loss called as ``criterion(outputs, labels)``.
            device: device the input batches are moved to.
        """
        import copy  # local import: `copy` is not among the notebook's top-level imports

        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion
        self.device = device
        self.history = {'lr': [], 'loss': [], 'acc': []}

        # Snapshot used by reset(). Model tensors are cloned to CPU so the
        # backup does not keep a second copy of the weights on the GPU.
        self.initial_state_dict = {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }
        self.initial_optimizer_state = copy.deepcopy(optimizer.state_dict())

    def _set_lr(self, lr):
        """Write `lr` into every optimizer param group."""
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr

    def range_test(self, dataloader, start_lr=1e-4, end_lr=1, num_iter=100):
        """Sweep the learning rate exponentially from `start_lr` towards `end_lr`.

        Runs at most `num_iter` optimisation steps (fewer if `dataloader` is
        exhausted first). After each step the learning rate is multiplied by
        ``(end_lr / start_lr) ** (1 / num_iter)``.

        Each call starts a fresh ``self.history``. The stored loss/accuracy are
        bias-corrected exponential moving averages (factor 0.9): without the
        correction the average starts at 0 and the first points are
        artificially small, which misleads ``get_best_lr``.

        Note: this updates the model weights; call ``reset()`` afterwards to
        restore the state captured at construction.
        """
        self.model.train()
        # Start from a clean history so repeated sweeps are not mixed together.
        self.history = {'lr': [], 'loss': [], 'acc': []}

        lr_mult = (end_lr / start_lr) ** (1 / num_iter)
        lr = start_lr
        self._set_lr(lr)

        smoothing = 0.9
        running_loss = 0.0
        running_acc = 0.0
        pbar = tqdm(dataloader, desc='LR Range Test', total=num_iter)

        for iter_count, (inputs, labels) in enumerate(pbar, start=1):
            if iter_count > num_iter:
                break

            inputs, labels = inputs.to(self.device), labels.to(self.device)

            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels)

            loss.backward()
            self.optimizer.step()

            # Batch accuracy as a fraction in [0, 1]
            _, preds = torch.max(outputs, 1)
            acc = (preds == labels).float().mean().item()

            running_loss = smoothing * running_loss + (1 - smoothing) * loss.item()
            running_acc = smoothing * running_acc + (1 - smoothing) * acc

            # Bias correction for an average that was initialised at zero.
            correction = 1 - smoothing ** iter_count
            smoothed_loss = running_loss / correction
            smoothed_acc = running_acc / correction

            self.history['lr'].append(lr)
            self.history['loss'].append(smoothed_loss)
            self.history['acc'].append(smoothed_acc)

            # Raise the learning rate for the next step
            lr *= lr_mult
            self._set_lr(lr)

            pbar.set_postfix(lr=f'{lr:.2e}', loss=f'{smoothed_loss:.4f}', acc=f'{smoothed_acc:.4f}')

        pbar.close()

    def plot(self, plot_case="loss"):
        """Plot the recorded loss (``plot_case="loss"``) or accuracy (any other value) against LR on a log axis."""
        fig, ax = plt.subplots(figsize=(10, 6))
        if plot_case == "loss":
            ax.plot(self.history['lr'], self.history['loss'])
            ax.set_ylabel('Loss')
            ax.set_title('LR Finder - Loss')
        else:
            ax.plot(self.history['lr'], self.history['acc'])
            ax.set_ylabel('Accuracy')
            ax.set_title('LR Finder - Accuracy')
        ax.set_xlabel('Learning Rate')
        ax.set_xscale('log')
        plt.grid(True)
        plt.show()

    def get_best_lr(self, case="loss"):
        """Return the recorded learning rate with the lowest loss or highest accuracy.

        For ``case="loss"`` the first 10 points are skipped when more than 10
        were recorded; shorter histories are searched in full. Any other
        `case` selects the maximum-accuracy point.

        Raises:
            ValueError: if no sweep has been recorded yet.
        """
        if not self.history['lr']:
            raise ValueError("LR history is empty; run range_test() first")

        if case == "loss":
            losses = self.history['loss']
            # Skip the initial descent only when there is something left after it.
            skip = 10 if len(losses) > 10 else 0
            best_idx = int(np.argmin(losses[skip:])) + skip
        else:
            best_idx = int(np.argmax(self.history['acc']))

        best_lr = self.history['lr'][best_idx]
        print(f"Best LR ({case}): {best_lr:.2e}")
        return best_lr

    def reset(self):
        """Restore the model and optimizer state captured in ``__init__``."""
        self.model.load_state_dict(self.initial_state_dict)
        self.optimizer.load_state_dict(self.initial_optimizer_state)

print("LR Finder class defined!")

LR Finder class defined!


## Trainer and Tester Classes

In [13]:
# ==============================================================================
# Training and Testing Functions
# ==============================================================================
class Trainer:
    """Runs single training epochs and keeps a per-epoch loss/accuracy history."""

    def __init__(self):
        self.train_losses = []  # mean per-sample loss, one entry per epoch
        self.train_acc = []     # accuracy in percent, one entry per epoch

    def train(self, model, device, train_loader, optimizer, criterion, scheduler=None):
        """Train `model` for one pass over `train_loader`.

        Args:
            model: network to optimise (switched to train mode).
            device: device each batch is moved to.
            train_loader: iterable of ``(inputs, labels)`` batches.
            optimizer: optimizer stepped once per batch.
            criterion: loss called as ``criterion(outputs, labels)``.
            scheduler: optional LR scheduler; when given it is stepped once
                per batch, right after the optimizer.

        Returns:
            ``(epoch_loss, epoch_acc)``: mean per-sample loss and accuracy in
            percent, both computed over the samples actually processed. The
            same values are appended to ``train_losses`` / ``train_acc``.

        Raises:
            ValueError: if `train_loader` yields no samples.
        """
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        pbar = tqdm(train_loader, desc='Training')
        for inputs, labels in pbar:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            if scheduler is not None:
                scheduler.step()

            # Statistics: the criterion's batch value is weighted by batch
            # size so the epoch figure is a per-sample average.
            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += batch_size

            # Update progress bar with the running averages and current LR
            curr_acc = 100. * correct / total
            curr_loss = running_loss / total
            lr = optimizer.param_groups[0]['lr']
            pbar.set_postfix(loss=f'{curr_loss:.4f}', acc=f'{curr_acc:.2f}%', lr=f'{lr:.2e}')

        if total == 0:
            raise ValueError("train_loader yielded no samples")

        # Normalise by the number of samples seen rather than len(dataset):
        # the two differ when the loader drops the last batch or uses a sampler.
        epoch_loss = running_loss / total
        epoch_acc = 100. * correct / total

        self.train_losses.append(epoch_loss)
        self.train_acc.append(epoch_acc)

        return epoch_loss, epoch_acc

class Tester:
    """Evaluates a model and optionally keeps a few example predictions."""

    # Maximum number of examples kept in each of the two example lists.
    _MAX_EXAMPLES = 20

    def __init__(self):
        self.test_losses = []            # mean per-sample loss, one entry per test() call
        self.test_acc = []               # accuracy in percent, one entry per test() call
        self.misclassified_images = []   # examples from the most recent test() call
        self.trueclassified_images = []  # examples from the most recent test() call

    @staticmethod
    def _collect_examples(store, mask, inputs, preds, labels, limit):
        """Append samples selected by boolean `mask` to `store` until it holds `limit` items."""
        remaining = limit - len(store)
        if remaining <= 0:
            # Already full: skip the batch instead of scanning it element by element.
            return
        # Fetch the selected indices in one transfer, in ascending order.
        for i in torch.nonzero(mask).flatten()[:remaining].tolist():
            store.append({
                'image': inputs[i].cpu(),
                'pred': preds[i].item(),
                'true': labels[i].item()
            })

    def test(self, model, device, test_loader, criterion, misclassfied_required=False,
             trueclassified_required=False, classes=None, class_accuracy=90):
        """Evaluate `model` on `test_loader` without gradient tracking.

        Args:
            model: network to evaluate (switched to eval mode).
            device: device each batch is moved to.
            test_loader: iterable of ``(inputs, labels)`` batches.
            criterion: loss called as ``criterion(outputs, labels)``.
            misclassfied_required: when True, keep up to 20 wrongly predicted
                samples in ``misclassified_images``.
            trueclassified_required: when True, keep up to 20 correctly
                predicted samples in ``trueclassified_images``.
            classes: accepted for call compatibility; not used here.
            class_accuracy: accepted for call compatibility; not used here.

        Returns:
            ``(epoch_loss, epoch_acc)``: mean per-sample loss and accuracy in
            percent over the samples actually processed. The same values are
            appended to ``test_losses`` / ``test_acc``.

        Raises:
            ValueError: if `test_loader` yields no samples.
        """
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0

        # Example lists always describe the latest evaluation only.
        self.misclassified_images = []
        self.trueclassified_images = []

        with torch.no_grad():
            pbar = tqdm(test_loader, desc='Testing')
            for inputs, labels in pbar:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                batch_size = labels.size(0)
                running_loss += loss.item() * batch_size
                _, preds = torch.max(outputs, 1)
                hits = preds == labels
                correct += hits.sum().item()
                total += batch_size

                if misclassfied_required:
                    self._collect_examples(self.misclassified_images, ~hits,
                                           inputs, preds, labels, self._MAX_EXAMPLES)

                if trueclassified_required:
                    self._collect_examples(self.trueclassified_images, hits,
                                           inputs, preds, labels, self._MAX_EXAMPLES)

                # Update progress bar
                curr_acc = 100. * correct / total
                pbar.set_postfix(acc=f'{curr_acc:.2f}%')

        if total == 0:
            raise ValueError("test_loader yielded no samples")

        # Normalise by the number of samples seen rather than len(dataset).
        epoch_loss = running_loss / total
        epoch_acc = 100. * correct / total

        self.test_losses.append(epoch_loss)
        self.test_acc.append(epoch_acc)

        print(f'Test Loss: {epoch_loss:.4f}, Test Acc: {epoch_acc:.2f}%')

        return epoch_loss, epoch_acc

print("Trainer and Tester classes defined!")

Trainer and Tester classes defined!


## Utility Functions (Save/Load and Plotter)

In [14]:
# ==============================================================================
# Model Save/Load and Plotting Utilities
# ==============================================================================

def save_model(model, optimizer, criterion, scheduler, epoch, train_acc, train_loss, lr, test_acc, test_loss, file_name):
    """Write a training checkpoint to `file_name` with ``torch.save``.

    The checkpoint is a dict with the keys ``state_dict``, ``optimizer``,
    ``scheduler``, ``criterion``, ``epochs``, ``train_acc``, ``train_loss``,
    ``lr``, ``test_acc`` and ``test_loss``.

    Args:
        model, optimizer: their ``state_dict()`` is stored.
        criterion: stored as the object itself (pickled), not as a state dict,
            so the file must later be loaded with ``weights_only=False``.
        scheduler: its ``state_dict()`` is stored; may be None, in which case
            None is stored under ``scheduler``.
        epoch: stored under ``epochs``.
        train_acc, train_loss, lr, test_acc, test_loss: stored as given.
        file_name: destination path.
    """
    checkpoint = {
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        # Training without a scheduler is valid; don't crash on None.
        'scheduler': scheduler.state_dict() if scheduler is not None else None,
        'criterion': criterion,
        'epochs': epoch,
        'train_acc': train_acc,
        'train_loss': train_loss,
        'lr': lr,
        'test_acc': test_acc,
        'test_loss': test_loss
    }
    torch.save(checkpoint, file_name)
    print(f"Model saved to {file_name}")

def load_checkpoint(model, optimizer, scheduler, criterion, filename='checkpoint.pth', map_location=None):
    """Restore training state from a checkpoint written by ``save_model``.

    Args:
        model, optimizer: receive the stored state dicts via ``load_state_dict``.
        scheduler: receives the stored scheduler state; may be None, and a
            checkpoint saved without a scheduler is also accepted.
        criterion: returned unchanged when no checkpoint file exists;
            otherwise replaced by the criterion object stored in the file.
        filename: checkpoint path.
        map_location: forwarded to ``torch.load``. When None and CUDA is not
            available, tensors are mapped to CPU so a checkpoint written on a
            GPU machine can still be loaded.

    Returns:
        ``(model, optimizer, scheduler, criterion, start_epoch, train_acc,
        train_loss, lr, test_acc, test_loss)``. If `filename` does not exist,
        ``start_epoch`` is 0 and the five histories are empty lists.

    Security: the checkpoint contains a pickled criterion object, so it must
    be read with ``weights_only=False``, which can execute arbitrary code
    during unpickling. Only load checkpoint files you trust.
    """
    if not os.path.isfile(filename):
        print(f"=> no checkpoint found at '{filename}'")
        return model, optimizer, scheduler, criterion, 0, [], [], [], [], []

    print(f"=> loading checkpoint '{filename}'")
    if map_location is None and not torch.cuda.is_available():
        map_location = 'cpu'
    # weights_only=False is required because save_model stores the criterion
    # object itself; newer PyTorch releases default to weights_only=True and
    # would refuse to unpickle it.
    checkpoint = torch.load(filename, map_location=map_location, weights_only=False)

    start_epoch = checkpoint['epochs']
    criterion = checkpoint['criterion']
    train_acc = checkpoint['train_acc']
    train_loss = checkpoint['train_loss']
    lr = checkpoint['lr']
    test_acc = checkpoint['test_acc']
    test_loss = checkpoint['test_loss']

    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    # Either side may legitimately have no scheduler.
    if scheduler is not None and checkpoint.get('scheduler') is not None:
        scheduler.load_state_dict(checkpoint['scheduler'])
    print(f"=> loaded checkpoint '{filename}' (epoch {checkpoint['epochs']})")

    return model, optimizer, scheduler, criterion, start_epoch, train_acc, train_loss, lr, test_acc, test_loss

class Plotter:
    """Static matplotlib helpers for training curves, LR history and image grids."""

    @staticmethod
    def plot_metrics(train_acc, train_loss, test_acc, test_loss):
        """Plot accuracy (left panel) and loss (right panel) for train and test.

        Each argument is a sequence with one value per epoch.
        """
        fig, axes = plt.subplots(1, 2, figsize=(15, 5))

        # Plot accuracy
        axes[0].plot(train_acc, label='Train Accuracy', marker='o')
        axes[0].plot(test_acc, label='Test Accuracy', marker='s')
        axes[0].set_xlabel('Epoch')
        axes[0].set_ylabel('Accuracy (%)')
        axes[0].set_title('Training and Test Accuracy')
        axes[0].legend()
        axes[0].grid(True)

        # Plot loss
        axes[1].plot(train_loss, label='Train Loss', marker='o')
        axes[1].plot(test_loss, label='Test Loss', marker='s')
        axes[1].set_xlabel('Epoch')
        axes[1].set_ylabel('Loss')
        axes[1].set_title('Training and Test Loss')
        axes[1].legend()
        axes[1].grid(True)

        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_lr_schedule(epochs, lr_history):
        """Plot `lr_history` on a logarithmic y-axis.

        `epochs` is accepted for call compatibility and is not used.
        """
        plt.figure(figsize=(10, 5))
        plt.plot(lr_history)
        plt.xlabel('Training Steps')
        plt.ylabel('Learning Rate')
        plt.title('OneCycleLR Schedule')
        plt.grid(True)
        plt.yscale('log')
        plt.show()

    @staticmethod
    def _label_for(classes, index):
        """Return ``classes[index]`` when available, else the index as a string."""
        if classes is not None and index < len(classes):
            return classes[index]
        return str(index)

    @staticmethod
    def plot_images(images_data, classes, title="Images", denormalize=True):
        """Show up to 20 images in a 4x5 grid titled with predicted and true labels.

        Args:
            images_data: sequence of dicts with keys ``image`` (a tensor that is
                transposed from channel-first to channel-last for display),
                ``pred`` and ``true`` (integer class indices).
            classes: sequence mapping class index to label; indices outside it
                (or ``classes=None``) are shown as plain numbers.
            title: figure title.
            denormalize: when True, undo the mean/std normalisation below and
                clip to [0, 1] before display.
        """
        if len(images_data) == 0:
            print("No images to display")
            return

        # Constants used to undo the input normalisation
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])

        fig, axes = plt.subplots(4, 5, figsize=(15, 12))
        axes = axes.ravel()

        # Switch every panel off first so unused slots do not show empty
        # frames and ticks when fewer than 20 images are supplied.
        for ax in axes:
            ax.axis('off')

        for ax, img_data in zip(axes, images_data[:20]):
            img = img_data['image'].numpy().transpose(1, 2, 0)
            if denormalize:
                img = std * img + mean
                img = np.clip(img, 0, 1)

            ax.imshow(img)
            pred_label = Plotter._label_for(classes, img_data['pred'])
            true_label = Plotter._label_for(classes, img_data['true'])
            ax.set_title(f'Pred: {pred_label}\nTrue: {true_label}', fontsize=8)
            ax.axis('off')

        plt.suptitle(title, fontsize=16)
        plt.tight_layout()
        plt.show()

print("Utility functions defined!")

Utility functions defined!


## Main Training Loop

In [25]:
# ==============================================================================
# Skip LR Finder - Use Fixed Learning Rate
# ==============================================================================
import gc

# Free memory left over from earlier cells. Collect unreachable Python objects
# first so any CUDA tensors they still hold are released, then ask the
# caching allocator to give its now-unused blocks back to the GPU. Calling
# empty_cache() before the collection would leave those blocks cached.
gc.collect()
torch.cuda.empty_cache()

# Override EPOCHS to 1
EPOCHS = 1

# Set maximum learning rate directly (used instead of running the LR finder)
MAX_LR = 0.01
print(f"Using fixed MAX_LR: {MAX_LR}")
print(f"Training for {EPOCHS} epochs")
print("Skipping LR Finder to save time and memory.")

Using fixed MAX_LR: 0.01
Training for 1 epochs
Skipping LR Finder to save time and memory.


In [29]:
# ==============================================================================
# Update EPOCHS to 1
# ==============================================================================
EPOCHS = 1
print(f"Training for {EPOCHS} epoch only")

# ==============================================================================
# Main Training Loop with OneCycleLR
# ==============================================================================
print("="*80)
print("Starting Training with OneCycleLR")
print("="*80)

# OneCycleLR settings, gathered in one place. The schedule length is derived
# from epochs * steps_per_epoch because total_steps is left as None.
onecycle_settings = dict(
    max_lr=MAX_LR,
    total_steps=None,
    epochs=EPOCHS,
    steps_per_epoch=len(dataloaders['train']),
    pct_start=0.3,  # Warmup for 30% of epoch
    anneal_strategy='linear',
    cycle_momentum=False,
    base_momentum=0.9,
    max_momentum=0.95,
    div_factor=10,
    final_div_factor=10,
)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, **onecycle_settings)

# Helpers that run one epoch of training / evaluation and keep their own history
train_obj, test_obj = Trainer(), Tester()

# Per-epoch history (lr_history holds the LR read at the start of each epoch)
train_acc, train_loss, lr_history, test_acc, test_losses = [], [], [], [], []

# Wall-clock start and best validation accuracy seen so far
start_time = time.time()
best_acc = 0.0

for epoch in range(1, EPOCHS + 1):
    current_lr = optimizer.param_groups[0]['lr']
    print(f"\n{'='*80}")
    print(f"EPOCH: {epoch}/{EPOCHS}")
    print(f"Learning Rate: {current_lr:.2e}")
    print(f"{'='*80}")

    lr_history.append(current_lr)

    # One training pass; the scheduler is stepped inside Trainer.train
    epoch_start = time.time()
    epoch_train_loss, epoch_train_acc = train_obj.train(
        model, DEVICE, dataloaders['train'], optimizer, criterion, scheduler=scheduler
    )

    # Evaluation, also collecting example misclassified / correct images
    epoch_test_loss, epoch_test_acc = test_obj.test(
        model, DEVICE, dataloaders['val'], criterion,
        misclassfied_required=True, trueclassified_required=True,
        classes=classes, class_accuracy=90,
    )

    # Record this epoch's metrics
    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_losses.append(epoch_test_loss)

    epoch_time = time.time() - epoch_start
    print(f"\nEpoch Time: {epoch_time:.2f}s")
    print(f"Train Acc: {train_acc[-1]:.2f}% | Test Acc: {test_acc[-1]:.2f}%")

    # Checkpoint only when validation accuracy improves
    if not test_acc[-1] > best_acc:
        continue
    best_acc = test_acc[-1]
    print(f"âœ¨ New best accuracy: {best_acc:.2f}%")
    save_model(model, optimizer, criterion, scheduler, epoch, train_acc, train_loss,
               lr_history, test_acc, test_losses, 'imagenet200_resnet50_best.pth')

total_time = time.time() - start_time
print(f"\n{'='*80}")
print("Training Finished!")
print(f"{'='*80}")
print(f"Total training time: {total_time/60:.2f} minutes ({total_time/3600:.2f} hours)")
print(f"Final Training Accuracy: {train_acc[-1]:.2f}%")
print(f"Final Testing Accuracy: {test_acc[-1]:.2f}%")
print(f"Best Model saved at: imagenet200_resnet50_best.pth")

Training for 1 epoch only
Starting Training with OneCycleLR

EPOCH: 1/1
Learning Rate: 1.00e-03


Training:   0%|          | 0/782 [00:01<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 392.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 150.12 MiB is free. Process 2556 has 14.59 GiB memory in use. Of the allocated memory 13.17 GiB is allocated by PyTorch, and 1.28 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

## Visualization and Results

In [None]:
# ==============================================================================
# Visualize Training Results
# ==============================================================================

# True only when at least one epoch finished; if the training cell was
# interrupted, the history lists are empty and indexing [-1] would raise.
has_results = bool(train_acc and train_loss and test_acc and test_losses)

# Plot training metrics
print("Plotting training and validation metrics...")
Plotter.plot_metrics(train_acc, train_loss, test_acc, test_losses)

# Plot learning rate schedule
print("\nPlotting learning rate schedule...")
# For 1 epoch, we need to collect all lr values during training
# The lr_history list only has one entry per epoch, so let's just display it
# (the ':.2e' format spec can only be applied to numbers, so the empty case
# is printed separately instead of formatting the string 'N/A')
if lr_history:
    print(f"Learning rate range during training: {min(lr_history):.2e} to {max(lr_history):.2e}")
else:
    print("Learning rate range during training: N/A to N/A")

# Display statistics
print("\n" + "="*80)
print("Training Summary")
print("="*80)
if has_results:
    print(f"Final Train Accuracy: {train_acc[-1]:.2f}%")
    print(f"Final Test Accuracy: {test_acc[-1]:.2f}%")
    print(f"Final Train Loss: {train_loss[-1]:.4f}")
    print(f"Final Test Loss: {test_losses[-1]:.4f}")
else:
    print("No completed epochs to summarise - run the training cell first.")
print("="*80)

# ==============================================================================
# Visualize Misclassified and Correctly Classified Images
# ==============================================================================

# Get misclassified and correctly classified images from the last epoch
misclassified_images = test_obj.misclassified_images
correctly_classified_images = test_obj.trueclassified_images

print(f"\nNumber of misclassified images stored: {len(misclassified_images)}")
print(f"Number of correctly classified images stored: {len(correctly_classified_images)}")

# Plot misclassified images
if len(misclassified_images) > 0:
    print("\nPlotting misclassified images...")
    Plotter.plot_images(misclassified_images, classes, title="Misclassified Images", denormalize=True)
else:
    print("\nNo misclassified images to display")

# Plot correctly classified images
if len(correctly_classified_images) > 0:
    print("\nPlotting correctly classified images...")
    Plotter.plot_images(correctly_classified_images, classes, title="Correctly Classified Images", denormalize=True)
else:
    print("\nNo correctly classified images to display")

print("\n" + "="*80)
print("All Done! ðŸŽ‰")
print("="*80)
if has_results:
    print(f"\nYour model achieved {test_acc[-1]:.2f}% accuracy on TinyImageNet-200 after 1 epoch!")
    print(f"Model checkpoint saved at: imagenet200_resnet50_best.pth")

## Training Results and Future Work

**Current Results:** During this trial run on Google Colab, training for 1 epoch yielded approximately **4% training accuracy** and **8% testing accuracy** on TinyImageNet-200.

**Why Only 1 Epoch?** The primary limitation encountered was the instability of Colab's T4 GPU runtime, which frequently disconnected between epochs. This led to kernel restarts and the need to re-run epochs from scratch, making multi-epoch training impractical in this environment.

**Why Low Accuracy?** This run uses a **trial/dummy configuration** whose only purpose is to establish the training pipeline and workflow; a ResNet-50 trained from scratch for a single epoch cannot be expected to converge. In subsequent iterations, the team will collaboratively develop a significantly improved architecture with better hyperparameters, data augmentation strategies, and model configurations.

**Next Steps:** The final optimized model will be trained on **AWS EC2 instances with adequate GPU resources** (avoiding the runtime disconnection issues faced on Colab), allowing for stable, long-duration training sessions to achieve competitive performance on the ImageNet-1k dataset.
