In [14]:
# Download Tiny ImageNet dataset
# !wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
# Unzip the dataset
# !unzip -qq tiny-imagenet-200.zip

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import os
from torch.utils.data import Dataset
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

# Define your TinyImageNetValDataset class
class TinyImageNetValDataset(Dataset):
    """Validation dataset driven by a tab-separated ``val_annotations.txt``.

    The annotations file is expected at ``<root_dir>/val_annotations.txt``;
    the first tab-separated field of each line is an image file name and the
    second is its class name. Images are read from ``<root_dir>/images/``.

    Args:
        root_dir: Directory holding ``val_annotations.txt`` and ``images/``.
        transform: Optional callable applied to each loaded RGB PIL image.
        class_to_idx: Optional mapping from class name to integer label.
            When omitted, the mapping is looked up at item-access time from
            the module-level ``train_data.class_to_idx`` (the behaviour the
            notebook originally relied on), so ``train_data`` must exist
            before the first item is fetched.

    Attributes:
        annotations: Dict mapping image file name -> class name.
        samples: List of ``(image file name, class name)`` pairs in file
            order, frozen at construction time and used for indexing.
    """

    def __init__(self, root_dir, transform=None, class_to_idx=None):
        self.root_dir = root_dir
        self.transform = transform
        self.class_to_idx = class_to_idx
        self.annotations = self.load_annotations()
        # Materialise the pairs once: rebuilding the key list on every
        # __getitem__ call would cost O(len(dataset)) per sample.
        self.samples = list(self.annotations.items())

    def load_annotations(self):
        """Parse ``val_annotations.txt`` into ``{image name: class name}``.

        Blank lines are skipped. Fields beyond the second are ignored.
        """
        annotations_file = os.path.join(self.root_dir, "val_annotations.txt")
        annotations = {}
        with open(annotations_file, "r") as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    # Tolerate empty / trailing blank lines instead of
                    # failing with IndexError on parts[1].
                    continue
                parts = stripped.split("\t")
                annotations[parts[0]] = parts[1]
        return annotations

    def __len__(self):
        """Return the number of annotated validation images."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, class_idx)`` for the sample at position ``idx``.

        Raises:
            KeyError: If the sample's class name is missing from the
                class-to-index mapping.
        """
        img_name, class_name = self.samples[idx]
        img_path = os.path.join(self.root_dir, "images", img_name)

        # convert() forces the pixel data to be read and returns a new image,
        # so the underlying file can be closed as soon as the block exits.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)

        # Prefer the explicitly supplied mapping; otherwise fall back to the
        # notebook-level training dataset for backward compatibility.
        mapping = self.class_to_idx
        if mapping is None:
            mapping = train_data.class_to_idx

        return image, mapping[class_name]

In [16]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: five conv layers followed by three linear layers.

    The shape comments below assume a 3x227x227 input. An adaptive average
    pool in front of the classifier always yields a 256x6x6 feature map, so
    other input sizes that survive the conv/pool stack (e.g. 224x224) work as
    well; for 227x227 inputs that pool is the identity.

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=200):
        super().__init__()

        # Conv 1: (3, 227, 227) -> (96, 55, 55)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=0)
        # Pool 1: (96, 55, 55) -> (96, 27, 27)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Conv 2: (96, 27, 27) -> (256, 27, 27)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        # Pool 2: (256, 27, 27) -> (256, 13, 13)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Conv 3: (256, 13, 13) -> (384, 13, 13)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)

        # Conv 4: (384, 13, 13) -> (384, 13, 13)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)

        # Conv 5: (384, 13, 13) -> (256, 13, 13)
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        # Pool 5: (256, 13, 13) -> (256, 6, 6)
        self.pool5 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Parameter-free safety net: forces a (256, 6, 6) map regardless of
        # the spatial size coming out of pool5, so fc1's input size holds.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # FC 1: (256 * 6 * 6) -> (4096)
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)

        # FC 2: (4096) -> (4096)
        self.fc2 = nn.Linear(4096, 4096)

        # Output layer: (4096) -> (num_classes)
        self.fc3 = nn.Linear(4096, num_classes)

        # Dropout shared by both hidden fully connected layers
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch with 3 channels (``conv1`` has ``in_channels=3``).

        Returns:
            Tensor of raw logits with ``num_classes`` entries per sample.
        """
        # Conv 1 -> ReLU -> pool
        x = self.pool1(F.relu(self.conv1(x)))

        # Conv 2 -> ReLU -> pool
        x = self.pool2(F.relu(self.conv2(x)))

        # Conv 3 and 4 -> ReLU (no pooling in between)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))

        # Conv 5 -> ReLU -> pool
        x = self.pool5(F.relu(self.conv5(x)))

        # Normalise the spatial size, then flatten everything but the batch dim
        x = self.avgpool(x)
        x = torch.flatten(x, 1)

        # Two hidden fully connected layers, each ReLU + dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))

        # Raw logits: no activation here, the loss function handles it
        return self.fc3(x)

In [None]:
# Image preprocessing: resize to the 227x227 input the model's shape comments
# assume, convert to a tensor, then normalise with ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet statistics
])

# Training split: one sub-folder per class, handled by ImageFolder
train_dir = '/kaggle/input/tiny-imagenet/tiny-imagenet-200/train'
train_data = ImageFolder(root=train_dir, transform=transform)
train_loader = DataLoader(train_data, batch_size=128, shuffle=True, num_workers=2)

# Validation split: labels come from val_annotations.txt via the custom dataset.
# NOTE: the dataset resolves class indices through `train_data`, so it must be
# created after `train_data` above.
val_dir = '/kaggle/input/tiny-imagenet/tiny-imagenet-200/val'
val_data = TinyImageNetValDataset(root_dir=val_dir, transform=transform)
val_loader = DataLoader(val_data, batch_size=128, shuffle=False, num_workers=2)

# Instantiate the model and move it to the GPU when one is available
model = AlexNet(num_classes=200)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20  # You can adjust the number of epochs

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    # tqdm renders a per-epoch progress bar over the training batches
    with tqdm(train_loader, unit="batch") as tepoch:
        # The description is constant within an epoch, so set it once
        tepoch.set_description(f"Epoch [{epoch+1}/{num_epochs}]")

        # Count batches explicitly: tqdm only refreshes its `n` attribute when
        # the bar is redrawn, so it can lag behind the real batch count and
        # would skew the running average.
        for batch_idx, (inputs, labels) in enumerate(tepoch, start=1):
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Show the mean loss over the batches seen so far in this epoch
            running_loss += loss.item()
            tepoch.set_postfix(loss=running_loss / batch_idx)

    # Validation pass after each epoch (dropout disabled, no gradients)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Index of the largest logit per sample is the predicted class
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Validation Accuracy: {100 * correct / total:.2f}%')

Epoch [1/20]:  11%|█▏        | 88/782 [13:29<1:46:54,  9.24s/batch, loss=5.33]

In [None]:
def imshow(img, title=None, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Display one normalised CHW image tensor with matplotlib.

    The tensor is un-normalised with ``img * std + mean`` per channel, i.e.
    the inverse of ``transforms.Normalize(mean, std)``. The defaults match
    the ImageNet statistics used by this notebook's ``transform``.

    Args:
        img: Tensor laid out as (channels, height, width).
        title: Optional title drawn above the image.
        mean: Per-channel mean that was subtracted during normalisation.
        std: Per-channel standard deviation that was divided out.
    """
    img = img.detach().cpu()
    # Reshape to (C, 1, 1) so the statistics broadcast over height and width
    mean_t = torch.as_tensor(mean, dtype=img.dtype).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=img.dtype).view(-1, 1, 1)
    # Clamp so float round-off cannot push values outside matplotlib's
    # valid [0, 1] range for float RGB data.
    img = (img * std_t + mean_t).clamp(0, 1)
    np_img = img.numpy()
    # matplotlib expects (height, width, channels)
    plt.imshow(np.transpose(np_img, (1, 2, 0)))
    if title is not None:
        plt.title(title)
    plt.show()

# Grab the first batch of validation images and their true labels.
# (val_loader is created with shuffle=False, so this is the first batch of
# the validation set rather than a random sample.)
dataiter = iter(val_loader)
images, labels = next(dataiter)

# Run inference in evaluation mode; no_grad avoids building an autograd graph
# that would otherwise stay alive for as long as `outputs` does.
model.eval()
with torch.no_grad():
    outputs = model(images.to(device))  # Move images to the same device as the model
_, predicted = torch.max(outputs, 1)

# Convert to CPU for visualization
images = images.cpu()
labels = labels.cpu()
predicted = predicted.cpu()

# Class names ordered as the keys of train_data.class_to_idx
class_names = list(train_data.class_to_idx.keys())

# Show up to 10 images from the batch with true and predicted labels
num_to_show = min(10, images.size(0))
for i in range(num_to_show):
    imshow(images[i], title=f'True: {class_names[labels[i]]}, Predicted: {class_names[predicted[i]]}')