#Exercises
## Download Tiny Imagenet dataset with 500 classes with 64x64 resolution.

##1) Modify LeNet for 64x64 size , perform training and testing on TinyImage dataset by dividing data 70:10:20 parts.[4 Marks]

##2) Do the same thing for Alexent [4 Marks]

##3) Provide observation related to better model for this dataset. [2 Marks]
## Note: Implement all models from Scratch.

In [1]:
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
!unzip -q tiny-imagenet-200.zip

--2024-11-13 03:05:26--  http://cs231n.stanford.edu/tiny-imagenet-200.zip
Resolving cs231n.stanford.edu (cs231n.stanford.edu)... 171.64.64.64
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.64.64|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cs231n.stanford.edu/tiny-imagenet-200.zip [following]
--2024-11-13 03:05:26--  https://cs231n.stanford.edu/tiny-imagenet-200.zip
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.64.64|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 248100043 (237M) [application/zip]
Saving to: ‘tiny-imagenet-200.zip’


2024-11-13 03:06:01 (6.88 MB/s) - ‘tiny-imagenet-200.zip’ saved [248100043/248100043]



In [2]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Define transforms
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

# Load the dataset
data_dir = "tiny-imagenet-200"
dataset = datasets.ImageFolder(root=f"{data_dir}/train", transform=transform)

# Split the dataset
train_size = int(0.7 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_data, val_data, test_data = random_split(dataset, [train_size, val_size, test_size])

# Data loaders
batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ModifiedLeNet(nn.Module):
    def __init__(self, num_classes=200):
        super(ModifiedLeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=5)  # Additional layer for larger images

        # We'll initialize fc1 after calculating the size dynamically in forward pass
        self.fc1 = None
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def _initialize_fc1(self, x):
        # This method is called in the first forward pass to initialize fc1 with the correct size
        num_features = x.view(x.size(0), -1).shape[1]
        self.fc1 = nn.Linear(num_features, 120).to(x.device)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)

        x = x.view(x.size(0), -1)  # Flatten the output for the fully connected layer

        # Initialize fc1 dynamically on the first forward pass
        if self.fc1 is None:
            self._initialize_fc1(x)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x



In [4]:
import torch.optim as optim

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize model, criterion, and optimizer
model = ModifiedLeNet(num_classes=200).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train(model, train_loader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}")

# Train the model
train(model, train_loader, criterion, optimizer, epochs=15)


Epoch [1/15], Loss: 5.1185
Epoch [2/15], Loss: 4.8105
Epoch [3/15], Loss: 4.6143
Epoch [4/15], Loss: 4.4776
Epoch [5/15], Loss: 4.3852
Epoch [6/15], Loss: 4.3128
Epoch [7/15], Loss: 4.2586
Epoch [8/15], Loss: 4.2121
Epoch [9/15], Loss: 4.1692
Epoch [10/15], Loss: 4.1342
Epoch [11/15], Loss: 4.0982
Epoch [12/15], Loss: 4.0742
Epoch [13/15], Loss: 4.0507
Epoch [14/15], Loss: 4.0255
Epoch [15/15], Loss: 3.9972


In [10]:
def evaluate(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

# Validation and testing
val_accuracy = evaluate(model, val_loader)
test_accuracy = evaluate(model, test_loader)
print(f"Validation Accuracy: {val_accuracy:.2f}%")
print(f"Test Accuracy: {test_accuracy:.2f}%")


Validation Accuracy: 12.94%
Test Accuracy: 12.53%


In [5]:
def evaluate(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

# Validation and testing
val_accuracy = evaluate(model, val_loader)
test_accuracy = evaluate(model, test_loader)
print(f"Validation Accuracy: {val_accuracy:.2f}%")
print(f"Test Accuracy: {test_accuracy:.2f}%")


KeyboardInterrupt: 

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LeNet5(num_classes=200).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [None]:
for epoch in range(15):
  print(f"Epoch {epoch}")
  train_loss, train_accuracy = train(model, train_loader, criterion, optimizer, device)
  val_loss, val_accuracy = evaluate(model, val_loader, criterion, device)

  print(f"Epoch {epoch+1}/{num_epochs}")
  print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy}")
  print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy}")

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ModifiedAlexNet(nn.Module):
    def __init__(self, num_classes=200):
        super(ModifiedAlexNet, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        # We'll initialize fc1 after calculating the size dynamically in the forward pass
        self.fc1 = None
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

        # Dropout layers
        self.dropout = nn.Dropout(0.5)

    def _initialize_fc1(self, x):
        # This method calculates the size dynamically based on the input feature map size
        num_features = x.view(x.size(0), -1).shape[1]
        self.fc1 = nn.Linear(num_features, 4096).to(x.device)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = F.max_pool2d(x, kernel_size=3, stride=2)

        # Flatten the output for the fully connected layer
        x = x.view(x.size(0), -1)

        # Initialize fc1 dynamically on the first forward pass
        if self.fc1 is None:
            self._initialize_fc1(x)

        # Fully connected layers with ReLU and Dropout
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x


In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_alexNet = ModifiedAlexNet(num_classes=200).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()

In [15]:
# Corrected code:
for epoch in range(15):
    print(f"Epoch {epoch+1}")
    train_loss, train_accuracy = train(model_alexNet, train_loader, criterion, optimizer)
    val_loss, val_accuracy = evaluate(model_alexNet, val_loader, criterion)

    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")


Epoch 1
Epoch [1/10], Loss: 5.2984
Epoch [2/10], Loss: 5.2984
Epoch [3/10], Loss: 5.2985
Epoch [4/10], Loss: 5.2985
Epoch [5/10], Loss: 5.2984
Epoch [6/10], Loss: 5.2985
Epoch [7/10], Loss: 5.2984
Epoch [8/10], Loss: 5.2984
Epoch [9/10], Loss: 5.2983
Epoch [10/10], Loss: 5.2984


TypeError: cannot unpack non-iterable NoneType object

In [None]:
test_loss, test_accuracy = evaluate(model_alexNet, test_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Based on accuracy curves, AlexNet achieves higher validation accuracy than LeNet, hence it may indicate that AlexNet's deeper architecture is better for handling Tiny ImageNet's more complex, larger-scale images.

However, LeNet is a simpler model, and hence trains faster than AlexNet. If computational resources or training time is a constraint, LeNet could be a better option with a small compromise in accuracy