<a href="https://colab.research.google.com/github/anubhavshrestha/Machine-Learning/blob/main/Task2AlexNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10

In [2]:
# defining necessary transformations
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [3]:
# Load the CIFAR-10 dataset
train_dataset = CIFAR10(root='./data', train=True, transform=transform, download=True)
test_dataset = CIFAR10(root='./data', train=False, transform=transform, download=True)

# Split the training dataset into train and validation sets (80% train, 20% validation)
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# Create data loaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:14<00:00, 11596568.48it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [9]:
# Initialize the AlexNet model
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),  # Corrected input size
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), 256 * 6 * 6)  # Corrected flattening
        x = self.classifier(x)
        return x

In [10]:
model = AlexNet()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
   

In [11]:
# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

In [12]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Calculate validation accuracy
    val_accuracy = 100 * correct / total

    print(f"Epoch {epoch + 1} - Training Loss: {running_loss / len(train_loader):.3f}, Validation Accuracy: {val_accuracy:.2f}%")

print("Finished Training")

Epoch 1 - Training Loss: 2.181, Validation Accuracy: 28.92%
Epoch 2 - Training Loss: 1.740, Validation Accuracy: 42.09%
Epoch 3 - Training Loss: 1.461, Validation Accuracy: 52.81%
Epoch 4 - Training Loss: 1.279, Validation Accuracy: 54.97%
Epoch 5 - Training Loss: 1.112, Validation Accuracy: 64.61%
Epoch 6 - Training Loss: 1.010, Validation Accuracy: 67.21%
Epoch 7 - Training Loss: 0.898, Validation Accuracy: 70.77%
Epoch 8 - Training Loss: 0.821, Validation Accuracy: 72.77%
Epoch 9 - Training Loss: 0.760, Validation Accuracy: 73.56%
Epoch 10 - Training Loss: 0.707, Validation Accuracy: 75.80%
Finished Training


In [13]:
# Test the model
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the test set: {100 * correct / total:.2f}%")


Accuracy on the test set: 75.26%


In [14]:
!pip install optuna
import optuna

Collecting optuna
  Downloading optuna-3.4.0-py3-none-any.whl (409 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m409.6/409.6 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.12.1-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.8/226.8 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.0-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.0 alembic-1.12.1 colorlog-6.7.0 optuna-3.4.0


In [15]:

# Define the objective function for hyperparameter tuning
def objective(trial):
    # Define and set hyperparameters
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)  # Learning rate in a logarithmic range

    # Initialize the LeNet model
    model = AlexNet()
    model.to(device)

    # Define loss and optimizer (use Adam with the suggested learning rate)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum = 0.9, weight_decay=5e-4)

    # Training loop
    num_epochs = 10
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

    # Validation
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Return validation accuracy as Optuna aims to maximize the objective
    return correct / total

In [16]:
# Create an Optuna study and optimize the objective function
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)

# Get the best hyperparameters
best_params = study.best_params
best_lr = best_params["lr"]

# Reinitialize the model with the best hyperparameters
best_model = AlexNet()
best_model.to(device)
best_optimizer = optim.SGD(best_model.parameters(), lr=best_lr, momentum = 0.9, weight_decay = 5e-4)

# Training loop with the best hyperparameters
num_epochs = 20                     # training for 20 epochs with best hyperparameters
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        best_optimizer.zero_grad()

        outputs = best_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        best_optimizer.step()

        running_loss += loss.item()

    # Validation
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = best_model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Epoch {epoch + 1} - Training Loss: {running_loss / len(train_loader):.3f}, Validation Accuracy: {100 * correct / total:.2f}%")

print("Finished Training")


[I 2023-11-20 19:18:44,860] A new study created in memory with name: no-name-eee5625a-8c45-48cc-a680-1dcce230f5b5
[I 2023-11-20 19:24:56,478] Trial 0 finished with value: 0.2658 and parameters: {'lr': 0.0005798401107628277}. Best is trial 0 with value: 0.2658.
[I 2023-11-20 19:31:06,453] Trial 1 finished with value: 0.6983 and parameters: {'lr': 0.0441822566985148}. Best is trial 1 with value: 0.6983.
[I 2023-11-20 19:37:15,769] Trial 2 finished with value: 0.299 and parameters: {'lr': 0.0006322024639041442}. Best is trial 1 with value: 0.6983.
[I 2023-11-20 19:43:25,969] Trial 3 finished with value: 0.6118 and parameters: {'lr': 0.00273789848532697}. Best is trial 1 with value: 0.6983.
[I 2023-11-20 19:49:36,472] Trial 4 finished with value: 0.6082 and parameters: {'lr': 0.003066796936818444}. Best is trial 1 with value: 0.6983.
[I 2023-11-20 19:55:47,272] Trial 5 finished with value: 0.0939 and parameters: {'lr': 1.4989915672285942e-05}. Best is trial 1 with value: 0.6983.
[I 2023-11

Epoch 1 - Training Loss: 2.054, Validation Accuracy: 32.79%
Epoch 2 - Training Loss: 1.596, Validation Accuracy: 45.69%
Epoch 3 - Training Loss: 1.345, Validation Accuracy: 57.19%
Epoch 4 - Training Loss: 1.145, Validation Accuracy: 58.88%
Epoch 5 - Training Loss: 1.017, Validation Accuracy: 68.00%
Epoch 6 - Training Loss: 0.909, Validation Accuracy: 67.52%
Epoch 7 - Training Loss: 0.824, Validation Accuracy: 72.25%
Epoch 8 - Training Loss: 0.764, Validation Accuracy: 72.55%
Epoch 9 - Training Loss: 0.705, Validation Accuracy: 75.95%
Epoch 10 - Training Loss: 0.664, Validation Accuracy: 76.95%
Epoch 11 - Training Loss: 0.618, Validation Accuracy: 76.18%
Epoch 12 - Training Loss: 0.584, Validation Accuracy: 78.13%
Epoch 13 - Training Loss: 0.560, Validation Accuracy: 79.72%
Epoch 14 - Training Loss: 0.527, Validation Accuracy: 79.20%
Epoch 15 - Training Loss: 0.504, Validation Accuracy: 80.46%
Epoch 16 - Training Loss: 0.492, Validation Accuracy: 80.63%
Epoch 17 - Training Loss: 0.471, 

In [17]:
# Test the model with the best hyperparameters
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = best_model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the test set with best hyperparameters: {100 * correct / total:.2f}%")

Accuracy on the test set with best hyperparameters: 80.95%


# TRANSFER LEARNING USING PRE-TRAINED ALEXNET

In [25]:
# Transfer Learning on Alexnet
from torchvision.models import alexnet

# Define transformations
transform_train = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load CIFAR-10 dataset
trainset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
testset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# Create data loaders
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Load pre-trained AlexNet
model = alexnet(pretrained=True)

# Modify the last fully connected layer for CIFAR-10 (10 classes)
num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_features, 10)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define loss function, optimizer, and scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Calculate validation accuracy
    val_accuracy = 100 * correct / total

    print(f"Epoch {epoch + 1} - Training Loss: {running_loss / len(train_loader):.3f}, Validation Accuracy: {val_accuracy:.2f}%")

print("Finished Training")


Files already downloaded and verified
Files already downloaded and verified




Epoch 1 - Training Loss: 0.874, Validation Accuracy: 80.06%
Epoch 2 - Training Loss: 0.564, Validation Accuracy: 82.99%
Epoch 3 - Training Loss: 0.469, Validation Accuracy: 85.26%
Epoch 4 - Training Loss: 0.417, Validation Accuracy: 85.58%
Epoch 5 - Training Loss: 0.369, Validation Accuracy: 87.00%
Epoch 6 - Training Loss: 0.337, Validation Accuracy: 87.42%
Epoch 7 - Training Loss: 0.301, Validation Accuracy: 87.78%
Epoch 8 - Training Loss: 0.286, Validation Accuracy: 88.28%
Epoch 9 - Training Loss: 0.263, Validation Accuracy: 88.24%
Epoch 10 - Training Loss: 0.237, Validation Accuracy: 88.30%
Finished Training


In [27]:
# Test the model with the transfer learning alexnet
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the test set with transfer learning: {100 * correct / total:.2f}%")

Accuracy on the test set with transfer learning: 88.95%
