<a href="https://colab.research.google.com/github/anubhavshrestha/Machine-Learning/blob/main/Task1_Lenet_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# importing necessary modules
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

In [2]:
#LeNet Model

class LeNet(nn.Module):
    """LeNet-style convolutional classifier for single-channel 28x28 images.

    Layer stack: conv(1->6, k=5) -> tanh -> maxpool(2) -> conv(6->16, k=5)
    -> tanh -> maxpool(2) -> fc(256->120) -> tanh -> fc(120->84) -> tanh
    -> fc(84->num_classes).

    The 256 = 16 * 4 * 4 input width of ``fc1`` is what a 28x28 image shrinks
    to after the two conv/pool stages (28 -> 24 -> 12 -> 8 -> 4).

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)  # one output unit per class

    def forward(self, x):
        """Compute raw (un-normalised) class logits.

        Args:
            x: Image batch of shape (batch, 1, 28, 28). A single unbatched
                image of shape (1, 28, 28) is also accepted and treated as a
                batch of one.

        Returns:
            Tensor of shape (batch, num_classes).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to
                16 * 4 * 4 features per sample.
        """
        if x.dim() == 3:
            # Unbatched (C, H, W) input: add the batch axis explicitly.
            x = x.unsqueeze(0)
        x = self.pool(torch.tanh(self.conv1(x)))
        x = self.pool(torch.tanh(self.conv2(x)))
        # Flatten per sample rather than ``view(-1, 256)``: with the latter a
        # wrongly sized input could be silently reshaped into a different
        # number of rows, mixing pixels from different images. Keeping the
        # batch axis fixed makes ``fc1`` raise on a size mismatch instead.
        x = torch.flatten(x, start_dim=1)
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
        return x


In [3]:
# Define transformations: convert images to tensors, then normalise with
# mean 0.5 and std 0.5 (single channel).
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Load the MNIST dataset (downloaded into ./data on first use)
full_train_dataset = MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = MNIST(root='./data', train=False, transform=transform, download=True)

# Split the training dataset into training (80%) and validation (20%) sets
total_train_samples = len(full_train_dataset)
train_samples = int(0.8 * total_train_samples)
valid_samples = total_train_samples - train_samples

# Use a dedicated, seeded generator so the same samples land in the
# validation set on every run; otherwise validation scores from different
# runs are measured on different data and are not comparable.
split_seed = 42
train_dataset, valid_dataset = torch.utils.data.random_split(
    full_train_dataset,
    [train_samples, valid_samples],
    generator=torch.Generator().manual_seed(split_seed),
)

# Create data loaders (only the training loader is shuffled)
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 40128516.43it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 117152508.53it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 46529985.92it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 17854291.25it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
# Baseline setup: cross-entropy loss, a fresh LeNet, and Adam at lr = 1e-3
criterion = nn.CrossEntropyLoss()
model = LeNet()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [5]:
# Training loop
num_epochs = 10


def evaluate(net, loader, loss_fn=None):
    """Score ``net`` on every batch of ``loader`` without tracking gradients.

    The model is put in eval mode for the duration of the call and restored
    to its previous mode afterwards.

    Args:
        net: Model to evaluate; called as ``net(inputs)``.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_fn: Optional callable ``loss_fn(outputs, labels)``. When given,
            its per-batch values are summed.

    Returns:
        Tuple ``(correct, total, loss_sum)``: number of correct top-1
        predictions, number of samples seen, and the sum of per-batch losses
        (0.0 when ``loss_fn`` is None).
    """
    was_training = net.training
    net.eval()
    correct, total, loss_sum = 0, 0, 0.0
    try:
        with torch.no_grad():
            for inputs, labels in loader:
                outputs = net(inputs)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                if loss_fn is not None:
                    loss_sum += loss_fn(outputs, labels).item()
    finally:
        net.train(was_training)
    return correct, total, loss_sum


for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Training accuracy is accumulated from the same forward pass used
        # for the update, so it reflects the weights as they change during
        # the epoch. detach() keeps the prediction out of the autograd graph.
        predicted_train = outputs.detach().argmax(dim=1)
        total_train += labels.size(0)
        correct_train += (predicted_train == labels).sum().item()

    # Validation
    correct_valid, total_valid, val_loss = evaluate(model, valid_loader, criterion)

    # Losses are reported as the mean of per-batch mean losses.
    print(f"Epoch {epoch + 1} - Training Loss: {running_loss / len(train_loader):.3f}, Training Accuracy: {100 * correct_train / total_train:.2f}%, Validation Loss: {val_loss / len(valid_loader):.3f}, Validation Accuracy: {100 * correct_valid / total_valid:.2f}%")

print("Training Finished. Now testing!")

# Test the model
correct, total = evaluate(model, test_loader)[:2]

print(f"Accuracy on the test set: {100 * correct / total:.2f}%")

Epoch 1 - Training Loss: 0.268, Training Accuracy: 92.14%, Validation Loss: 0.096, Validation Accuracy: 97.06%
Epoch 2 - Training Loss: 0.075, Training Accuracy: 97.69%, Validation Loss: 0.076, Validation Accuracy: 97.63%
Epoch 3 - Training Loss: 0.052, Training Accuracy: 98.38%, Validation Loss: 0.071, Validation Accuracy: 97.72%
Epoch 4 - Training Loss: 0.039, Training Accuracy: 98.79%, Validation Loss: 0.057, Validation Accuracy: 98.21%
Epoch 5 - Training Loss: 0.030, Training Accuracy: 99.12%, Validation Loss: 0.064, Validation Accuracy: 98.03%
Epoch 6 - Training Loss: 0.023, Training Accuracy: 99.26%, Validation Loss: 0.052, Validation Accuracy: 98.41%
Epoch 7 - Training Loss: 0.018, Training Accuracy: 99.45%, Validation Loss: 0.057, Validation Accuracy: 98.39%
Epoch 8 - Training Loss: 0.016, Training Accuracy: 99.51%, Validation Loss: 0.073, Validation Accuracy: 97.93%
Epoch 9 - Training Loss: 0.013, Training Accuracy: 99.62%, Validation Loss: 0.059, Validation Accuracy: 98.38%
E

In [6]:
!pip install optuna # for hyperparameter tuning

Collecting optuna
  Downloading optuna-3.4.0-py3-none-any.whl (409 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m409.6/409.6 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.12.1-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.8/226.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.2.4 alembic-1.12.1 colorlog-6.7.0 optuna-3.4.0


In [7]:
import optuna

In [8]:
# Define the objective function for hyperparameter tuning
def objective(trial):
    """Optuna objective: train a fresh LeNet and return its validation accuracy.

    A learning rate is sampled on a log scale from [1e-5, 1e-1], a new LeNet
    is trained with Adam and cross-entropy loss for 10 epochs on the
    notebook-level ``train_loader``, and the trained model is then scored
    once on the notebook-level ``valid_loader``.

    Args:
        trial: Optuna trial used to sample the ``"lr"`` hyperparameter.

    Returns:
        Fraction of validation samples classified correctly (``correct / total``).
    """
    # Define and set hyperparameters
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)  # Learning rate in a logarithmic range

    # Initialize the LeNet model
    model = LeNet()

    # Define loss and optimizer (use Adam with the suggested learning rate)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop
    num_epochs = 10
    model.train()
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Validation (once, after all epochs)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Return validation accuracy; the study is expected to maximize this value
    return correct / total

In [9]:
# Create an Optuna study and optimize the objective function.
# The sampler is seeded so the search does not draw from an unseeded RNG.
# Each trial's score still depends on the global torch RNG (weight
# initialisation and batch shuffling).
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)

# Get the best hyperparameters
best_params = study.best_params
best_lr = best_params["lr"]

# Reinitialize the model with the best hyperparameters.
# The loss is created here rather than reusing a `criterion` left over from
# an earlier cell, so this cell only depends on the data loaders, LeNet and
# objective.
best_model = LeNet()
best_criterion = nn.CrossEntropyLoss()
best_optimizer = optim.Adam(best_model.parameters(), lr=best_lr)

# Training loop with the best hyperparameters
num_epochs = 10
for epoch in range(num_epochs):
    best_model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        best_optimizer.zero_grad()

        outputs = best_model(inputs)
        loss = best_criterion(outputs, labels)
        loss.backward()
        best_optimizer.step()

        running_loss += loss.item()

    # Validation
    best_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            outputs = best_model(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Epoch {epoch + 1} - Training Loss: {running_loss / len(train_loader):.3f}, Validation Accuracy: {100 * correct / total:.2f}%")

print("Finished Training")

# Test the model with the best hyperparameters
best_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = best_model(inputs)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the test set with best hyperparameters: {100 * correct / total:.2f}%")

[I 2023-10-30 06:53:45,545] A new study created in memory with name: no-name-02b2e8e0-5c3d-4301-88c2-0106348cc9f8
[I 2023-10-30 06:56:33,672] Trial 0 finished with value: 0.9274166666666667 and parameters: {'lr': 0.015637274826534726}. Best is trial 0 with value: 0.9274166666666667.
[I 2023-10-30 06:59:19,468] Trial 1 finished with value: 0.7261666666666666 and parameters: {'lr': 0.03535158759268208}. Best is trial 0 with value: 0.9274166666666667.
[I 2023-10-30 07:02:06,400] Trial 2 finished with value: 0.8298333333333333 and parameters: {'lr': 0.030824415357067918}. Best is trial 0 with value: 0.9274166666666667.
[I 2023-10-30 07:04:55,814] Trial 3 finished with value: 0.95475 and parameters: {'lr': 3.4004000983120935e-05}. Best is trial 3 with value: 0.95475.
[I 2023-10-30 07:07:45,308] Trial 4 finished with value: 0.9661666666666666 and parameters: {'lr': 5.116416632880259e-05}. Best is trial 4 with value: 0.9661666666666666.
[I 2023-10-30 07:10:34,898] Trial 5 finished with value:

Epoch 1 - Training Loss: 0.489, Validation Accuracy: 94.94%
Epoch 2 - Training Loss: 0.122, Validation Accuracy: 96.87%
Epoch 3 - Training Loss: 0.081, Validation Accuracy: 97.49%
Epoch 4 - Training Loss: 0.063, Validation Accuracy: 97.94%
Epoch 5 - Training Loss: 0.052, Validation Accuracy: 98.22%
Epoch 6 - Training Loss: 0.044, Validation Accuracy: 98.36%
Epoch 7 - Training Loss: 0.038, Validation Accuracy: 98.10%
Epoch 8 - Training Loss: 0.033, Validation Accuracy: 98.37%
Epoch 9 - Training Loss: 0.029, Validation Accuracy: 98.60%
Epoch 10 - Training Loss: 0.025, Validation Accuracy: 98.55%
Finished Training
Accuracy on the test set with best hyperparameters: 98.63%
