<a href="https://colab.research.google.com/github/eladorpBG/ML_assignment3/blob/main/PyTorch_ch11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**PyTorch Implementation**

In [7]:
!pip install torch



In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, random_split
from torchvision import datasets, transforms

# Seed for the dataset splits, so the train/val/test membership is the same
# on every "Restart & Run All".
SEED = 42
split_generator = torch.Generator().manual_seed(SEED)

# Define transformations: convert to a tensor, then apply (x - 0.5) / 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1, 1]
])

# Load the official MNIST train and test parts separately
mnist_train_part = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset_original = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Combine both parts into a single pool that is re-split below
full_dataset = torch.utils.data.ConcatDataset([mnist_train_part, test_dataset_original])

# Compute sizes for train (70%) and test (30%) split
total_size = len(full_dataset)
train_size = int(0.7 * total_size)
test_size = total_size - train_size

# Split the pooled dataset (seeded, hence reproducible)
train_val_dataset, test_dataset = random_split(
    full_dataset, [train_size, test_size], generator=split_generator
)

# Carve a fixed-size validation set out of the 70% training portion
val_size = 5000
train_size_final = train_size - val_size
assert train_size_final > 0, "validation set is larger than the training portion"

train_dataset, val_dataset = random_split(
    train_val_dataset, [train_size_final, val_size], generator=split_generator
)

# Every sample must land in exactly one of the three splits
assert len(train_dataset) + len(val_dataset) + len(test_dataset) == total_size

# Create DataLoaders for batches
batch_size = 200
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation loaders are not shuffled: sample order does not affect the metrics
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [18]:
# Define the ANN Model
class ANN(nn.Module):
    """Fully connected network: two ReLU hidden layers and a softmax output.

    Args:
        input_size: Number of features per sample after flattening.
        hidden1_size: Width of the first hidden layer.
        hidden2_size: Width of the second hidden layer.
        output_size: Number of output units (classes).
    """

    def __init__(self, input_size, hidden1_size, hidden2_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden1_size)
        self.fc2 = nn.Linear(hidden1_size, hidden2_size)
        self.fc3 = nn.Linear(hidden2_size, output_size)
        # Not used by forward(); kept so existing attribute access still works.
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities for a batch.

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are flattened into ``input_size`` features.

        Returns:
            Tensor of shape ``(batch, output_size)``; each row is the softmax
            over the last linear layer's outputs.
        """
        # reshape (unlike view) also accepts non-contiguous inputs, copying
        # only when a view is not possible.
        x = x.reshape(x.size(0), -1)  # Flatten the input
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.softmax(self.fc3(x))  # Output layer with Softmax activation
        return x

In [19]:
# Hyperparameters
input_size = 28 * 28  # MNIST images are 28x28 pixels
hidden1_size = 500
hidden2_size = 500
output_size = 10  # one output unit per digit class
learning_rate = 0.1
num_epochs = 20
training_seed = 42

# Seed the global RNG before the model is built so that weight initialisation
# and the batch order of the shuffled training loader are repeatable.
torch.manual_seed(training_seed)

# Initialize the model, loss function, and optimizer
model = ANN(input_size, hidden1_size, hidden2_size, output_size)
# MSE is computed between the model's softmax probabilities and one-hot targets.
# NOTE(review): cross-entropy on raw logits is the usual classification loss,
# but switching would also require changing ANN.forward.
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Training Loop
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    train_loss = 0.0
    for images, labels in train_loader:
        # Convert integer class labels to float one-hot rows for MSELoss
        labels_one_hot = nn.functional.one_hot(labels, num_classes=output_size).float()

        optimizer.zero_grad()  # Clear gradients
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels_one_hot)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        train_loss += loss.item()

    # Mean of the per-batch mean losses
    train_loss /= len(train_loader)

    # Validation
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    correct = 0
    with torch.no_grad():  # Disable gradient computation for validation
        for images, labels in val_loader:
            labels_one_hot = nn.functional.one_hot(labels, num_classes=output_size).float()

            outputs = model(images)
            loss = criterion(outputs, labels_one_hot)
            val_loss += loss.item()
            # Predicted class = index of the largest output probability
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = 100 * correct / len(val_dataset)

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")


Epoch [1/20], Train Loss: 0.0888, Val Loss: 0.0872, Val Accuracy: 47.74%
Epoch [2/20], Train Loss: 0.0845, Val Loss: 0.0803, Val Accuracy: 44.30%
Epoch [3/20], Train Loss: 0.0728, Val Loss: 0.0637, Val Accuracy: 58.30%
Epoch [4/20], Train Loss: 0.0551, Val Loss: 0.0463, Val Accuracy: 74.52%
Epoch [5/20], Train Loss: 0.0405, Val Loss: 0.0347, Val Accuracy: 79.84%
Epoch [6/20], Train Loss: 0.0312, Val Loss: 0.0273, Val Accuracy: 85.42%
Epoch [7/20], Train Loss: 0.0254, Val Loss: 0.0229, Val Accuracy: 87.08%
Epoch [8/20], Train Loss: 0.0219, Val Loss: 0.0204, Val Accuracy: 87.98%
Epoch [9/20], Train Loss: 0.0198, Val Loss: 0.0186, Val Accuracy: 88.72%
Epoch [10/20], Train Loss: 0.0184, Val Loss: 0.0174, Val Accuracy: 89.48%
Epoch [11/20], Train Loss: 0.0173, Val Loss: 0.0166, Val Accuracy: 89.66%
Epoch [12/20], Train Loss: 0.0165, Val Loss: 0.0160, Val Accuracy: 89.90%
Epoch [13/20], Train Loss: 0.0159, Val Loss: 0.0154, Val Accuracy: 90.22%
Epoch [14/20], Train Loss: 0.0154, Val Loss: 0.

In [20]:
# Final evaluation on the held-out test split
model.eval()  # evaluation mode, as in the validation pass
test_loss, correct = 0, 0
with torch.no_grad():  # no gradients are needed for scoring
    for images, labels in test_loader:
        # One-hot targets with one row per sample and one column per class
        labels_one_hot = torch.zeros(labels.size(0), output_size)
        labels_one_hot.scatter_(1, labels.unsqueeze(1), 1)

        outputs = model(images)
        loss = criterion(outputs, labels_one_hot)
        test_loss = test_loss + loss.item()

        # Index of the largest output per row is the predicted class
        predicted = outputs.max(dim=1).indices
        hits = (predicted == labels).sum().item()
        correct = correct + hits

# Average the per-batch losses; accuracy is a percentage of all test samples
test_loss = test_loss / len(test_loader)
test_accuracy = 100 * correct / len(test_dataset)

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

Test Loss: 0.0140, Test Accuracy: 90.99%
