**Training + Testing**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Select the compute device: the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Create Fully Connected Layers
class NN(nn.Module):
    """Fully connected classifier with five ReLU hidden layers and a linear output layer.

    Layer widths: input_size -> 256 -> 128 -> 64 -> 32 -> 16 -> num_classes.
    """

    def __init__(self, input_size, num_classes):
        """Create the six linear layers.

        Args:
            input_size: Number of features in each flattened input row.
            num_classes: Number of scores produced for each input row.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 16)
        self.fc6 = nn.Linear(16, num_classes)

    def forward(self, x):
        """Return the raw (unnormalized) class scores for a batch of flattened inputs.

        Args:
            x: Tensor whose last dimension has `input_size` features.

        Returns:
            Tensor whose last dimension has `num_classes` scores.
        """
        # Every hidden layer is followed by a ReLU; the output layer is left linear.
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            x = F.relu(hidden(x))
        return self.fc6(x)

# Hyperparameters
input_size = 28 * 28   # features per flattened sample (28x28 single-channel image)
num_classes = 10       # number of output scores produced by the network
learning_rate = 1e-3   # step size handed to the optimizer
batch_size = 64        # samples per DataLoader batch
num_epochs = 5         # full passes over the training loader

# Load and preprocess the MNIST dataset.
# Images stay at their native 28x28 single-channel size so that each flattened
# sample has 28 * 28 = 784 features, matching `input_size`. Resizing to 224x224
# and expanding to 3 channels would yield 3 * 224 * 224 = 150528 features per
# sample, which the first linear layer (built for 784 inputs) rejects with a
# shape-mismatch error on the first batch.
transform = transforms.Compose([
    transforms.ToTensor(),                 # PIL image -> float tensor in [0, 1]
    transforms.Normalize((0.5,), (0.5,)),  # (x - 0.5) / 0.5 on the single channel
])

# Training split (downloaded into ./data when missing), reshuffled every epoch
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Separate held-out test split, iterated in a fixed order
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Build the network, then move its parameters to the selected device
model = NN(input_size=input_size, num_classes=num_classes)
model = model.to(device)

# Multi-class cross-entropy on the raw scores, optimized with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    model.train()
    total_loss = 0  # running sum of the per-batch losses for this epoch
    for batch_idx, (data, targets) in enumerate(train_loader):
        # One row per sample: collapse every non-batch dimension, then transfer
        data = data.reshape(data.shape[0], -1).to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the summed batch loss once per epoch
    print(f'Epoch [{epoch + 1}/{num_epochs}] Total Loss: {total_loss:.4f}')

# Check accuracy on training and test data
def check_accuracy(loader, model, device):
    """Return the percentage of samples in `loader` that `model` classifies correctly.

    The model is switched to evaluation mode and left in that mode. Each batch
    is moved to `device`, flattened to one row per sample, and scored without
    gradient tracking; the predicted class is the index of the highest score.

    Args:
        loader: Iterable yielding `(inputs, labels)` tensor batches.
        model: Module mapping a `(batch, features)` tensor to per-class scores.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Accuracy as a float percentage, or 0.0 when the loader yields no
        samples (instead of failing with a division by zero).
    """
    model.eval()
    num_correct = 0
    num_samples = 0

    with torch.no_grad():  # evaluation only: no gradients are needed
        for x, y in loader:
            x = x.to(device).reshape(x.size(0), -1)  # flatten all non-batch dims
            y = y.to(device)
            predictions = model(x).argmax(dim=1)
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)

    if num_samples == 0:
        # Nothing was evaluated; the ratio is undefined, so report 0.0.
        return 0.0
    return 100.0 * num_correct / num_samples

# Evaluate the trained model on both splits and report each accuracy as a percentage
training_accuracy = check_accuracy(loader=train_loader, model=model, device=device)
test_accuracy = check_accuracy(loader=test_loader, model=model, device=device)

print(
    f'Training Accuracy: {training_accuracy:.2f}%',
    f'Test Accuracy: {test_accuracy:.2f}%',
    sep='\n',
)

Epoch [1/5] Total Loss: 396.8891
Epoch [2/5] Total Loss: 184.3879
Epoch [3/5] Total Loss: 148.3295
Epoch [4/5] Total Loss: 128.5638
Epoch [5/5] Total Loss: 113.8432
Training Accuracy: 97.04%
Test Accuracy: 96.42%


**Training + Validation + Testing**

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Select the compute device: the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Create Fully Connected Layers
class NN(nn.Module):
    """Fully connected classifier with four ReLU hidden layers and a linear output layer.

    Layer widths: input_size -> 128 -> 64 -> 32 -> 16 -> num_classes.
    """

    def __init__(self, input_size, num_classes):
        """Create the five linear layers.

        Args:
            input_size: Number of features in each flattened input row.
            num_classes: Number of scores produced for each input row.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 16)
        self.fc5 = nn.Linear(16, num_classes)

    def forward(self, x):
        """Return the raw (unnormalized) class scores for a batch of flattened inputs.

        Args:
            x: Tensor whose last dimension has `input_size` features.

        Returns:
            Tensor whose last dimension has `num_classes` scores.
        """
        # Every hidden layer is followed by a ReLU; the output layer is left linear.
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(hidden(x))
        return self.fc5(x)

# Hyperparameters
input_size = 784       # 28 * 28 pixels of one flattened grayscale MNIST image
num_classes = 10       # number of output scores produced by the network
learning_rate = 1e-3   # step size handed to the optimizer
batch_size = 64        # samples per DataLoader batch
num_epochs = 5         # full passes over the training loader

# Load and preprocess the MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),                 # PIL image -> float tensor in [0, 1]
    transforms.Normalize((0.5,), (0.5,)),  # (x - 0.5) / 0.5 on the single grayscale channel
])

# Load the full MNIST training split (downloaded into ./data when missing)
full_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)

# Split the training dataset into training (80%) and validation (remaining 20%) sets.
# A dedicated, seeded generator keeps the membership of both subsets identical
# across runs, so validation accuracy is comparable between experiments.
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# Create DataLoaders: training batches are reshuffled every epoch, validation is not
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# Load the separate held-out test dataset, iterated in a fixed order
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Build the network, then move its parameters to the selected device
model = NN(input_size=input_size, num_classes=num_classes)
model = model.to(device)

# Categorical cross-entropy on the raw scores (more than 2 classes), optimized with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    model.train()
    total_loss = 0  # running sum of the per-batch losses for this epoch
    for batch_idx, (data, targets) in enumerate(train_loader):
        # One row per sample: collapse every non-batch dimension, then transfer
        data = data.reshape(data.shape[0], -1).to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the summed batch loss once per epoch
    print(f'Epoch [{epoch + 1}/{num_epochs}] Total Loss: {total_loss:.4f}')

# Check accuracy on training and test data
def check_accuracy(loader, model, device):
    """Return the percentage of samples in `loader` that `model` classifies correctly.

    The model is switched to evaluation mode and left in that mode. Each batch
    is moved to `device`, flattened to one row per sample, and scored without
    gradient tracking; the predicted class is the index of the highest score.

    Args:
        loader: Iterable yielding `(inputs, labels)` tensor batches.
        model: Module mapping a `(batch, features)` tensor to per-class scores.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Accuracy as a float percentage, or 0.0 when the loader yields no
        samples (instead of failing with a division by zero).
    """
    model.eval()
    num_correct = 0
    num_samples = 0

    with torch.no_grad():  # evaluation only: no gradients are needed
        for x, y in loader:
            x = x.to(device).reshape(x.size(0), -1)  # flatten all non-batch dims
            y = y.to(device)
            predictions = model(x).argmax(dim=1)
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)

    if num_samples == 0:
        # Nothing was evaluated; the ratio is undefined, so report 0.0.
        return 0.0
    return 100.0 * num_correct / num_samples

# Evaluate the trained model on all three splits and report each accuracy as a percentage
training_accuracy = check_accuracy(loader=train_loader, model=model, device=device)
validation_accuracy = check_accuracy(loader=val_loader, model=model, device=device)
test_accuracy = check_accuracy(loader=test_loader, model=model, device=device)

print(
    f'Training Accuracy: {training_accuracy:.2f}%',
    f'Validation Accuracy: {validation_accuracy:.2f}%',
    f'Test Accuracy: {test_accuracy:.2f}%',
    sep='\n',
)

Epoch [1/5] Total Loss: 424.5400
Epoch [2/5] Total Loss: 186.8466
Epoch [3/5] Total Loss: 138.2378
Epoch [4/5] Total Loss: 112.8730
Epoch [5/5] Total Loss: 97.3090
Training Accuracy: 96.83%
Validation Accuracy: 95.99%
Test Accuracy: 95.64%
