# Imports

In [1]:
import torch 
import torch  # Main PyTorch library
import torch.nn as nn  # Neural networks module in PyTorch
import torch.optim as optim  # Optimization algorithms module in PyTorch
import torch.nn.functional as F  # Functional API that contains utility functions like activation functions
from torch.utils.data import DataLoader  # DataLoader for easy data loading and batching
import torchvision.datasets as datasets  # Datasets module for common datasets
import torchvision.transforms as transforms  # Transforms module for common image transformations

# Create Fully Connected Network

In [2]:
class NN(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each flattened input sample
            (784 for 28x28 images).
        num_classes: Number of output scores produced per sample.
        hidden_size: Width of the hidden layer. Defaults to 50, the value
            that used to be hard-coded, so existing two-argument calls
            build exactly the same network.
    """

    def __init__(self, input_size, num_classes, hidden_size=50):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.fc2 = nn.Linear(hidden_size, num_classes)  # hidden -> class scores

    def forward(self, x):
        """Map a batch of flattened inputs to raw class scores.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``num_classes``. No softmax is applied; the raw
            output of the second linear layer is returned.
        """
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)
    
# Smoke test: push one random batch of 64 flattened 28x28 samples through a
# freshly built network and show the shape of the scores it returns.
model = NN(input_size=784, num_classes=10)
x = torch.randn((64, 784))
print(model(x).size())


torch.Size([64, 10])


# Set Device

In [3]:
# Run on the GPU when CUDA is usable; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Hyperparameters

In [4]:
# Training configuration shared by the cells below.
input_size = 28 * 28    # features per sample: one value per pixel of a flattened 28x28 image
num_classes = 10        # number of scores the network outputs per sample
learning_rate = 1e-3    # step size handed to the optimizer
batch_size = 64         # samples per mini-batch
num_epochs = 5          # full passes over the training set

# Load Data

In [7]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd

# Custom dataset class for the CSV data
class CustomMNISTDataset(Dataset):
    """Dataset backed by a CSV file of flattened 28x28 images.

    For every row, column 0 is skipped, the last column is used as the
    label, and the columns in between are read as the pixel values.

    Args:
        csv_file: Path (or anything ``pd.read_csv`` accepts) of the CSV file.
        transform: Optional callable applied to each (28, 28) float32 image.
    """

    def __init__(self, csv_file, transform=None):
        self.transform = transform
        self.data_frame = pd.read_csv(csv_file)

    def __len__(self):
        """Number of rows (samples) in the CSV file."""
        return self.data_frame.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is a float32 array reshaped to (28, 28), passed through
        ``transform`` when one was supplied.
        """
        # Pixels and label are read with separate .iloc lookups so the label
        # keeps the dtype of its own column.
        pixel_values = self.data_frame.iloc[idx, 1:-1].values
        label = self.data_frame.iloc[idx, -1]

        image = pixel_values.astype('float32').reshape(28, 28)
        return (self.transform(image) if self.transform else image), label

# Convert each (28, 28) float32 array from the dataset into a (1, 28, 28) tensor.
# NOTE(review): ToTensor only rescales uint8 input to [0, 1]; the dataset
# yields float32 arrays, so pixel values pass through unscaled. If the CSV
# holds raw 0-255 intensities, consider normalizing them - confirm intent.
transform = transforms.Compose([
    transforms.ToTensor()
])

# Load training and testing data from CSV files
train_dataset = CustomMNISTDataset(csv_file='train.csv', transform=transform)
test_dataset = CustomMNISTDataset(csv_file='test.csv', transform=transform)

# Create DataLoader instances. batch_size is taken from the Hyperparameters
# cell; re-assigning it here would silently override any value tuned there.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)  # evaluation order does not matter, so no shuffling

# The train_loader and test_loader can now be used in a training loop

In [17]:
# # Create training and testing datasets using MNIST, applying transformations and downloading if necessary
# train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
# test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)

# # Create DataLoader instances for training and testing datasets, specifying batch size and enabling shuffling
# train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

# Initialize Network

In [8]:
# Build the network from the configured sizes, then place its parameters on
# the selected device (GPU or CPU).
model = NN(input_size=input_size, num_classes=num_classes)
model = model.to(device)

# Loss and Optimizer

In [9]:
# Adam updates the model's parameters; cross-entropy scores the raw class
# outputs against the integer labels.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Train Network

In [10]:
# Training loop: one full pass over train_loader per epoch.
for epoch in range(num_epochs):
    # Put the model in training mode explicitly. An evaluation run earlier in
    # the session may have left it in eval mode, and re-running this cell
    # would otherwise train in that mode.
    model.train()

    for data, targets in train_loader:
        # Move the batch to the same device as the model (GPU or CPU)
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Flatten every sample to a vector; the fully connected layers
        # expect shape (batch, features)
        data = data.reshape(data.shape[0], -1)

        # Forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass and optimization step
        optimizer.zero_grad()  # Clear gradients left over from the previous batch
        loss.backward()        # Backpropagation to compute gradients
        optimizer.step()       # Update model parameters using the optimizer


# Check Accuracy on Training Data

In [21]:
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` over ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
            Inputs are flattened to ``(batch, -1)`` before the forward pass.
        model: ``nn.Module`` that maps flattened inputs to per-class scores.

    Returns:
        None. One line of the form
        ``Got <correct>/<total> with accuracy <percent>`` is printed.
        An empty loader reports ``0/0`` with accuracy ``0.00`` instead of
        raising ``ZeroDivisionError``.
    """
    num_correct = 0
    num_samples = 0

    # Evaluate on the device the model's parameters live on, so batches and
    # model always agree. A parameter-free model leaves batches where they are.
    first_param = next(model.parameters(), None)

    was_training = model.training  # remembered so the caller's mode is restored
    model.eval()                   # evaluation mode for the forward passes
    try:
        with torch.no_grad():
            for x, y in loader:
                if first_param is not None:
                    x = x.to(device=first_param.device)
                    y = y.to(device=first_param.device)
                x = x.reshape(x.shape[0], -1)

                # Forward pass to obtain per-class scores
                scores = model(x)

                # Index of the highest score is the predicted class
                _, predictions = scores.max(1)

                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Do not leave the model in eval mode if it was training before.
        model.train(was_training)

    if num_samples:
        accuracy = float(num_correct) / float(num_samples) * 100
    else:
        accuracy = 0.0
    print(f'Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}')

In [22]:
# Report accuracy on the training split.
check_accuracy(loader=train_loader, model=model)
# check_accuracy(test_loader,model)

Got 56967/60000 with accuracy 94.95
Got 9462/10000 with accuracy 94.62
