In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [13]:
class SimpleMLP(nn.Module):
    """Fully connected classifier: two ReLU hidden layers and a softmax output.

    Args:
        input_size: Number of features per sample after flattening
            (default 784, which matches a 1x28x28 input).
        hidden_size: Width of both hidden layers (default 128).
        output_size: Number of output classes (default 10).
    """

    def __init__(self, input_size = 784, hidden_size = 128 , output_size = 10):
        super().__init__()

        # Layers
        self.fc1 = nn.Linear(input_size, hidden_size)  # first hidden layer
        self.fc2 = nn.Linear(hidden_size, hidden_size) # second hidden layer
        self.fc3 = nn.Linear(hidden_size, output_size) # output layer

    def forward(self, x):
        """Map a batch of inputs to per-class probabilities.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                dimensions multiply out to ``input_size``,
                e.g. (batch_size, 1, 28, 28) for the default sizes.

        Returns:
            Tensor of shape (batch_size, output_size); each row is a softmax
            distribution and therefore sums to 1.
        """
        # Flatten input (batch_size, 1, 28, 28) -> (batch_size, 784).
        # torch.flatten is used instead of x.view(...) because view raises a
        # RuntimeError on non-contiguous tensors (e.g. a permuted batch);
        # flatten returns a view when it can and copies only when it must.
        x = torch.flatten(x, start_dim=1)

        # Hidden layer 1 with ReLU
        x = F.relu(self.fc1(x))

        # Hidden layer 2 with ReLU
        x = F.relu(self.fc2(x))

        # Output layer with Softmax (for classification).
        # NOTE: the result is probabilities, not logits. nn.CrossEntropyLoss
        # applies log-softmax itself, so do not feed it this output directly.
        x = F.softmax(self.fc3(x), dim=1)

        return x

In [14]:
# Smoke test: push a random batch through an untrained model and inspect the output.
# Seed the global RNG first so the random weight initialisation, the random
# input and therefore the printed values are identical on every
# Restart & Run All.
torch.manual_seed(0)

model = SimpleMLP()
dummy_input = torch.randn(2, 1, 28, 28)
# torch.randn creates a tensor of random numbers drawn from a standard normal
# distribution (mean=0, standard deviation=1).
# 2: batch size - this creates 2 samples
# 1: number of channels - typically 1 for grayscale images
# 28, 28: height and width - creates 28x28 pixel images
output = model(dummy_input)

print(output)  # probabilities for 10 classes
print(output.sum(dim=1))  # should sum to 1 for each sample

# Enforce the expectations above instead of relying on eyeballing the printout.
assert output.shape == (2, 10), "expected one row of 10 class probabilities per sample"
assert torch.allclose(output.sum(dim=1), torch.ones(output.size(0))), \
    "softmax rows must sum to 1"

tensor([[0.1042, 0.1172, 0.0899, 0.0881, 0.1142, 0.0920, 0.0954, 0.1019, 0.1019,
         0.0951],
        [0.1145, 0.1044, 0.0953, 0.1001, 0.1113, 0.0938, 0.1061, 0.0899, 0.0898,
         0.0948]], grad_fn=<SoftmaxBackward0>)
tensor([1., 1.], grad_fn=<SumBackward1>)
