# MLP with Single Hidden Layer using PyTorch

1. Define an MLP with variable number of inputs (num_inputs), outputs (num_outputs), and nodes in hidden layer (num_hidden_layer_nodes).  
2. Use ReLU activation for each node in the hidden layer (the output layer is linear)
3. Use MSE loss
4. Use SGD optimizer


<img src="https://www.learnopencv.com/wp-content/uploads/2020/01/mlp.png" alt="mlp" width="500"/>


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Seed PyTorch's global random number generator so that the randomly
# initialised layer weights and the random tensors created below are the
# same on every run.
torch.manual_seed(0)

# Define the model
class MLP(nn.Module):
    """Multi-layer perceptron with exactly one hidden layer.

    The network is ``Linear -> ReLU -> Linear``: the hidden layer output is
    passed through ReLU, while the output layer is left linear.

    Args:
        num_inputs: Number of features in each input sample.
        num_hidden_layer_nodes: Number of units in the hidden layer.
        num_outputs: Number of values produced per sample.
    """

    def __init__(self, num_inputs, num_hidden_layer_nodes, num_outputs):
        # nn.Module's constructor must run first so that the layers assigned
        # below are registered as sub-modules (and their weights as parameters).
        super().__init__()

        # Hidden layer: num_inputs -> num_hidden_layer_nodes
        self.linear1 = nn.Linear(
            in_features=num_inputs,
            out_features=num_hidden_layer_nodes,
        )

        # Output layer: num_hidden_layer_nodes -> num_outputs
        self.linear2 = nn.Linear(
            in_features=num_hidden_layer_nodes,
            out_features=num_outputs,
        )

    def forward(self, x):
        """Run ``x`` through the hidden layer (with ReLU) and the output layer."""
        hidden = self.linear1(x)
        activated = F.relu(hidden)

        # No activation on the output layer
        return self.linear2(activated)

# Number of samples in the synthetic data set
num_data = 1000

# Layer widths: input features, hidden units, output values
num_inputs, num_hidden_layer_nodes, num_outputs = 1000, 100, 10

# Number of passes over the whole data set
num_epochs = 100

# Synthetic inputs and targets drawn from a standard normal distribution.
# The inputs are drawn first, then the targets, then the model weights are
# initialised; keep this order so the global RNG is consumed the same way.
x = torch.randn((num_data, num_inputs))
y = torch.randn((num_data, num_outputs))

# Instantiate the network defined by the MLP class
model = MLP(num_inputs, num_hidden_layer_nodes, num_outputs)

# Squared error summed over every element (reduction='sum' means the result
# is not divided by the number of elements, so it is a sum of squared errors
# rather than a mean)
loss_function = nn.MSELoss(reduction='sum')

# Plain SGD over all model parameters with a learning rate of 1e-4.
# NOTE(review): the recorded output of this cell shows the loss spiking
# around epochs 33-40 before recovering; this step size may be close to the
# stability limit for a summed loss -- confirm before reusing these settings.
optimizer = optim.SGD(model.parameters(), lr=0.0001)

for t in range(num_epochs):
    # Clear gradients accumulated by the previous iteration's backward pass
    optimizer.zero_grad()

    # Forward pass on the full data set, then report the loss for this epoch
    y_pred = model(x)
    loss = loss_function(y_pred, y)
    print(t, loss.item())

    # Backward pass computes gradients; step() applies the weight update
    loss.backward()
    optimizer.step()
        

0 10581.46484375
1 9755.71875
2 9161.2998046875
3 8637.4951171875
4 8135.08154296875
5 7634.916015625
6 7127.3212890625
7 6610.970703125
8 6087.37255859375
9 5563.39599609375
10 5048.94873046875
11 4551.427734375
12 4078.5126953125
13 3634.7451171875
14 3225.676025390625
15 2852.951904296875
16 2516.31201171875
17 2214.470947265625
18 1946.52880859375
19 1709.119873046875
20 1499.035400390625
21 1313.6361083984375
22 1151.3741455078125
23 1009.0541381835938
24 883.8927001953125
25 774.8145141601562
26 679.2298583984375
27 596.1323852539062
28 524.0944213867188
29 462.283935546875
30 411.1572570800781
31 372.4652099609375
32 353.1527404785156
33 370.260498046875
34 465.5327453613281
35 733.2030639648438
36 1382.11962890625
37 2739.392578125
38 5025.6025390625
39 6997.73095703125
40 6102.787109375
41 2681.7080078125
42 893.98876953125
43 422.0854187011719
44 282.61419677734375
45 214.14593505859375
46 170.4607391357422
47 139.28244018554688
48 115.85263061523438
49 97.65865325927734
50 8