In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

Here we define the model class itself. In this case, it is an old-school 3-layer perceptron. The constructor takes three numbers as arguments: the sizes of the input, hidden, and output layers. Both the hidden and the output nodes use a logistic (sigmoid) activation function.

In [2]:
class Perceptron(nn.Module):
    """Three-layer perceptron: input -> hidden -> output.

    Both the hidden and the output layer are fully connected (`nn.Linear`)
    and are followed by a logistic (sigmoid) activation.

    Args:
        input_size: number of input nodes.
        hidden_size: number of hidden nodes.
        output_size: number of output nodes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        # Keep the layer sizes around so they can be inspected later.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Weight matrices (with biases): input-to-hidden, then hidden-to-output.
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)

        # Activation function shared by both layers.
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_):
        """Propagate `input_` through the network and return the output activations."""
        # Input -> hidden, squashed by the sigmoid.
        hidden_activation = self.sigmoid(self.i2h(input_))

        # Hidden -> output, squashed by the sigmoid again.
        return self.sigmoid(self.h2o(hidden_activation))

Here we build the model with 8 input nodes, 3 hidden nodes, and 8 output nodes:

In [3]:
# Seed PyTorch's global RNG before the layers are created so that the random
# weight initialisation (and therefore the whole training run) is the same on
# every Restart & Run All.
torch.manual_seed(0)

# 8 input nodes -> 3 hidden nodes -> 8 output nodes.
model = Perceptron(input_size=8, hidden_size=3, output_size=8)

Here we are defining the input/output pairs we want to train the model on:

In [4]:
# The task is an 8-way identity mapping: every pattern is a one-hot vector and the
# target is the same vector. The rows of the 8x8 identity matrix are exactly those
# one-hot vectors (float32, like the default tensor type).
identity = torch.eye(8)

# Each [input, target] pair holds its own independent copies of the row, so no
# tensor in the training set shares storage with another.
training_set = [[row.clone(), row.clone()] for row in identity]

# Named handles for the individual patterns, so later cells can refer to
# e.g. `input1` directly.
input1, input2, input3, input4, input5, input6, input7, input8 = (
    pair[0] for pair in training_set
)
output1, output2, output3, output4, output5, output6, output7, output8 = (
    pair[1] for pair in training_set
)

Here we define the loss function and optimizer that will be used to train the model. I am using Mean Squared Error as the loss function, and Stochastic Gradient Descent (with momentum) as the optimizer:

In [5]:
# Hyper-parameters for the optimizer, named so they are easy to find and tune.
LEARNING_RATE = 0.5
MOMENTUM = 0.9

# Mean squared error between the predicted and the target activations.
loss_fn = nn.MSELoss()

# Gradient descent with momentum over all of the model's weights and biases.
optimizer = optim.SGD(
    model.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
)

This loop trains the model on the patterns in the training set. It uses backpropagation of error, and runs until the loss summed over all patterns falls below a threshold, or until the iteration limit is reached without converging:

In [6]:
threshold = 0.01  # training counts as converged once the summed loss is below this
stop = 10000      # hard cap on the number of training iterations

converged = False  # True only if the loss actually dropped below `threshold`
iteration = 0

while not converged and iteration < stop:
    # One full-batch update: accumulate the loss over every pattern in the
    # training set, then backpropagate once and take a single optimizer step.
    optimizer.zero_grad()
    sum_loss = 0.0
    for inputs, outputs in training_set:
        predicted_output = model(inputs)
        sum_loss = sum_loss + loss_fn(predicted_output, outputs)
    sum_loss.backward()
    optimizer.step()

    iteration += 1

    # Plain Python float for reporting and for the convergence check.
    current_loss = sum_loss.item()

    if iteration % 500 == 0:
        print(iteration, "iterations.", "Loss:", current_loss)

    converged = current_loss < threshold

print('Finished Training')
print(iteration, "iterations")

# Make it obvious when the loop ended because of the iteration cap rather than
# because the loss reached the threshold.
if not converged:
    print("Warning: stopped at the iteration limit without reaching the loss threshold")

500 iterations. Loss: 0.13662360608577728
1000 iterations. Loss: 0.014872796833515167
Finished Training
1306 iterations


Now we can test the network on any input:

In [7]:
# Run the first training pattern through the network; as the last expression in
# the cell, the resulting output activations are displayed.
model(input1)

tensor([9.3894e-01, 1.3457e-06, 3.2475e-02, 1.8667e-05, 5.0499e-02, 2.6299e-02,
        1.0270e-07, 1.8378e-02], grad_fn=<SigmoidBackward>)