# Train a neural network to do multiplication

In [1]:
import torch
import torch.nn.functional as F # F contains activation functions

# Fix the random seed so that weight initialization (and therefore every
# result below) is the same each time this notebook is run from a fresh kernel.
SEED = 1
torch.manual_seed(SEED)

<torch._C.Generator at 0x109d4eef0>

## Step 1: Gather data

In [2]:
# Training pairs (a, b). With so few examples the network will not generalize very well.
inputs = [
    [2, 4], [1, 3], [-2, 2],
    # Extra pairs added to reduce overfitting
    [1, 2], [-1, 2], [2, 5], [2, 1],
]

# Targets: the product a * b of each pair, stored as one single-element row per input
# so the targets line up row-for-row with `inputs`.
outputs = [[a * b] for a, b in inputs]

## Step 2: Define architecture

In [3]:
class Net(torch.nn.Module):
    """Small fully connected network: 2 inputs -> 4 -> 8 -> 4 -> 1 output."""

    def __init__(self):
        super(Net, self).__init__()
        # Picking the layer sizes is the "art" of neural nets: with too many
        # neurons the network over-fits. Here the hidden layers are 4 -> 8 -> 4,
        # sitting between 2 input neurons and 1 output neuron.
        self.layer1 = torch.nn.Linear(2, 4)  # 2 input features -> 4 hidden units
        self.layer2 = torch.nn.Linear(4, 8)
        self.layer3 = torch.nn.Linear(8, 4)
        self.layer4 = torch.nn.Linear(4, 1)  # 4 hidden units -> 1 output value

    def forward(self, x):
        """
        Define how data flows through the network (every Module needs this).

        x -- The input tensor. Its last dimension must hold the 2 input
             features, e.g. a batch of N pairs has shape (N, 2).

        Returns the result of layer4, whose last dimension has size 1.
        """
        # Each hidden layer is followed by ReLU: negative values become 0,
        # everything else passes through unchanged.
        for hidden in (self.layer1, self.layer2, self.layer3):
            x = F.relu(hidden(x))

        # No activation after the last layer, because the result must be able
        # to be any real number, not only a positive one.
        return self.layer4(x)
        

In [4]:
# Instantiate the network; ending the cell with the bare name displays its layer summary.
net = Net()
net

Net(
  (layer1): Linear(in_features=2, out_features=4, bias=True)
  (layer2): Linear(in_features=4, out_features=8, bias=True)
  (layer3): Linear(in_features=8, out_features=4, bias=True)
  (layer4): Linear(in_features=4, out_features=1, bias=True)
)

## Step 3: Train

In [5]:
# Turn the Python lists into float32 tensors, one row per training example
tensor_in = torch.tensor(inputs, dtype=torch.float32)
expected = torch.tensor(outputs, dtype=torch.float32)

# Loss function: mean squared error, appropriate for continuous output numbers
criterion = torch.nn.MSELoss()

# Optimizer: plain SGD over every parameter of the network
optimizer = torch.optim.SGD(net.parameters(), lr=0.001)  # lr = learning rate

In [6]:
# Training loop: 1000 full-batch passes over the training set
for epoch in range(1000):
    net.zero_grad()  # clear the gradients left over from the previous pass
    loss = criterion(net(tensor_in), expected)  # forward pass, then MSE against the targets
    loss.backward()  # compute the gradient of the loss for every parameter
    optimizer.step()  # move each parameter one step along its gradient

In [7]:
# Predictions on the training inputs; each row should approximate the matching row of `outputs`.
print(net(tensor_in))

tensor([[ 7.9879],
        [ 3.6484],
        [-3.0712],
        [ 1.8739],
        [-2.8785],
        [ 9.7624],
        [ 1.8701]], grad_fn=<AddmmBackward>)


In [8]:
# Two pairs that do not appear in the training inputs
test_values = torch.tensor([[1, 4], [2, 3]], dtype=torch.float32)

In [9]:
# Predictions for the unseen pairs; the true products are 1*4 = 4 and 2*3 = 6.
print(net(test_values))

tensor([[5.4229],
        [6.0829]], grad_fn=<AddmmBackward>)
