<a href="https://colab.research.google.com/github/enrico-atzeni/pytorch-simple-add-net/blob/main/Learn_to_add.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [154]:
# Lets import some libraries 
import torch # PyTorch 
import torch.nn as nn
import numpy as np
import math
import random
from torch.autograd import Variable

In [155]:
# Fix the seed of every RNG this notebook uses so that runs are repeatable
# while comparing models. To try a fresh seed instead, draw one with:
#   seed = random.randrange(10000000)
# The value below gave good results across several consecutive random-seed trials.
seed = 5787229

random.seed(seed)
torch.manual_seed(seed)
print (seed)

5787229


In [156]:
# Define Model
class MaiModel(nn.Module):
  """Single fully connected layer mapping `input_size` features to `output_size` outputs."""

  def __init__(self, input_size, output_size):
    super().__init__()

    # The only trainable layer: y = x @ W.T + b
    self.fc1 = nn.Linear(in_features=input_size, out_features=output_size)

  def forward(self, x):
    """Apply the linear layer to `x` and return its output."""
    return self.fc1(x)

In [157]:
# Build the training set: 10 random pairs of single-digit integers (0-9)
# and, as target, the sum of each pair.

# Number of features fed to the model (the two operands)
input_size = 2

# 10 rows x 2 columns of operands
x_values = [[random.randrange(10) for _ in range(2)] for _ in range(10)]

# One target per row: the sum of its two operands (10 rows x 1 column)
y_values = [[first + second] for first, second in x_values]

# The whole dataset is used as a single batch, stored as float32 arrays
x_train = np.asarray(x_values, dtype=np.float32)
y_train = np.asarray(y_values, dtype=np.float32)


In [158]:
# Alternative toy dataset (disabled): points on the line y = 2x + 1 for x = 0..10.
# Change the condition to True to train on it instead of the addition dataset.
# NOTE: the dataset-printing cell indexes x_values[i][0], which only works with
# the two-column addition dataset, so that cell must be adapted if this is enabled.
if False:
  x_values = list(range(11))
  # reshape to a column vector: one sample per row, one feature per sample
  x_train = np.array(x_values, dtype=np.float32).reshape(-1, 1)

  y_values = [2 * x + 1 for x in x_values]
  y_train = np.array(y_values, dtype=np.float32).reshape(-1, 1)

  input_size = 1

In [159]:
# Debug view of the dataset: one "a + b = sum" line per sample,
# followed by the shapes of the inputs and the targets.
print ("This is the dataset")
for operands, total in zip(x_values, y_values):
  print ('%d + %d = %d' % (int(operands[0]), int(operands[1]), int(total[0])))

print (" ")
# str() is required here: a torch.Size is a tuple, so passing it bare to %
# would be unpacked as several format arguments.
print ("X shape is %s" % str(torch.tensor(x_values).shape))
print ("Y shape is %s" % str(torch.tensor(y_values).shape))


This is the dataset
9 + 9 = 18
2 + 1 = 3
9 + 2 = 11
5 + 9 = 14
3 + 9 = 12
4 + 6 = 10
6 + 5 = 11
6 + 9 = 15
3 + 9 = 12
1 + 0 = 1
 
X shape is torch.Size([10, 2])
Y shape is torch.Size([10, 1])


In [160]:
# Training hyper-parameters
epochs = 5000
learningRate = 1e-4
inputDim = input_size
outputDim = 1

# Run on the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

# Build the network and move its parameters to the selected device
# (nn.Module.to() moves the module in place and returns the module itself)
model = MaiModel(inputDim, outputDim).to(device)

# Mean squared error loss, minimised with plain stochastic gradient descent
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learningRate)

In [161]:
# TRAIN
# The whole dataset is used as one batch that never changes, so the tensors
# are built once, outside the loop.
# Tensor.to() is NOT in-place: it returns the tensor on the target device,
# so the result must be assigned. Otherwise the data stays on the CPU and
# the forward pass fails when the model lives on the GPU.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

# Report the loss roughly 10 times over the whole run (at least every epoch)
log_every = max(1, math.ceil(epochs * 0.1))

for epoch in range(epochs):
  # Clear the gradient buffers: gradients from the previous epoch must not
  # accumulate into this one
  optimizer.zero_grad()

  # Forward pass: model predictions for the inputs
  outputs = model(inputs)

  # Loss between the predictions and the expected sums
  loss = criterion(outputs, labels)

  # Backward pass: gradients of the loss w.r.t. the parameters
  loss.backward()

  # Update the parameters
  optimizer.step()

  if epoch % log_every == 0:
    print('epoch {}, loss {}'.format(epoch, loss.item()))

# Final loss; skipped when no epoch was run (epoch/loss would be undefined)
if epochs > 0:
  print('epoch {}, loss {}'.format(epoch, loss.item()))
print ("FINISHED! -------")

epoch 0, loss 169.24266052246094
epoch 500, loss 0.078524149954319
epoch 1000, loss 0.02283766120672226
epoch 1500, loss 0.007562672253698111
epoch 2000, loss 0.0033349147997796535
epoch 2500, loss 0.002135149436071515
epoch 3000, loss 0.001766358269378543
epoch 3500, loss 0.0016265257727354765
epoch 4000, loss 0.0015507168136537075
epoch 4500, loss 0.0014936631778255105
epoch 4999, loss 0.0014430712908506393
FINISHED! -------


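Good! Now let's validate the model on pairs it has not seen during training, including operands outside the 0–9 range used for training.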

In [162]:
# Pairs used to check the trained model; some operands (10, 99) lie outside
# the 0-9 range of the training data.
validation_dataset = [
                      [10, 3],
                      [99, 2],
                      [7,8]
                      ]
val_numpy = np.array(validation_dataset, dtype=np.float32)
# Put the inputs on the same device as the model
val = torch.from_numpy(val_numpy).to(device)

with torch.no_grad(): # we don't need gradients in the testing phase
    # .cpu() brings the result back to host memory: .numpy() only works on
    # CPU tensors, so without it this fails when running on the GPU
    predicted = model(val).cpu().numpy()
    print(predicted)


[[ 13.013483]
 [101.79377 ]
 [ 15.016182]]
