#### Installing dependencies

In [1]:
!pip install numpy torch matplotlib



#### Importing packages

In [2]:
%matplotlib inline
import numpy as np
import torch
import torch.utils.data as Data
from torch.autograd import Variable
import matplotlib.pyplot as plt
from collections import OrderedDict
from math import pi

#### NN class for Regression

In [3]:
class linearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize, nhiddenLayers, hiddenNeurons):
        super(linearRegression, self).__init__()
        layers = []

        for i in range(0,nhiddenLayers): # adding hidden layers
            if(i==0): #Input layer
                layers.append(torch.nn.Linear(inputSize, hiddenNeurons))
                layers.append(torch.nn.ReLU())
            else:
                layers.append(torch.nn.BatchNorm1d(hiddenNeurons))
                layers.append(torch.nn.Linear(hiddenNeurons, hiddenNeurons))
                layers.append(torch.nn.ReLU())

        layers.append(torch.nn.Linear(hiddenNeurons, outputSize)) # output layer
        self.model = torch.nn.Sequential(*layers)
        print("\nNetwork Architecture: \n", self.model,"\n")

    def forward(self, x):
        out = self.model(x)
        return out

#### NN Parameters

In [4]:
learningRate = 0.001
momentum = 0.9 
epochs = 200 # Original Value is 2000
batch_size = 3000
train_set = 60000
test_set = 60000

#### Making the Dataset

In [5]:
f1 = lambda x: 2 * (x ** 2) - 1
func = lambda x: f1(f1(np.cos(x)))
x = torch.unsqueeze(torch.FloatTensor(train_set+test_set).uniform_(-2*pi, 2*pi), dim=1)
y = func(x)

# Train Dataset and making generator for mini-batches
train_x = x[0:train_set]
train_y = y[0:train_set]
train_dataset = Data.TensorDataset(train_x, train_y)
train_loader = Data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Test Dataset
test_x = x[train_set:(train_set+test_set)]
test_y = y[train_set:(train_set+test_set)]

#### Function to obtain training and testing results for a specific network architecture

In [6]:
def trainTestNet(arch:tuple) -> torch.tensor:
    
    # Defining network architecture
    print("Hidden Layers Quantity: ",arch[0],",  Units per Layer: ",arch[1],"\n")
    net = linearRegression(1, 1, arch[0], arch[1]) # Defining network architecture where arch[1] corresponds to the layer number and arch[2] to the number of units in a hidden layer
    criterion = torch.nn.MSELoss() # Define criterion to evaluate the network, in this case MSE
    optimizer = torch.optim.SGD(net.parameters(), lr=learningRate, momentum=momentum) # optimizer used is SGD
        
    # Training the network
    print("Training Results: ")
    for epoch in range(1,epochs+1): # Iteration in each epoch
        for step, (t_x, t_y) in enumerate(train_loader): # Iterating in each mini-batch
            optimizer.zero_grad() # Removing buffer from previous epochs
            outputs = net(t_x) # Output training into the model
            loss = criterion(outputs, t_y) # Get loss for predicted outputs
            if((epoch%100==0) and step == train_set/batch_size-1): #Print Train results for each 100 epochs
                print("Epoch: ", epoch,"  Step: ", step,"  Loss: ", loss)
            loss.backward() # Propagate the loss
            optimizer.step() # Update parameters
            
    # Evaluating the network using the test dataset
    print("\n\nEvaluation over test dataset:")
    test_outputs = net(test_x) # Generating network outputs for test data
    mean_test_error = torch.sqrt(criterion(test_outputs, test_y)) # Calculating RMSE over the predicted data
    print("Test Error (RMSE): ", mean_test_error, "\n\n\n\n")
    
    return mean_test_error

In [7]:
#result = trainTestNet((1,256))
#print(result)

#### Defining network architectures deployment

In [8]:
net_archs = [(1,24), (2, 36), (3,24)] # The first value of a tuple corresponds to the number of hidden layers and the other value to number of units/layer
times = 5 # Repeating each architecture result N times
results = torch.zeros([len(net_archs), times])
print(results)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


#### Running Experiments

In [9]:
for t in range(times): # Repeating the process N times
    for index, arch in enumerate(net_archs): # Iterating over all architectures
        results[index, t] = trainTestNet(arch)
        

Hidden Layers Quantity:  1 ,  Units per Layer:  24 


Network Architecture: 
 Sequential(
  (0): Linear(in_features=1, out_features=24, bias=True)
  (1): ReLU()
  (2): Linear(in_features=24, out_features=1, bias=True)
) 

Training Results: 
Epoch:  100   Step:  19   Loss:  tensor(0.4901, grad_fn=<MseLossBackward>)
Epoch:  200   Step:  19   Loss:  tensor(0.4819, grad_fn=<MseLossBackward>)


Evaluation over test dataset:
Test Error (RMSE):  tensor(0.6900, grad_fn=<SqrtBackward>) 




Hidden Layers Quantity:  2 ,  Units per Layer:  36 


Network Architecture: 
 Sequential(
  (0): Linear(in_features=1, out_features=36, bias=True)
  (1): ReLU()
  (2): BatchNorm1d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Linear(in_features=36, out_features=36, bias=True)
  (4): ReLU()
  (5): Linear(in_features=36, out_features=1, bias=True)
) 

Training Results: 
Epoch:  100   Step:  19   Loss:  tensor(0.3968, grad_fn=<MseLossBackward>)
Epoch:  200   Step:  19   Loss:  tenso

#### Plotting Results

In [15]:
print(results)
print(torch.min(results, 1))

tensor([[0.6900, 0.6934, 0.6892, 0.6880, 0.6914],
        [0.5696, 0.5730, 0.5835, 0.6234, 0.6461],
        [0.0984, 0.2136, 0.1363, 0.1965, 0.1489]], grad_fn=<CopySlices>)
torch.return_types.min(
values=tensor([0.6880, 0.5696, 0.0984], grad_fn=<MinBackward0>),
indices=tensor([3, 0, 0]))


In [None]:
fig1 = plt.figure()
fig1.set_title("Text Erro x Number of Units")
fig1.set_xlabel("Number of Units")