<img src="images/nalu.png" align=right width=40%>
# Neural Arithmetic Logic Units
Author: Jin Yeom (jinyeom@utexas.edu)

## Contents
- [Numerical Extrapolation Failures in Neural Networks](#Numerical-Extrapolation-Failures-in-Neural-Networks)

In [27]:
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torchsummary import summary
from tqdm import tqdm_notebook as tqdm

In [13]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device =", device)

device = cpu


## Numerical Extrapolation Failures in Neural Networks

In [19]:
class ScalarIdentityAutoencoder(nn.Module):
    """Small fully connected network mapping one scalar to one scalar.

    The network stacks four linear layers (1 -> 8 -> 8 -> 8 -> 1). The same
    ``nonlin`` callable is applied after each of the first three layers; the
    final layer's output is returned without an activation.

    Args:
        nonlin: Callable applied to the output of each hidden layer
            (e.g. an ``nn.Module`` activation such as ``nn.ReLU()``).
    """

    def __init__(self, nonlin):
        super().__init__()
        # Layers are created in fc1..fc4 order so that parameter registration
        # (and therefore state_dict keys and random initialisation) is stable.
        self.fc1 = nn.Linear(in_features=1, out_features=8)
        self.fc2 = nn.Linear(in_features=8, out_features=8)
        self.fc3 = nn.Linear(in_features=8, out_features=8)
        self.fc4 = nn.Linear(in_features=8, out_features=1)
        # One activation object is shared by all hidden layers.
        self.nonlin = nonlin

    def forward(self, x):
        """Apply the three activated hidden layers, then the linear output layer.

        Args:
            x: Input whose last dimension is 1 (required by ``fc1``).

        Returns:
            The output of ``fc4``, with last dimension 1.
        """
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.nonlin(layer(hidden))
        return self.fc4(hidden)

In [20]:
# Print a per-layer summary of the ReLU variant for a single scalar input.
summary(
    ScalarIdentityAutoencoder(nn.ReLU(inplace=True)).to(device),
    (1,),
)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                    [-1, 8]              16
              ReLU-2                    [-1, 8]               0
            Linear-3                    [-1, 8]              72
              ReLU-4                    [-1, 8]               0
            Linear-5                    [-1, 8]              72
              ReLU-6                    [-1, 8]               0
            Linear-7                    [-1, 1]               9
Total params: 169
Trainable params: 169
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [25]:
# nonlinear functions to be tested
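# NOTE(review): this note used to say the threshold function is excluded, but
# nn.Threshold(0, 1) appears in the list below; confirm which is intended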
for nonlin in [nn.Hardtanh(),
               nn.Sigmoid(),
               nn.ReLU6(),
               nn.Tanh(),
               nn.Tanhshrink(),
               nn.Hardshrink(),
               nn.LeakyReLU(),
               nn.Softshrink(),
               nn.Softsign(),
               nn.Threshold(0, 1),
               nn.ReLU(),
               nn.PReLU(),
               nn.Softplus(),
               nn.ELU(),
               nn.SELU()]:
    model = ScalarIdentityAutoencoder(nonlin)
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    for i in tqdm(10000):
        
        x = torch.rand(16, 1).to(device) * 10 - 5 # uniform random in [-5, 5]
        x_ = model(x)
        