In [2]:
!pip install torch

Collecting torch
[?25l  Downloading https://files.pythonhosted.org/packages/49/0e/e382bcf1a6ae8225f50b99cc26effa2d4cc6d66975ccf3fa9590efcbedce/torch-0.4.1-cp36-cp36m-manylinux1_x86_64.whl (519.5MB)
[K    100% |████████████████████████████████| 519.5MB 23kB/s 
tcmalloc: large alloc 1073750016 bytes == 0x59020000 @  0x7f2c577841c4 0x46d6a4 0x5fcbcc 0x4c494d 0x54f3c4 0x553aaf 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54e4c8
[?25hInstalling collected packages: torch
Successfully installed torch-0.4.1


In [4]:
!pip install tqdm



In [6]:
from google.colab import files

def getLocalFiles():
  '''
  Asks for a file upload via `files.upload()` and writes every
  returned (name, content) pair to a file called `name`, opened
  in binary mode. Returns nothing.
  '''
  uploaded = files.upload()

  # Looping over an empty mapping is a no-op, so no emptiness check is needed.
  for name, content in uploaded.items():
    # The context manager closes the handle as soon as the write finishes,
    # instead of leaving it to the garbage collector.
    with open(name, 'wb') as out_file:
      out_file.write(content)

getLocalFiles()

Saving train_utils.py to train_utils.py


In [0]:
import torch
from torch import nn as nn
from nac import NAC
from nalu import NALU
from train_utils import get_batches
from train_utils import get_eval_loss
from train_utils import get_eval_preds
from train_utils import train
from torch.optim import Adam
import numpy as np
import pandas as pd

### Function to create toy dataset

In [0]:
def make_data(min_val, max_val, num_obs, op):
    '''
    Generates toy data and targets by sampling values from a
    uniform distribution parameterized by min_val and max_val.

    Args:
        min_val: lower bound passed to `np.random.uniform`.
        max_val: upper bound passed to `np.random.uniform`.
        num_obs: number of rows to sample.
        op: one of '+', '-', '*', '/', '^2', 'sqrt'.

    Returns:
        A `(data, targets)` tuple. For the binary operations `data` has
        shape (num_obs, 2) and `targets` has shape (num_obs,). For the
        unary operations ('^2', 'sqrt') both have shape (num_obs, 1).

    Raises:
        ValueError: if `op` is not one of the supported operations.
    '''
    # Decide the number of operand columns up front so the data is sampled
    # exactly once, and so an unknown operation fails with a clear message
    # instead of reaching `return` with `targets` undefined.
    if op in ('+', '-', '*', '/'):
        num_inputs = 2
    elif op in ('^2', 'sqrt'):
        num_inputs = 1
    else:
        raise ValueError('Unsupported operation: {!r}'.format(op))

    data = np.random.uniform(min_val, max_val, size=(num_obs, num_inputs))

    if op == '+':
        targets = data[:, 0] + data[:, 1]
    elif op == '-':
        targets = data[:, 0] - data[:, 1]
    elif op == '*':
        targets = data[:, 0] * data[:, 1]
    elif op == '/':
        targets = data[:, 0] / data[:, 1]
    elif op == '^2':
        targets = data ** 2
    else:  # op == 'sqrt'
        targets = np.sqrt(data)
    return data, targets

In [0]:
# Sampling bounds and row count for a quick standalone look at the raw inputs.
min_val, max_val = 10, 20
num_obs = 100
# Two uniformly sampled operand columns, one row per observation.
data = np.random.uniform(low=min_val, high=max_val, size=(num_obs, 2))


In [16]:
# Element-wise sum of the two operand columns (the '+' case of make_data).
targets = np.add(data[:, 0], data[:, 1])
targets

array([27.92826131, 24.88596644, 28.91719303, 30.29067356, 32.23343525,
       35.50652756, 27.89113865, 37.2116129 , 29.35203405, 33.10479651,
       27.19607767, 25.06381931, 30.70386678, 26.60309074, 24.79429472,
       31.98052153, 32.35630881, 29.17504104, 26.64194414, 34.0994268 ,
       36.37826637, 37.10639224, 25.49036963, 29.88711832, 26.67611672,
       24.32842432, 31.05943508, 23.89788953, 27.52466927, 30.14617453,
       30.16906908, 23.7709627 , 28.41133884, 32.80036436, 29.91021038,
       34.32835185, 25.38899816, 28.08150683, 25.3259484 , 30.40111874,
       34.65987031, 31.35840387, 31.78906433, 27.66915762, 30.48439927,
       30.9133009 , 26.37717772, 22.21665821, 25.92969277, 24.1205839 ,
       22.5550938 , 38.09126714, 32.52257899, 26.88062605, 28.84751656,
       26.8102298 , 31.12380355, 29.44053488, 30.73872542, 30.48633248,
       35.70466554, 36.68304828, 24.01405244, 33.65775802, 32.4066234 ,
       30.21770385, 26.99020967, 34.58647355, 26.94737819, 28.06

### Function to compute accuracy of predictions

In [0]:
def accuracy_score(preds, targets, tol=1e-3):
    '''
    Computes the percentage of predictions that match their targets
    within a relative tolerance.

    A prediction counts as correct when
    `np.isclose(pred, target, rtol=tol)` holds, i.e.
    |pred - target| <= atol + tol * |target| with NumPy's default
    `atol`. `tol` is therefore a relative tolerance, not a number of
    decimal places.

    Args:
        preds: torch tensor of predictions (any shape; flattened).
        targets: torch tensor of targets (any shape; flattened).
        tol: relative tolerance forwarded to `np.isclose` as `rtol`.

    Returns:
        Percentage (0-100) of flattened elements considered equal.
    '''
    # detach() allows tensors that still track gradients to be converted;
    # calling numpy() directly on such a tensor raises a RuntimeError.
    preds = preds.detach().cpu().numpy().flatten()
    targets = targets.detach().cpu().numpy().flatten()
    matches = np.isclose(preds, targets, rtol=tol)
    return matches.astype(np.int32).mean() * 100

### Running experiments

In [12]:
eps = 1e-12  # not referenced anywhere in this cell
ops = ['+', '-', '*', '/', '^2', 'sqrt']
test_scores = {}

for op in ops:
    # Train and validation inputs are sampled between 10 and 20; the test
    # inputs come from the wider 0 to 30 range, so the test set contains
    # both interpolated and extrapolated data.
    X_train, Y_train = make_data(10, 20, 10000, op)
    X_valid, Y_valid = make_data(10, 20, 2000, op)
    X_test, Y_test = make_data(0, 30, 10000, op)

    # A fresh NALU is built for every operation, sized from the number of
    # data columns (presumably input size, then output size -- confirm in nalu.py).
    model = train(
        NALU(X_train.shape[1], 1), nn.SmoothL1Loss(), Adam,
        X_train, Y_train, X_valid, Y_valid,
        patience=15, batch_size=32, num_epochs=100,
    )

    test_preds, test_targets = get_eval_preds(model, X_test, Y_test, 32, False)
    test_acc = accuracy_score(test_preds, test_targets)
    print('Test accuracy for [{}]: {:.2f}%'.format(op, test_acc))

    # Store the score under a readable expression: the unary operations have
    # fixed labels, binary ones are spelled "a <op> b".
    score_key = {'^2': 'a^2', 'sqrt': 'sqrt(a)'}.get(op, 'a ' + op + ' b')
    test_scores[score_key] = test_acc

[Epoch: 1] Training loss after 48 batches: 3.404:  15%|█▌        | 47/313 [00:00<00:01, 195.53it/s]

Test accuracy for [+]: 0.53%


[Epoch: 1] Training loss after 46 batches: 218.235:  14%|█▍        | 45/313 [00:00<00:01, 218.28it/s]

Test accuracy for [-]: 0.01%


[Epoch: 1] Training loss after 42 batches: 2.354:  13%|█▎        | 41/313 [00:00<00:01, 232.83it/s]

Test accuracy for [*]: 0.09%


[Epoch: 1] Training loss after 44 batches: 231.161:  14%|█▎        | 43/313 [00:00<00:01, 208.97it/s]

Test accuracy for [/]: 0.17%


[Epoch: 1] Training loss after 44 batches: 3.110:  14%|█▎        | 43/313 [00:00<00:01, 210.23it/s]

Test accuracy for [^2]: 0.00%




Test accuracy for [sqrt]: 0.55%


In [17]:
eps = 1e-12  # not referenced anywhere in this cell
ops = ['+', '-', '*', '/', '^2', 'sqrt']
test_scores = {}

for op in ops:
    # Train and validation inputs are sampled between 10 and 20; the test
    # inputs come from the wider 0 to 30 range, so the test set contains
    # both interpolated and extrapolated data.
    X_train, Y_train = make_data(10, 20, 10000, op)
    X_valid, Y_valid = make_data(10, 20, 2000, op)
    X_test, Y_test = make_data(0, 30, 10000, op)

    # A fresh NALU is built for every operation, sized from the number of
    # data columns (presumably input size, then output size -- confirm in nalu.py).
    model = train(
        NALU(X_train.shape[1], 1), nn.SmoothL1Loss(), Adam,
        X_train, Y_train, X_valid, Y_valid,
        patience=15, batch_size=32, num_epochs=100,
    )

    test_preds, test_targets = get_eval_preds(model, X_test, Y_test, 32, False)
    test_acc = accuracy_score(test_preds, test_targets)
    print('Test accuracy for [{}]: {:.2f}%'.format(op, test_acc))

    # Store the score under a readable expression: the unary operations have
    # fixed labels, binary ones are spelled "a <op> b".
    score_key = {'^2': 'a^2', 'sqrt': 'sqrt(a)'}.get(op, 'a ' + op + ' b')
    test_scores[score_key] = test_acc

[Epoch: 1] Training loss after 53 batches: 2.988:  17%|█▋        | 52/313 [00:00<00:01, 244.23it/s]

Test accuracy for [+]: 0.98%


[Epoch: 1] Training loss after 48 batches: 225.217:  15%|█▌        | 47/313 [00:00<00:01, 215.32it/s]

Test accuracy for [-]: 99.66%


[Epoch: 1] Training loss after 44 batches: 11.482:  14%|█▎        | 43/313 [00:00<00:01, 194.58it/s]

Test accuracy for [*]: 99.98%


[Epoch: 1] Training loss after 46 batches: 229.854:  14%|█▍        | 45/313 [00:00<00:01, 208.22it/s]

Test accuracy for [/]: 0.42%


[Epoch: 1] Training loss after 42 batches: 0.041:  13%|█▎        | 41/313 [00:00<00:01, 207.79it/s]

Test accuracy for [^2]: 0.01%




Test accuracy for [sqrt]: 99.97%


In [22]:
# Show which tqdm release is active in this runtime.
import tqdm
tqdm.__version__

'4.25.0'

### Saving the results

In [0]:
# With orient='index' the operation labels (the keys of test_scores) become
# the DataFrame index and 'Accuracy' is the only column.
results = pd.DataFrame.from_dict(test_scores, orient='index', columns=['Accuracy'])
# The index has to be written out: dropping it would leave a CSV of bare
# accuracies with nothing to say which operation each one belongs to.
results.to_csv('results.csv', index_label='Operation')
results.head(6)