In [1]:
import numpy as np
import torch as T

In [2]:
from lstm import LSTMModel

In [3]:
import pizza as pz

## 8-bit summation task

In [4]:
# Training dataset generation: a lookup table from every integer that fits in
# `binary_dim` bits to its bit pattern.
binary_dim = 8
largest_number = 2 ** binary_dim  # 256

# One row per integer; np.unpackbits expands each uint8 into its 8 bits,
# most-significant bit first (numpy's default bit order) -> shape (256, 8).
binary = np.unpackbits(np.arange(largest_number, dtype=np.uint8)[:, np.newaxis], axis=1)

# int2binary[k] is row k of `binary`.
int2binary = dict(enumerate(binary))

In [5]:
def binary_to_int(t):
    """Turn a sequence of bits, most-significant bit first, into its integer value.

    Each element is weighted by a power of two according to its distance from
    the end of the sequence (the last element has weight 1).  An empty
    sequence yields 0.
    """
    return sum(bit * 2 ** exponent for exponent, bit in enumerate(reversed(t)))

In [6]:
def fetcher():
    """Generate one random training example for the binary addition task (a + b = c).

    Both operands are drawn from ``[0, largest_number // 2)``, so their sum is
    always smaller than ``largest_number`` and can be looked up in
    ``int2binary``.

    Returns:
        tuple: ``(X, y)`` of float64 tensors.  ``X`` stacks the bit rows of
        the two operands (one row per operand) and ``y`` holds the bit row of
        their sum reshaped to a single row.  Every row is reversed relative to
        the ``int2binary`` table, i.e. least-significant bit first.
    """
    # Integer division keeps the bound an int; `largest_number / 2` is a float
    # under Python 3 and only worked because randint truncates it implicitly.
    operand_bound = largest_number // 2

    a_int = np.random.randint(operand_bound)  # int version
    a = list(reversed(int2binary[a_int]))  # binary encoding, LSB first
    b_int = np.random.randint(operand_bound)  # int version
    b = list(reversed(int2binary[b_int]))  # binary encoding, LSB first

    # true answer
    c_int = a_int + b_int
    c = list(reversed(int2binary[c_int]))

    inputs = T.tensor(np.stack((a, b)), dtype=T.float64)
    target = T.tensor(np.reshape(c, (1, -1)), dtype=T.float64)
    return inputs, target


def hook(model, loss, y_preds, j, X, y):
    """Training callback: print one worked addition every 1000 iterations.

    Args:
        model: unused here; accepted so the callback signature stays the same.
        loss: value printed with four decimals as the batch loss.
        y_preds: sequence of predictions; element ``[0, 0]`` of each entry is
            rounded to obtain one predicted bit.
        j: iteration counter; nothing is printed unless it is a multiple of 1000.
        X: inputs; rows 0 and 1 are printed as the two operands.
        y: target; squeezed and printed as the expected sum.
    """
    print_every = 1000
    if j % print_every != 0:
        return

    def display_order(values):
        # Cast each element to int, then flip the sequence for printing.
        return [int(v) for v in values][::-1]

    first_operand = display_order(X[0])
    second_operand = display_order(X[1])
    expected_sum = display_order(y.squeeze())

    print()
    print('[{0}] Loss batch : {1:.4f}'.format(j, loss))
    print('      {0}'.format(first_operand))
    print('  +   {0}'.format(second_operand))
    print('-' * 35)
    print('      {0}'.format(expected_sum))
    # Predictions are walked in reverse so they line up with the flipped operands.
    predicted_sum = [int(np.round(step[0, 0])) for step in reversed(y_preds)]
    print('Pred: {0}'.format(predicted_sum))
    print('{0} + {1} = {2}'.format(binary_to_int(first_operand),
                                   binary_to_int(second_operand),
                                   binary_to_int(predicted_sum)))

In [7]:
def least_square(a, b):
    """Squared difference between ``a`` and ``b``, read from element [0, 0]."""
    difference = a - b
    return difference.pow(2.)[0, 0]


# pytorch has a tough time trying to optimize absolute loss
def absolute_loss(a, b):
    """Absolute difference between ``a`` and ``b``, read from element [0, 0]."""
    difference = a - b
    return T.abs(difference)[0, 0]


# Hyper-parameters handed to LSTMModel.
# NOTE(review): the learning rate is negative; presumably this follows the sign
# convention of the pizza optimizer -- confirm against that package.
params_lstm = {
    'n_in': 2,
    'n_hidden1': 12,
    'n_hidden2': 6,
    'n_out': 1,
    'optimizer': pz.Adagrad(),
    'learning_rate': -0.1,
    'clamping': 20.,
    'compute_loss': least_square,
    'out_nonlinear': T.sigmoid,
    'device': T.device('cpu'),
    'w_init': (None, None),
}

# Build the model and train it on examples drawn from `fetcher`; `hook` prints progress.
lstm_model = LSTMModel(params_lstm)
lstm_model.train(n_iterations=5000, fetcher=fetcher, hook=hook)


[0] Loss batch : 0.2531
      [0, 0, 0, 0, 1, 0, 1, 0]
  +   [0, 1, 1, 0, 1, 0, 1, 0]
-----------------------------------
      [0, 1, 1, 1, 0, 1, 0, 0]
Pred: [1, 1, 1, 1, 1, 0, 1, 0]
10 + 106 = 250

[1000] Loss batch : 0.0123
      [0, 0, 1, 0, 1, 1, 1, 1]
  +   [0, 1, 1, 1, 0, 0, 1, 0]
-----------------------------------
      [1, 0, 1, 0, 0, 0, 0, 1]
Pred: [1, 0, 1, 0, 0, 0, 0, 1]
47 + 114 = 161

[2000] Loss batch : 0.0052
      [0, 1, 1, 0, 0, 1, 0, 1]
  +   [0, 0, 0, 1, 1, 0, 1, 1]
-----------------------------------
      [1, 0, 0, 0, 0, 0, 0, 0]
Pred: [1, 0, 0, 0, 0, 0, 0, 0]
101 + 27 = 128

[3000] Loss batch : 0.0020
      [0, 1, 1, 0, 1, 0, 1, 1]
  +   [0, 1, 0, 0, 0, 1, 0, 1]
-----------------------------------
      [1, 0, 1, 1, 0, 0, 0, 0]
Pred: [1, 0, 1, 1, 0, 0, 0, 0]
107 + 69 = 176

[4000] Loss batch : 0.0001
      [0, 0, 1, 0, 1, 0, 0, 0]
  +   [0, 1, 0, 0, 0, 0, 1, 0]
-----------------------------------
      [0, 1, 1, 0, 1, 0, 1, 0]
Pred: [0, 1, 1, 0, 1, 0, 1, 0]
40 + 66 = 106

In [None]:
def _lsb_first_bits(value, length):
    """Return the ``length``-bit encoding of ``value``, least-significant bit first."""
    value = int(value)
    if value < 0 or value >= 2 ** length:
        raise ValueError(
            '{0} cannot be encoded as an unsigned {1}-bit number'.format(value, length))
    return [(value >> position) & 1 for position in range(length)]


def get_numbers(a_int, b_int, length=8):
    """Encode the addition problem ``a_int + b_int`` as model tensors.

    The bit encoding is computed inside this cell so that it does not rely on
    any helper defined outside of it.

    Args:
        a_int: first operand, a non-negative integer.
        b_int: second operand, a non-negative integer.
        length: number of bits used for each operand and for the sum.

    Returns:
        tuple: ``(X, y)`` of float64 tensors.  ``X`` has shape ``(2, length)``
        with one operand per row and ``y`` has shape ``(1, length)`` holding
        the sum; every row is least-significant bit first.

    Raises:
        ValueError: if an operand or the sum is negative or needs more than
            ``length`` bits.
    """
    a = _lsb_first_bits(a_int, length)  # binary encoding
    b = _lsb_first_bits(b_int, length)  # binary encoding
    # true answer
    c = _lsb_first_bits(a_int + b_int, length)
    return T.tensor(np.stack((a, b)), dtype=T.float64), T.tensor(np.reshape(c, (1, -1)), dtype=T.float64)


def test_random_sum(max_number, min_number=None, n_test=100, model=None, length=8):
    """Check a model on randomly drawn additions and report every wrong answer.

    Args:
        max_number: exclusive upper bound for each random operand.
        min_number: inclusive lower bound for each random operand; ``None``
            means 0.
        n_test: number of random additions to evaluate.
        model: object exposing ``forward(X, y, train=False)`` that returns
            ``(loss, y_preds)``.  Required, even though the signature keeps a
            ``None`` default.
        length: number of bits used to encode the operands and the sum.  The
            caller is expected to pick bounds whose sums fit in ``length``
            bits; this function does not check it.

    Returns:
        int: the number of additions the model got wrong (also printed).

    Raises:
        ValueError: if ``model`` is ``None``.
    """
    if model is None:
        # Fail early with a clear message instead of an AttributeError in the loop.
        raise ValueError('test_random_sum requires a model to evaluate')

    min_number = 0 if min_number is None else min_number
    # randint's lower bound is inclusive, hence ">=" in the header.
    print('Testing random sum for numbers < {0} and >= {1}, with sum smaller than {2}'.format(max_number,
                                                                                              min_number, 2**length))
    n_fail = 0
    for _ in range(n_test):
        a = np.random.randint(low=min_number, high=max_number)
        b = np.random.randint(low=min_number, high=max_number)
        c = a + b
        inputs, target = get_numbers(a, b, length)

        loss, y_preds = model.forward(inputs, target, train=False)
        # Predictions are read in reverse order, rounded to bits, then decoded.
        d = binary_to_int([int(np.round(p)) for p in reversed(y_preds)])
        if c != d:
            n_fail += 1
            print('Fail at {0} + {1} = {2}'.format(a, b, d))
    print('total failures: {0}'.format(n_fail))
    return n_fail