In [1]:
# Imports
%matplotlib notebook

import sys
import itertools
import numpy as np  # Matrix and vector computation package
import matplotlib
import matplotlib.pyplot as plt  # Plotting library
import seaborn as sns  # Fancier plots

# Use seaborn's 'darkgrid' style for the plots in this notebook
sns.set_style('darkgrid')
# Seed NumPy's global random number generator for reproducibility
np.random.seed(seed=1)
#

In [7]:
# Dataset configuration for the binary-addition task
nb_train = 2000  # Number of training samples
# Adding two n-bit numbers can produce an (n+1)-bit result, so
# create_dataset draws its operands below 2**(sequence_len-1): the sum
# then always fits in sequence_len bits.
sequence_len = 7  # Length (in bits) of every input and target sequence

def create_dataset(nb_samples, sequence_len):
    """Build a random binary-addition dataset.

    Each sample consists of two random non-negative integers and their
    sum, all written as `sequence_len`-bit binary sequences stored
    least-significant bit first.

    Args:
        nb_samples: Number of samples to generate.
        sequence_len: Number of bits in every sequence.

    Returns:
        Tuple `(X, T)` of float arrays:
        X has shape (nb_samples, sequence_len, 2) and holds the bits of
        the two operands; T has shape (nb_samples, sequence_len, 1) and
        holds the bits of their sum.
    """
    # Exclusive upper bound for the operands: both stay below
    # 2**(sequence_len-1), so their sum fits in sequence_len bits.
    upper_bound = 2 ** (sequence_len - 1)
    # Template that renders an integer as zero-padded binary text
    format_str = '{:0' + str(sequence_len) + 'b}'

    def lsb_first_bits(value):
        """Return the bits of `value`, least-significant bit first."""
        # Binary text is written most-significant bit first, while the
        # sequence is consumed left to right, hence the reversal.
        return [int(digit) for digit in format_str.format(value)][::-1]

    operand_count = 2  # Two numbers are added ...
    result_count = 1   # ... giving a single result
    X = np.zeros((nb_samples, sequence_len, operand_count))
    T = np.zeros((nb_samples, sequence_len, result_count))
    for row in range(nb_samples):
        # Draw the two operands (first, then second) for this sample
        first = np.random.randint(0, upper_bound)
        second = np.random.randint(0, upper_bound)
        X[row, :, 0] = lsb_first_bits(first)
        X[row, :, 1] = lsb_first_bits(second)
        T[row, :, 0] = lsb_first_bits(first + second)
    return X, T

# Generate the training set: X_train holds the two operand bit sequences
# per sample, T_train the bit sequence of their sum.
X_train, T_train = create_dataset(nb_train, sequence_len)
# Print the shapes as a quick sanity check
print(f'X_train tensor shape: {X_train.shape}')
print(f'T_train tensor shape: {T_train.shape}')
#

X_train tensor shape: (2000, 7, 2)
T_train tensor shape: (2000, 7, 1)


In [85]:
class TensorLinear(object):
    """Affine layer ``Y = X @ W + b`` applied along the last axis of X.

    The input may carry any number of leading axes (e.g. samples and
    time steps); the transformation is applied independently to every
    vector along the last axis.

    Args:
        n_in: Size of the last axis of the input.
        n_out: Size of the last axis of the output.
        tensor_order: Rank of the tensors the layer will process. Kept
            for interface compatibility; it is not needed because the
            leading axes are handled generically.
        W: Optional weight matrix of shape (n_in, n_out). When omitted,
            weights are drawn uniformly from [-a, a] with
            a = sqrt(6 / (n_in + n_out)) (Glorot/Xavier uniform).
        b: Optional bias vector of shape (n_out,). Defaults to zeros.
    """

    def __init__(self, n_in, n_out, tensor_order, W=None, b=None):
        a = np.sqrt(6.0 / (n_in + n_out))
        self.W = (np.random.uniform(-a, a, (n_in, n_out))
                  if W is None else W)
        self.b = (np.zeros((n_out)) if b is None else b)

    def backward(self, X, gY):
        """Back-propagate the output gradient through the layer.

        With Y = X @ W + b and gY = dE/dY:
          * dE/dW = sum over all leading axes of outer(x, gy)
                    -> same shape as W, (n_in, n_out)
          * dE/db = sum of gY over all leading axes
                    -> same shape as b, (n_out,)
          * dE/dX = gY @ W.T
                    -> same shape as X

        Args:
            X: Input that was fed to `forward`, shape (..., n_in).
            gY: Gradient of the loss w.r.t. the layer output,
                shape (..., n_out) with the same leading axes as X.

        Returns:
            Tuple (gX, gW, gB) as described above.
        """
        X = np.asarray(X)
        gY = np.asarray(gY)
        # Collapse all leading axes so the parameter gradients are plain
        # sums over every (sample, step, ...) position.
        X_flat = X.reshape(-1, X.shape[-1])
        gY_flat = gY.reshape(-1, gY.shape[-1])

        gW = np.einsum('nk,nm->km', X_flat, gY_flat)
        # The bias gradient sums the *output gradient* gY (not the
        # layer output).
        gB = np.einsum('nm->m', gY_flat)
        gX = np.einsum('...m,mk->...k', gY, self.W.T)

        return gX, gW, gB

    def forward(self, X):
        """Return X @ W + b for an input of shape (..., n_in)."""
        return np.einsum('...k,kl->...l', X, self.W) + self.b

In [86]:
class LogisticClassifier(object):
    """Element-wise logistic (sigmoid) output layer with a binary
    cross-entropy loss."""

    def forward(self, X):
        """Return the logistic sigmoid 1 / (1 + exp(-X)), element-wise."""
        return 1. / (1. + np.exp(-X))

    def backward(self, Y, T):
        """Return (Y - T) divided by Y.shape[0] * Y.shape[1].

        Args:
            Y: Sigmoid outputs from `forward`; must have at least two
                axes.
            T: Binary targets with the same shape as Y.

        Note:
            This equals the gradient of `loss` w.r.t. the sigmoid input
            when any axes after the first two have size 1.
        """
        return (Y - T) / (Y.shape[0] * Y.shape[1])

    def loss(self, Y, T, eps=1e-12):
        """Return the mean binary cross-entropy between Y and T.

        Args:
            Y: Predicted probabilities.
            T: Binary targets with the same shape as Y.
            eps: Predictions are clipped to [eps, 1 - eps] before the
                logarithm. A sigmoid output saturates to exactly 0.0 or
                1.0 in floating point for large inputs, and the
                unclipped formula would then evaluate 0 * log(0) = NaN.
        """
        Y = np.clip(Y, eps, 1. - eps)
        return -np.mean((T * np.log(Y)) + ((1 - T) * np.log(1 - Y)))

In [84]:
class TanH(object):
    """Element-wise hyperbolic-tangent activation."""

    def forward(self, X):
        """Return tanh(X), element-wise."""
        activation = np.tanh(X)
        return activation

    def backward(self, Y, output_grad):
        """Return the gradient w.r.t. the tanh input.

        Args:
            Y: Output of `forward` (i.e. tanh of the input), so that the
                local derivative can be written as 1 - Y**2.
            output_grad: Gradient of the loss w.r.t. Y.
        """
        local_slope = 1.0 - Y**2
        return local_slope * output_grad

In [3]:
class RecurrentStateUpdate(object):
    """One recurrent step: S_k = tanh(X_k + linear(S_{k-1})).

    Args:
        nbStates: Number of state units; the recurrent linear map is
            nbStates -> nbStates.
        W: Recurrent weight matrix handed to TensorLinear.
        b: Recurrent bias vector handed to TensorLinear.
    """

    def __init__(self, nbStates, W, b):
        self.tanh = TanH()
        # NOTE(review): the linear layer is declared with tensor order 2,
        # presumably because each state is a 2-D (samples, nbStates)
        # array - confirm TensorLinear accepts inputs of that rank.
        self.linear = TensorLinear(nbStates, nbStates, 2, W, b)

    def forward(self, Xk, Sk):
        """Return the next state from the input Xk and current state Sk."""
        pre_activation = Xk + self.linear.forward(Sk)
        return self.tanh.forward(pre_activation)

    def backward(self, Sk0, Sk1, output_grad):
        """Back-propagate through one recurrent step.

        Args:
            Sk0: State that entered the step.
            Sk1: State the step produced (the tanh output).
            output_grad: Gradient of the loss w.r.t. Sk1.

        Returns:
            Tuple (gZ, gSk0, gW, gB): gradient w.r.t. the tanh input
            (which is also the gradient w.r.t. Xk, since Xk enters
            additively), gradient w.r.t. Sk0, and the gradients of the
            recurrent weights and bias.
        """
        grad_pre_activation = self.tanh.backward(Sk1, output_grad)
        grad_prev_state, grad_weights, grad_bias = self.linear.backward(
            Sk0, grad_pre_activation)
        return grad_pre_activation, grad_prev_state, grad_weights, grad_bias

In [46]:
# Matrix multiplication example: build two small random integer tensors.
# A is a stack of three 3x2 matrices and B a stack of three 2x4 matrices,
# with entries drawn from 0..9 (the upper bound 10 is exclusive).
# Presumably these are the operands of a batched matrix product in a
# later cell - not visible here.
A = np.random.randint(0, 10, size=(3, 3, 2))
B = np.random.randint(0, 10, size=(3, 2, 4))

# Show both operands
print(f'A matrix =\n{A}\n')
print(f'B matrix = \n{B}\n')

A matrix =
[[[5 0]
  [7 0]
  [2 6]]

 [[2 1]
  [0 9]
  [0 1]]

 [[5 0]
  [5 4]
  [4 8]]]

B matrix = 
[[[2 8 8 9]
  [4 0 0 1]]

 [[3 3 1 9]
  [9 3 5 6]]

 [[8 1 7 8]
  [1 5 9 4]]]

