In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
import matplotlib
import numpy as np
import tensorflow as tf # NOTE: This code runs with tensorflow version 2.0.0
import matplotlib.pyplot as plt
from IPython.display import SVG
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input, Dense, LSTM, GRU, Embedding, Lambda, concatenate, multiply, add
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import model_to_dot
from tqdm import tqdm,trange

# Set parameters

In [None]:
# Training set: number of example sequences and the length bound handed to gen_train_data.
num_train_examples = 100000
max_train_length = 50

# Test sets: examples per length, plus the bounds/step of the length sweep.
# max_test_length is also the padded width of every test input (see gen_test_data).
num_test_examples = 10000
min_test_length=5
max_test_length=100
step_test_length=5
# Loss identifier passed to compile() by every model builder in this notebook.
losstype='mse'

# With the values above this is 5, 10, ..., 95: range() stops before max_test_length.
lengths = range(min_test_length, max_test_length, step_test_length)

# Data-generation functions (not needed when loading previously saved data)

In [None]:
def gen_train_data(num_train_examples, max_train_length):
    """Build right-aligned random digit sequences and their sums for training.

    Every row is 100 columns wide and zero-padded on the left. The trailing
    ``n`` columns hold values drawn with ``np.random.randint(1, 10)``, where
    ``n`` itself is drawn per row with ``np.random.randint(1, max_train_length)``.

    Returns:
        (X, sum_X): the ``(num_train_examples, 100)`` array of sequences and a
        1-D array with the sum of each row.
    """
    sequences = np.zeros((num_train_examples, 100))
    totals = np.zeros((num_train_examples))
    progress = tqdm(range(num_train_examples), desc='Generating train examples: ')
    for row_idx in progress:
        row = sequences[row_idx]  # view: writes land in `sequences`
        digit_count = np.random.randint(1, max_train_length)
        # Fill from the last column backwards so the padding stays on the left.
        for back in range(1, digit_count + 1):
            row[-back] = np.random.randint(1, 10)
        totals[row_idx] = row.sum()
    return sequences, totals

def gen_test_data(num_examples, length):
    """Build test sequences that each contain exactly ``length`` random digits.

    Rows are ``max_test_length`` wide (notebook-level setting) and zero-padded
    on the left; the final ``length`` columns hold values drawn with
    ``np.random.randint(1, 10)``.

    Returns:
        (Y, sum_Y): the sequences and a 1-D array with the sum of each row.
    """
    sequences = np.zeros((num_examples, max_test_length))
    totals = np.zeros((num_examples))
    for row_idx in range(num_examples):
        row = sequences[row_idx]  # view: writes land in `sequences`
        # Columns are filled in the order -1, -2, ..., -length.
        for back in range(1, length + 1):
            row[-back] = np.random.randint(1, 10)
        totals[row_idx] = row.sum()
    return sequences, totals

def gen_test_lengths(num_test_examples):
    """Generate one test set per evaluated sequence length.

    The lengths are ``range(min_test_length, max_test_length, step_test_length)``
    taken from the notebook-level settings.

    Returns:
        (testY, testSumY): dicts keyed by length, holding the sequences and the
        sums returned by ``gen_test_data`` for that length.
    """
    testY, testSumY = {}, {}
    for seq_len in range(min_test_length, max_test_length, step_test_length):
        # generate test data
        testY[seq_len], testSumY[seq_len] = gen_test_data(num_test_examples, seq_len)
    return testY, testSumY

# Get Data

In [None]:
# We can either generate and save or load existing
# gen=True builds fresh random data; gen=False reads the arrays stored under data/.
gen = True
if gen:
    # If we want to generate and save the training and test sets
    X, sum_X = gen_train_data(num_train_examples, max_train_length)
    testY, testSumY = gen_test_lengths(num_test_examples)
    # Result stores start empty; evaluateModel fills them per model name.
    metrics = {}
    allPreds = {}
    # Saving is switched off here; change the condition to persist the generated data.
    if False:
        np.save("data/X",X)
        np.save("data/sum_X",sum_X)
        np.save("data/testY",testY)
        np.save("data/testSumY",testSumY)
        np.save("data/metrics",metrics)
        np.save("data/allPreds",allPreds)    
else:
    X = np.load("data/X.npy")
    sum_X = np.load("data/sum_X.npy")
    # NOTE(review): allow_pickle=True unpickles arbitrary objects from these files;
    # only load files you produced yourself.
    testY = np.load("data/testY.npy",allow_pickle=True)
    testSumY = np.load("data/testSumY.npy",allow_pickle=True)
    metrics = np.load("data/metrics.npy",allow_pickle=True)
    allPreds = np.load("data/allPreds.npy",allow_pickle=True)
    # Indexing with () presumably unwraps the 0-d object array that np.save
    # wrote for each dict, giving back the dict itself.
    testY = testY[()]
    testSumY = testSumY[()]
    metrics = metrics[()]
    allPreds = allPreds[()]
    

# Keep references to the untransformed targets so later cells can derive
# alternative targets from them without losing the originals.
orig_sum_X = sum_X
orig_testSumY = testSumY

In [None]:
# To change to the problem of outputting only the ones digit, run this cell.
# The targets are always derived from the orig_* copies, so re-running is safe.
# They are kept as NumPy arrays rather than Python lists: Keras reads a list
# passed as `y` as "one array per model output", not as one target per sample.
sum_X = np.asarray(orig_sum_X) % 10
testSumY = {length: np.asarray(sums) % 10 for length, sums in orig_testSumY.items()}

# Helper functions

In [None]:
def fun(x, mask):
    """Sum `x` over axis 1, leaving out the positions that `mask` switches off.

    Args:
        x: tensor to reduce; the sum runs over axis 1.
        mask: mask tensor for `x`, or None when there is no mask. It is cast to
            float32 and given a trailing axis so it broadcasts against `x`.

    Returns:
        The (masked) sum of `x` over axis 1.
    """
    # Handles masking.
    # tf.is_tensor is used here, as in complexNormedMultiply, instead of
    # K.is_keras_tensor: the latter only reports tensors that carry Keras
    # history and raises for non-tensor arguments such as None, so a genuine
    # mask could be skipped and a missing one could crash the layer.
    if tf.is_tensor(mask):
        mask_cast = K.cast(mask, 'float32')
        expanded = K.expand_dims(mask_cast)
        return K.sum(expanded * x, axis=1)
    return K.sum(x, axis=1)

In [None]:
def visualizeModel(model):
    """Display the layer graph of `model`, with shapes, as an inline SVG."""
    graph = model_to_dot(model, show_shapes=True)
    svg_data = graph.create(prog='dot', format='svg')
    display(SVG(svg_data))

def trainModel(model,numEpochs=200,trainData=None,trainWeights=None):
    """Fit `model`, report how well it fits the training data, return its weights.

    Args:
        model: compiled Keras model; it is trained in place.
        numEpochs: upper bound on epochs (early stopping may end sooner).
        trainData: input sequences. When omitted, the notebook-level `X` is
            looked up at call time.
        trainWeights: regression targets handed to `fit` as `y` (despite the
            name these are labels, not sample weights). When omitted, the
            notebook-level `sum_X` is looked up at call time.

    Returns:
        dict mapping layer index to that layer's `get_weights()` list, in the
        layout `loadModel` expects.
    """
    # Previously both data parameters were accepted but ignored in favour of
    # the globals. Explicit arguments are now honoured; the no-argument call
    # keeps reading the current globals exactly as before.
    if trainData is None:
        trainData = X
    if trainWeights is None:
        trainWeights = sum_X
    # fit() treats a Python list `y` as one array per output; hand it an array.
    targets = np.asarray(trainWeights)

    # train
    earlyStop = EarlyStopping(monitor='val_loss',patience=10,verbose=1,min_delta=0.00001)
    lrscheduler = ReduceLROnPlateau(monitor='val_loss',patience=2,factor=0.5,verbose=1,min_delta=0.0001,cooldown=3)

    model.fit(trainData, targets, epochs=numEpochs, batch_size=128,
            shuffle=True, validation_split=0.0123456789,
            callbacks=[earlyStop,lrscheduler])

    # Snapshot the weights per layer index so a same-architecture model can be
    # rebuilt and restored later.
    temp_we = {idx: layer.get_weights() for idx, layer in enumerate(model.layers)}

    preds = np.squeeze(model.predict(trainData, batch_size=128, verbose=1))
    # A prediction counts as correct when it rounds to the exact target.
    print("Training accuracy:", 1.0*np.sum(np.round(preds)==targets)/len(targets))
    print("Mean absolute error:", np.sum(np.abs(preds-targets))/len(targets))

    return temp_we

def loadModel(model, modelWeights):
    """Copy stored per-layer weights into `model` and return that same model.

    `modelWeights` is indexed by layer position: entry ``i`` is passed to
    ``set_weights`` of the i-th layer in ``model.layers``.
    """
    for position, layer in enumerate(model.layers):
        layer.set_weights(modelWeights[position])
    return model

def evaluateModel(modelFunc, modelWeights, name):
    """Score a trained architecture on every test length and record the results.

    For each entry of the notebook-level `lengths`, a model is built with
    `modelFunc(max_test_length)`, given `modelWeights` via `loadModel`, and run
    on `testY[l]`. Accuracy (rounded prediction equals target), MAE and MSE are
    appended to `metrics[name]`; the raw predictions go to `allPreds[name][l]`.
    Any earlier entries stored under `name` are replaced.
    """
    results = {'acc':[], 'mae':[], 'mse':[]}
    metrics[name] = results
    predictions = {}
    allPreds[name] = predictions
    for l in lengths:
        print('Evaluating at length: ', l)
        # Reset Keras' global state before building this iteration's model.
        K.clear_session()

        # Retrieve test data
        inputs, targets = testY[l], testSumY[l]
        count = len(targets)

        # model
        net = loadModel(modelFunc(max_test_length), modelWeights)

        # prediction
        preds = net.predict(inputs, batch_size=128, verbose=0)
        predictions[l] = preds

        residual = np.squeeze(preds) - targets
        results['acc'].append(1.0*np.sum(np.squeeze(np.round(preds))==targets)/count)
        results['mae'].append(np.sum(np.abs(residual))/count)
        results['mse'].append(np.dot(residual, residual)/count)
    return None

# DeepSet Model

In [None]:
# This code comes from the DeepSets repository but is modified to include masking in the Lambda layer
def get_deepset_model(max_length):
    """Build and compile the DeepSets regressor for inputs of width `max_length`.

    Layers: Embedding(11 -> 100, mask_zero=True) -> Dense(30, tanh) ->
    sum over the sequence axis via `fun` -> Dense(1). Compiled with Adam and
    the notebook-level `losstype`.
    """
    input_txt = Input(shape=(max_length,))
    x = Embedding(11, 100, mask_zero=True)(input_txt)
    x = Dense(30, activation='tanh')(x)
    # we don't want to pass the mask, we want to use it before applying the sum though
    Adder = Lambda(fun, output_shape=lambda s: (s[0], s[2]), )
    x = Adder(x)
    encoded = Dense(1)(x)
    summer = Model(input_txt, encoded)
    # `learning_rate` is the documented argument name; `lr` is only a legacy alias.
    adam = Adam(learning_rate=1e-4, epsilon=1e-3)
    summer.compile(optimizer=adam, loss=losstype)
    return summer

# Train DeepSets model

In [None]:
# model
# Width 100 matches the fixed number of columns gen_train_data gives X.
model = get_deepset_model(100)
# Keep the per-layer weights so evaluateModel can rebuild and restore the network.
deep_mse_we = trainModel(model)

# Evaluate Deepsets

In [None]:
# Results are stored in metrics["DeepSets"] and allPreds["DeepSets"].
evaluateModel(get_deepset_model, deep_mse_we, "DeepSets")

# Equal-parameter DeepSets

In [None]:
# Modified DeepSets model to match our number of parameters
def get_ep_model(max_length):
    """Build and compile the "equal-parameter" DeepSets variant.

    Layers: Embedding(11 -> 150, mask_zero=True) -> sum over the sequence axis
    via `fun` -> Dense(1). Unlike `get_deepset_model` there is no per-position
    Dense layer. Compiled with Adam and the notebook-level `losstype`.
    """
    input_txt = Input(shape=(max_length,))
    x = Embedding(11, 150, mask_zero=True)(input_txt)
    # we don't want to pass the mask, we want to use it before applying the sum though
    Adder = Lambda(fun, output_shape=lambda s: (s[0], s[2]), )
    x = Adder(x)
    encoded = Dense(1)(x)
    summer = Model(input_txt, encoded)
    # `learning_rate` is the documented argument name; `lr` is only a legacy alias.
    adam = Adam(learning_rate=1e-4, epsilon=1e-3)
    summer.compile(optimizer=adam, loss=losstype)
    return summer

In [None]:
# model
# Width 100 matches the fixed number of columns gen_train_data gives X.
model = get_ep_model(100)
# Keep the per-layer weights so evaluateModel can rebuild and restore the network.
ep_mse_we = trainModel(model)

In [None]:
# Results are stored in metrics["epDeepSets"] and allPreds["epDeepSets"].
evaluateModel(get_ep_model, ep_mse_we, "epDeepSets")

# LSTM Model

In [None]:
# LSTM baseline, code from DeepSets repository
def get_lstm_model(max_length):
    """Build and compile the LSTM baseline for inputs of width `max_length`.

    Layers: Embedding(11 -> 100, mask_zero=True) -> LSTM(50) -> Dense(1).
    Compiled with Adam and the notebook-level `losstype`.
    """
    input_txt = Input(shape=(max_length,))
    x = Embedding(11, 100, mask_zero=True)(input_txt)
    x = LSTM(50)(x)
    encoded = Dense(1)(x)
    summer = Model(input_txt, encoded)
    # `learning_rate` is the documented argument name; `lr` is only a legacy alias.
    adam = Adam(learning_rate=1e-4)
    summer.compile(optimizer=adam, loss=losstype)
    return summer

# Train LSTM model

In [None]:
# model
# Width 100 matches the fixed number of columns gen_train_data gives X.
model = get_lstm_model(100)
# Keep the per-layer weights so evaluateModel can rebuild and restore the network.
lstm_we = trainModel(model)

# Evaluate LSTM

In [None]:
# Results are stored in metrics["LSTM"] and allPreds["LSTM"].
evaluateModel(get_lstm_model, lstm_we, "LSTM")

# GRU Model

In [None]:
# GRU baseline, code from DeepSets repository
def get_gru_model(max_length):
    """Build and compile the GRU baseline for inputs of width `max_length`.

    Layers: Embedding(11 -> 100, mask_zero=True) -> GRU(80) -> Dense(1).
    Compiled with Adam and the notebook-level `losstype`.
    """
    input_txt = Input(shape=(max_length,))
    x = Embedding(11, 100, mask_zero=True)(input_txt)
    x = GRU(80)(x)
    encoded = Dense(1)(x)
    summer = Model(input_txt, encoded)
    # `learning_rate` is the documented argument name; `lr` is only a legacy alias.
    adam = Adam(learning_rate=1e-4)
    summer.compile(optimizer=adam, loss=losstype)
    return summer

# Train GRU model

In [None]:
# model
# Width 100 matches the fixed number of columns gen_train_data gives X.
model = get_gru_model(100)
# Keep the per-layer weights so evaluateModel can rebuild and restore the network.
gru_we = trainModel(model)

# Evaluate GRU model

In [None]:
# Results are stored in metrics["GRU"] and allPreds["GRU"].
evaluateModel(get_gru_model, gru_we, "GRU")

In [None]:
# Save computed metrics and predictions for future reference
# np.save does not create missing directories, and the save branch in the
# data cell is disabled, so make sure data/ exists before writing.
os.makedirs("data", exist_ok=True)
np.save("data/metrics",metrics)
np.save("data/allPreds",allPreds)

# Our Method (Complex Normalized)

In [None]:
def complexNormedMultiply(q, mask):
    """Multiply unit-normalised complex values over axis 1 and sum the `r` term.

    Args:
        q: sequence ``[x, y, r, initX, initY, initR]``. ``x`` and ``y`` are the
            real and imaginary parts and ``r`` the additive term, reduced over
            axis 1; ``initX``/``initY``/``initR`` are the matching initial
            terms folded into the result.
            (assumes x, y, r are (batch, sequence, dim) -- TODO confirm)
        mask: a mask tensor for x/y/r, a list of per-input masks, or None.

    Returns:
        Concatenation of the real part, the imaginary part and the summed
        ``r`` term of the result.
    """
    x = q[0]
    y = q[1]
    r = q[2]
    initX = q[3]
    initY = q[4]
    initR = q[5]
    # When the layer is called on a list of inputs the mask can arrive as a
    # list with one entry per input (None for inputs without a mask). A list
    # fails tf.is_tensor, which would silently skip masking, so pick the first
    # real mask; x, y and r are embeddings of the same input.
    if isinstance(mask, (list, tuple)):
        mask = next((m for m in mask if m is not None), None)
    # Here x is the real part and y is the imaginary part
    if tf.is_tensor(mask):
        # this sets masked values to 1+0i
        mask_cast = K.cast(mask, 'float32')
        expanded = K.expand_dims(mask_cast)
        zeroX = expanded * x
        newY = expanded * y
        newR = expanded * r
        # here I flip the mask (essentially XOR); ones_like is used because the
        # static shape may contain an unknown batch dimension, which tf.ones
        # cannot build a tensor from.
        antiMask = tf.ones_like(expanded)-expanded
        newX = zeroX+antiMask
    else:
        newX = x
        newY = y
        newR = r
    # Scale every (real, imag) pair to unit modulus.
    # NOTE(review): a pair that is exactly 0+0i divides by zero here.
    sumVecs = tf.math.sqrt(tf.multiply(newX,newX)+tf.multiply(newY,newY))
    normedX = newX/sumVecs
    normedY = newY/sumVecs
    normedR = newR
    initSum = tf.math.sqrt(tf.multiply(initX,initX)+tf.multiply(initY,initY))
    inX = initX/initSum
    inY = initY/initSum
    # Using builtin complex numbers
    complexVec = tf.complex(normedX,normedY)
    initVec = tf.complex(inX,inY)
    # Product over axis 1, then multiply in the initial complex term.
    complexOut = K.prod(complexVec,axis=1)
    newCOut = multiply([complexOut,tf.expand_dims(initVec,0)])
    # The r terms are combined additively: sum over axis 1 plus initR.
    rOut = K.sum(normedR,axis=1)
    newROut = add([rOut,tf.expand_dims(initR,0)])
    tensorOut = concatenate(list([tf.math.real(newCOut),tf.math.imag(newCOut),newROut]))
    return tensorOut

In [None]:
def get_normedcartset_model(max_length):
    """Build and compile the normalised complex-product model ("Our Method").

    Three Embedding(11 -> 50, mask_zero=True) layers produce the real part,
    the imaginary part and the additive magnitude term for every position.
    `complexNormedMultiply` reduces them over the sequence and Dense(1) maps
    the result to the prediction. Compiled with Adam and the notebook-level
    `losstype`.
    """
    edim = 50
    input_txt = Input(shape=(max_length,))
    # We want x to be the real part and y to be the imaginary part and r is the magnitude in a sense
    # e^r (x + y*i)
    x = Embedding(11, edim, mask_zero=True)(input_txt)
    y = Embedding(11, edim, mask_zero=True)(input_txt)
    r = Embedding(11, edim, mask_zero=True)(input_txt)
    # the init variables account for lambda*rho
    # thus lambda*rho = e^initR (initX + initY*i)
    # NOTE(review): these are plain backend variables passed to the Lambda as
    # inputs rather than layer weights -- confirm that they are trained and
    # that trainModel's per-layer get_weights() snapshot captures them.
    initX = K.variable(value=np.ones(edim),dtype='float32')
    initY = K.variable(value=np.ones(edim),dtype='float32')
    initR = K.variable(value=np.ones(edim),dtype='float32')
    CM = Lambda(complexNormedMultiply, output_shape=lambda s: (s[0][0], s[0][2]*3), name="NormedComplexMultiply")
    z = CM([x,y,r,initX,initY,initR])
    encoded = Dense(1)(z)
    model = Model(input_txt, encoded)
    # `learning_rate` is the documented argument name; `lr` is only a legacy alias.
    adam = Adam(learning_rate=1e-4, epsilon=1e-3)
    model.compile(optimizer=adam, loss=losstype)
    return model

# Train Normed Complex Cartesian model

In [None]:
# model
# Width 100 matches the fixed number of columns gen_train_data gives X.
model = get_normedcartset_model(100)
# Keep the per-layer weights so evaluateModel can rebuild and restore the network.
normedcartset_mse_we = trainModel(model)

# Evaluate Normed Complex Cartesian model

In [None]:
# Results are stored in metrics["Our Method"] and allPreds["Our Method"].
evaluateModel(get_normedcartset_model, normedcartset_mse_we, "Our Method")

# Compare Models

In [None]:
# Build each architecture only to report model.count_params(); nothing is trained here.
# The equal-parameter DeepSets variant (get_ep_model) is not included in this listing.
model = get_deepset_model(max_train_length)
print("DeepSets model has " + str(model.count_params()) + " parameters")
model = get_lstm_model(max_train_length)
print("LSTM model has " + str(model.count_params()) + " parameters")
model = get_gru_model(max_train_length)
print("GRU model has " + str(model.count_params()) + " parameters")
model = get_normedcartset_model(max_train_length)
print("Our model has " + str(model.count_params()) + " parameters")


In [None]:
# Draw the layer graph of a freshly built (untrained) instance of our model.
model=get_normedcartset_model(max_train_length)
visualizeModel(model)

# Compare the performance

In [None]:
# This code comes mainly from the DeepSets repository, with some modifications
# Accuracy versus sequence length, one line per evaluated model.
font = {'size': 14}
matplotlib.rc('font', **font)

scale = 1
plt.figure(figsize=(10*scale, 8*scale))

# trainedModels and legendNames are reused by the MAE and MSE plotting cells below.
trainedModels = metrics.keys()
legendNames = []
spot = 0
# Marker styles are cycled (modulo 7) when there are more models than styles.
markers = ['o-','s-','+-','D-','1-','2-','x-']
for modName in trainedModels:
    legendNames.append(modName)
    plt.plot(lengths, metrics[modName]['acc'], markers[spot%7])
    spot+=1
plt.xlabel('Number of digits')
plt.ylabel('Accuracy')
plt.title('Accuracy')
plt.ylim( 0, 1.1 )
# Hard-coded x-range; matches the first and last value of `lengths` for the settings above.
plt.xlim( 5, 95 )
# Legend is placed outside the axes, to the right.
plt.legend(legendNames, loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()

In [None]:
# Mean absolute error versus sequence length, one line per evaluated model.
# Relies on trainedModels and legendNames defined in the accuracy plotting cell.
font = {'size': 14}
matplotlib.rc('font', **font)

scale = 1
plt.figure(figsize=(10*scale, 8*scale))

for modName in trainedModels:
    # Values are divided by 1e2 for display, as stated in the y-axis label.
    plt.plot(lengths, np.array(metrics[modName]['mae'])/1e2, 'x-')
plt.xlabel('Number of input digits')
plt.ylabel('Mean absolute error/1e2')
plt.title('MAE')
plt.legend(legendNames, loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()

In [None]:
# Mean squared error versus sequence length, one line per evaluated model.
# Relies on trainedModels and legendNames defined in the accuracy plotting cell.
font = {'size': 14}
matplotlib.rc('font', **font)

scale = 1
plt.figure(figsize=(10*scale, 8*scale))

for modName in trainedModels:
    plt.plot(lengths, np.array(metrics[modName]['mse']), 'x-')
plt.xlabel('Number of input digits')
plt.ylabel('Mean squared error')
plt.title('MSE')
plt.legend(legendNames, loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()