In [0]:
# Imports
from __future__ import absolute_import, division, print_function

import numpy as np
import os
import h5py
import re
import tensorflow as tf
import time
import math
import pickle

from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Bidirectional,Dropout, Embedding, \
LSTM, Multiply, Lambda, Permute, Reshape, Masking, Input, Softmax, Subtract, \
Concatenate,Dropout,MaxPooling1D,AveragePooling1D,BatchNormalization, Maximum 
from tensorflow.keras import backend as K
from tensorflow.keras import activations
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.initializers import Constant, RandomUniform
from tensorflow.contrib.layers import maxout

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Import developed modules
from constants import *
from dataprep import *

In [0]:
# Initialize BCN constants
EMBEDDING = "COVE" # one of "ELMO", "COVE", "GLOVE", "CHAR+GLOVE" (see the table below)
RUN_SEED = 2
# Seed TensorFlow (weight init, dropout) and NumPy (np.random.shuffle is used
# for the train/validation split and the per-epoch shuffling).
tf.random.set_random_seed(RUN_SEED)
np.random.seed(RUN_SEED)
RUN_NUMBER = 3
BCN_DROPOUT = 0.3
BCN_MAX_LENGTH = 35
N_TARGET=None
BCN_DATASET = "SST-2" # or "SST-5" or "MMT"
BATCH_SIZE = 256
ENCODER_MODEL = 'ATTN'
EPOCHS = 70

# Dataset-dependent settings. Only "SST-5" and "SST-2" are configured here;
# for any other value N_TARGET stays None and elmoDir/dataPrefix are not set
# by this cell.
if BCN_DATASET == "SST-5":
  elmoDir = "elmo-sst5"
  N_TARGET=5
  dataPrefix = "./elmo-sst5/"
elif BCN_DATASET == "SST-2":
  elmoDir = "elmo-sst2"
  N_TARGET=2
  dataPrefix = "./elmo-sst2/"

# Embedding-dependent layer sizes:
#   BCN_BI_UNITS - units of the feed-forward layer and of each LSTM direction
#   FINAL_DIM    - width of the first dense layer of the classifier head
#   BCN_DIM      - size of the last axis of the model inputs
if EMBEDDING == "ELMO":
  BCN_BI_UNITS = 300
  FINAL_DIM = 1200
  BCN_DIM = 768
elif EMBEDDING == "COVE":
  BCN_BI_UNITS = 300
  FINAL_DIM = 1200
  BCN_DIM = 900
elif EMBEDDING == "GLOVE":
  BCN_BI_UNITS = 300
  FINAL_DIM = 1200
  BCN_DIM = 300
elif EMBEDDING == "CHAR+GLOVE":
  BCN_BI_UNITS = 300
  FINAL_DIM = 2400
  BCN_DIM = 400

# Placeholder defaults so the cell parses; change these to the directories
# where you want results and checkpoints saved.
RESULTS_PATH = "./results/"

CHECKPOINT_PATH = "./checkpoints/"

set_labels = ['train', 'test']

In [0]:
def read_sentiment_labels(file):
  """Read one float label per line from a label file.

  The path is built as ``dataPrefix + file + 'dataset_labels.txt'``, where
  ``dataPrefix`` is the module-level data directory prefix.

  Args:
    file: prefix of the label file name, e.g. "train_" or "test_".

  Returns:
    A list of floats, one per non-blank line, in file order.

  Raises:
    ValueError: if a non-blank line cannot be parsed as a float.
    OSError: if the file cannot be opened.
  """
  train_sent_path = dataPrefix + file + 'dataset_labels.txt'
  labels = []
  # `with` guarantees the handle is closed even if float() raises.
  with open(train_sent_path, 'r', encoding='utf-8') as f_train_sent:
    for line in f_train_sent:
      # Skip blank lines (e.g. a trailing newline) instead of crashing on float('').
      if line.strip():
        labels.append(float(line))
  return labels

# Read the SST labels
sent_labels = read_sentiment_labels("train_")
test_sent_labels = read_sentiment_labels("test_")

# SST-5: bucket the float labels into integer class indices 0..4.
# These are class indices, not one-hot vectors; the model is compiled with
# 'sparse_categorical_crossentropy', which expects indices.
# (presumably the raw labels lie in [0, 1] - confirm against the label files)
if BCN_DATASET == "SST-5":
  from keras.utils import to_categorical  # unused in this cell; kept in case other cells rely on it
  sent_labels = [min(4, math.floor(5*x)) for x in sent_labels]
  test_sent_labels = [min(4, math.floor(5*x)) for x in test_sent_labels]

embeddings = []
BCN_X = []
BCN_X_test = []
if EMBEDDING == 'COVE':
  ENCODING_PATH = './encoding/' + ENCODER_MODEL + '/' + str(RUN_NUMBER) +'/'
  # SECURITY NOTE: pickle.load can execute arbitrary code. Only load encoding
  # files that you produced yourself or otherwise trust.
  with open (ENCODING_PATH +BCN_DATASET+ 'train_encodings', 'rb') as fp:
    encodings = pickle.load(fp)
  BCN_X = encodings
  with open (ENCODING_PATH +BCN_DATASET+ 'test_encodings', 'rb') as fp:
    encodings = pickle.load(fp)
  BCN_X_test = encodings

# Fail early with a clear message instead of a cryptic unpacking error below.
if len(BCN_X) == 0 or len(BCN_X_test) == 0:
  raise ValueError("No encodings loaded for EMBEDDING=%r; this cell only loads "
                   "pickled encodings when EMBEDDING == 'COVE'" % (EMBEDDING,))

# zip() silently truncates to the shorter input, which would hide a mismatch
# between encodings and labels, so check the lengths before pairing them.
if len(BCN_X) != len(sent_labels):
  raise ValueError("Train encodings/labels length mismatch: %d vs %d"
                   % (len(BCN_X), len(sent_labels)))
if len(BCN_X_test) != len(test_sent_labels):
  raise ValueError("Test encodings/labels length mismatch: %d vs %d"
                   % (len(BCN_X_test), len(test_sent_labels)))

## Shuffle data (encodings and labels together, so pairs stay aligned)
c = list(zip(BCN_X, sent_labels))
np.random.shuffle(c)
BCN_X, sent_labels = zip(*c)


#Create validation set
train_size = int(len(BCN_X)*.90)
# Train split ~ 9/10
BCN_X_train = BCN_X[:train_size]
train_sent_labels = sent_labels[:train_size]

# Val split ~ 1/10
BCN_X_val = BCN_X[train_size:]
val_sent_labels = sent_labels[train_size:]

# Test data is not shuffled; convert to tuples to match the train/val containers.
BCN_X_test = tuple(BCN_X_test)
test_sent_labels = tuple(test_sent_labels)


# Number of mini-batches per epoch (the last batch may be smaller).
BATCHES = int(math.ceil(len(BCN_X_train)/BATCH_SIZE))

In [0]:
# Build Model

# Lambda layer for matrix multiplication
def multiply_t_a(Z):
  """Return tf.linalg.matmul(Z[0], Z[1]) with the first operand transposed.

  Z is an indexable pair of tensors, as passed to a Lambda layer called on a
  two-element list.
  """
  left = Z[0]
  right = Z[1]
  return tf.linalg.matmul(left, right, transpose_a=True)

def multiply_t_b(Z):
  """Return tf.linalg.matmul(Z[0], Z[1]) with the second operand transposed.

  Z is an indexable pair of tensors, as passed to a Lambda layer called on a
  two-element list.
  """
  left = Z[0]
  right = Z[1]
  return tf.linalg.matmul(left, right, transpose_b=True)

def multiply_no_t(Z):
  """Return tf.linalg.matmul(Z[0], Z[1]) with neither operand transposed.

  Z is an indexable pair of tensors, as passed to a Lambda layer called on a
  two-element list.
  """
  left = Z[0]
  right = Z[1]
  return tf.linalg.matmul(left, right)

# Lambda layers for maxout network
def maxout_layer1(Z):
  """Apply tf.contrib.layers.maxout to Z with FINAL_DIM output units."""
  n_units = FINAL_DIM
  return maxout(inputs = Z, num_units = n_units)

# Lambda layers for maxout network
def maxout_layer2(Z):
  """Apply tf.contrib.layers.maxout to Z with int(FINAL_DIM/2) output units."""
  n_units = int(FINAL_DIM/2)
  return maxout(inputs = Z, num_units = n_units)

def maxout_layer3(Z):
  """Apply tf.contrib.layers.maxout to Z with a single output unit."""
  n_units = 1
  return maxout(inputs = Z, num_units = n_units)

def squeeze(Z):
  """Remove axis 2 from Z via the Keras backend squeeze."""
  squeezed = tf.keras.backend.squeeze(Z, axis=2)
  return squeezed

def max_pooling(Z):
  """Reduce Z along axis 1 by taking the maximum."""
  pooled = tf.reduce_max(Z, axis=1)
  return pooled

def min_pooling(Z):
  """Reduce Z along axis 1 by taking the minimum."""
  pooled = tf.reduce_min(Z, axis=1)
  return pooled

def mean_pooling(Z):
  """Reduce Z along axis 1 by taking the mean."""
  pooled = tf.reduce_mean(Z, axis=1)
  return pooled

# Equations 7 & 8
# Two inputs of shape (BCN_MAX_LENGTH, BCN_DIM) per example; test() and the
# training loop feed the same batch to both.
w_x = Input(shape=(BCN_MAX_LENGTH, BCN_DIM, ))
w_y = Input(shape=(BCN_MAX_LENGTH, BCN_DIM, ))

w_x_drop = Dropout(rate=BCN_DROPOUT)(w_x)
w_y_drop = Dropout(rate=BCN_DROPOUT)(w_y)

# Note that we use the same feed-forward network and BiLSTM for both w_x and w_y
dense_layer = Dense(units=BCN_BI_UNITS,activation="relu")
relu_x = dense_layer(w_x_drop)
relu_y = dense_layer(w_y_drop)

layer_lstm = Bidirectional(LSTM(units=BCN_BI_UNITS, return_sequences=True,
                       activation='sigmoid'))

X = layer_lstm(relu_x)
Y = layer_lstm(relu_y)

# Affinity matrix A = X Y^T
A = Lambda(multiply_t_b)([X, Y])

# Equation 9
A_x = Softmax(axis=-1)(A)
A_t = Permute((2, 1))(A)
A_y = Softmax(axis=-1)(A_t)

# Equation 10
C_x = Lambda(multiply_t_a)([A_x, X])
C_y = Lambda(multiply_t_a)([A_y, Y])

# Equation 11
X_times_C_y = Multiply()([X, C_y])
X_subtract_C_y = Subtract()([X,C_y])
x_concat = Concatenate(axis=2)([X, X_subtract_C_y, X_times_C_y])

x_mask = Masking(mask_value = 0.0)(x_concat)
X_y = Bidirectional(LSTM(units=BCN_BI_UNITS, return_sequences = True))(x_mask)

# Equation 12
Y_times_C_x = Multiply()([Y, C_x])
Y_subtract_C_x = Subtract()([Y,C_x])
y_concat = Concatenate(axis=2)([Y, Y_subtract_C_x, Y_times_C_x])

y_mask = Masking(mask_value = 0.0)(y_concat)
Y_x = Bidirectional(LSTM(units=BCN_BI_UNITS, return_sequences = True))(y_mask)

# Equation 13
X_y_d = Dropout(rate=BCN_DROPOUT)(X_y)
Y_x_d = Dropout(rate=BCN_DROPOUT)(Y_x)

# Self-attention weights: one score per timestep, normalised across the time
# axis (axis=1). The softmax must NOT be the Dense activation: with units=1 it
# would act on the size-1 feature axis and every weight would be exactly 1.0.
B_x = Softmax(axis=1)(Dense(units = 1)(X_y_d))
B_y = Softmax(axis=1)(Dense(units = 1)(Y_x_d))

# Equation 14
x_self = Lambda(multiply_t_a)([X_y, B_x])
y_self = Lambda(multiply_t_a)([Y_x, B_y])

# Drop the trailing singleton axis left by the matmul with the weights.
x_self_n = Lambda(squeeze)(x_self)
y_self_n= Lambda(squeeze)(y_self)

x_max_pool = Lambda(max_pooling)(X_y)
x_mean_pool =Lambda(mean_pooling)(X_y)
x_min_pool =Lambda(min_pooling)(X_y)

# Each y pool uses its own reduction, mirroring the x branch.
y_max_pool = Lambda(max_pooling)(Y_x)
y_mean_pool = Lambda(mean_pooling)(Y_x)
y_min_pool =Lambda(min_pooling)(Y_x)

x_pool = Concatenate(axis= -1)([x_max_pool, x_mean_pool, x_min_pool, x_self_n])
y_pool = Concatenate(axis =-1)([y_max_pool, y_mean_pool, y_min_pool, y_self_n])

# Maxout network
# NOTE(review): each maxout below is asked for as many units as the preceding
# Dense layer outputs, which looks like a pool size of 1 (i.e. a no-op).
# Confirm the intended pool size before changing the layer widths.
concat_xy = Concatenate(axis =1)([x_pool, y_pool])

result_1_dropout = Dropout(rate=BCN_DROPOUT)(concat_xy)
result_1_dense = Dense(FINAL_DIM)(result_1_dropout)
result_1_norm = BatchNormalization()(result_1_dense)
result_1 = Lambda(maxout_layer1)(result_1_norm)

result_2_dropout = Dropout(rate=BCN_DROPOUT)(result_1)
result_2_dense = Dense(int(FINAL_DIM/2))(result_2_dropout)
result_2_norm = BatchNormalization()(result_2_dense)
result_2 = Lambda(maxout_layer2)(result_2_norm)

result_3_dropout = Dropout(rate=BCN_DROPOUT)(result_2)
result_3_dense = Dense(N_TARGET)(result_3_dropout)
result = tf.keras.layers.Softmax()(result_3_dense)


BCN = Model(inputs=[w_x,w_y], outputs=result)
# Sparse loss: labels are fed as class indices rather than one-hot vectors.
BCN.compile(optimizer = "adam", loss = 'sparse_categorical_crossentropy',metrics=["accuracy"])
# BCN.summary()

In [0]:
# This function calculates the accuracy of the model given data X and true labels Y
def test(X, Y):
  """Return the fraction of examples in X whose predicted class equals Y.

  X is fed to both inputs of the global BCN model; the predicted class of each
  example is the argmax of its output row. Y is an indexable sequence of true
  class labels.
  """
  BCN_pred = BCN.predict(x=[X, X])
  n_total = len(Y)
  n_correct = sum(int(Y[idx] == BCN_pred[idx].argmax(axis=0))
                  for idx in range(n_total))
  return n_correct/n_total

In [0]:
# Accuracy of BCN on the test split at this point in the notebook. This cell
# sits before the training cell, so on a top-to-bottom run the model has not
# been trained yet and this is the untrained baseline.
test(BCN_X_test, test_sent_labels)

In [0]:
# Training
# Each split must pair every example with exactly one label.
assert(len(BCN_X_test) == len(test_sent_labels))
assert(len(BCN_X_val) == len(val_sent_labels))
assert(len(BCN_X_train) == len(train_sent_labels))

# Make sure the checkpoint directory exists before training starts, so the
# first save cannot fail after a full epoch of work.
os.makedirs(CHECKPOINT_PATH, exist_ok=True)
train_bcn_json = os.path.join(CHECKPOINT_PATH, 'bcn.json')
train_bcn_h5 = os.path.join(CHECKPOINT_PATH, 'bcn.h5')

losses = []
# Best validation accuracy seen so far (the name is kept for compatibility).
min_val_acc = 0.0
for j in range(EPOCHS):
  print("EPOCH",j)
  # Reshuffle examples and labels together at the start of every epoch.
  c = list(zip(BCN_X_train, train_sent_labels))
  np.random.shuffle(c)
  BCN_X_sh, train_sent_labels_sh = zip(*c)
  # Per-batch outputs of the current epoch only (reset every epoch).
  losses = []
  start = time.time()
  for i in range(BATCHES):
    batch_start = i*BATCH_SIZE
    batch_end = batch_start + BATCH_SIZE
    batch_x = BCN_X_sh[batch_start:batch_end]
    batch_y = train_sent_labels_sh[batch_start:batch_end]
    # The same batch is fed to both model inputs.
    output = BCN.train_on_batch(x=[batch_x, batch_x], y=[batch_y])
    losses.append(output)
    print(output)
  print("Epoch time (s):", time.time() - start)
  val = test(BCN_X_val, val_sent_labels)
  print("Val accuracy:", val)
  # Test accuracy is only reported; model selection uses validation accuracy.
  test_acc = test(BCN_X_test, test_sent_labels)
  print("Test accuracy:", test_acc)

  # If this is the best seen on the validation so far, save the model
  if val > min_val_acc:
    min_val_acc = val
    bcn_json = BCN.to_json()
    with open(train_bcn_json, "w") as json_file:
      json_file.write(bcn_json)
    # serialize weights to HDF5
    BCN.save_weights(train_bcn_h5)
    print("Saved model to disk")