# Using ELMO
* The ELMo TF-Hub module currently supports only TF 1.x, so we need to switch back to the old version
* The coding style will be quite different
* Most of the code is from this [reference](https://github.com/strongio/keras-elmo/blob/master/Elmo%20Keras.ipynb)
* In TF 1.x, we need to create a **session**, build the graph structure, and then execute all the code *inside* this session.

In [0]:
%tensorflow_version 1.x  # use TF1 here
import tensorflow as tf
import pandas as pd
import tensorflow_hub as hub
from keras import backend as K
import keras.layers as layers
from keras.models import Model, load_model
from keras.engine import Layer
from keras.optimizers import Adam
import numpy as np

print(tf.__version__) # confirm version

# Initialize session
# Create one TF1 session and register it with Keras so both use the same one;
# later cells call sess.run(...) on this object.
sess = tf.Session()
K.set_session(sess)

`%tensorflow_version` only switches the major version: `1.x` or `2.x`.
You set: `1.x  # use TF1 here`. This will be interpreted as: `1.x`.


TensorFlow 1.x selected.


Using TensorFlow backend.


1.15.0


## Load the data
* Stemming might not be a good choice since the model cares about tense and singular/plural of the word

In [0]:
# Define some parameters
BATCH_SIZE = 128   # mini-batch size passed to model.fit
EMBED_SIZE = 1024  # width of the ELMo embedding (same value the embedding layer hard-codes)
MAX_LENGTH = 32    # number of tokens the TOKEN lists are padded up to

In [0]:
# Training set; later cells read its 'TOKEN', 'Sentences', 'POSITION', 'TOTAL_LEN'
# and 'BACKGROUND'..'OTHERS' columns.
DATA = pd.read_csv('train_tokenize_nostem.csv')

If we needed to handle the tokenization ourselves...
ELMo does not need this, but it is provided here anyway

In [0]:
# Convert the TOKEN column from its CSV string form back into Python lists,
# then pad every list up to MAX_LENGTH.
from ast import literal_eval


def _parse_tokens(cell):
  """Return `cell` as a list, parsing it only while it is still the CSV string."""
  # literal_eval raises ValueError on a value that is already a list, so the
  # str guard keeps this cell safe to run more than once.
  return literal_eval(cell) if isinstance(cell, str) else cell


def _pad_tokens(tokens, length):
  """Return a copy of `tokens` right-padded with '' up to `length` items."""
  # A negative repeat count yields [], so longer rows come back unchanged.
  return tokens + [''] * (length - len(tokens))


print(type(DATA.loc[0,'TOKEN']))

# Lists are written to the .csv file as their string repr, so they are read back as str.
DATA['TOKEN'] = DATA['TOKEN'].apply(_parse_tokens)
print(type(DATA.loc[0,'TOKEN']))

# Padding to MAX_LENGTH (downstream code needs rectangular/uniform input).
# NOTE(review): rows longer than MAX_LENGTH are left as they are, so the column
# is only uniform if no row exceeds MAX_LENGTH -- confirm whether truncation is wanted.
DATA['TOKEN'] = DATA['TOKEN'].apply(lambda tokens: _pad_tokens(tokens, MAX_LENGTH))

In [0]:
#split to train and val
from sklearn.model_selection import train_test_split

# Fixed seed so the same rows land in train/val on every Restart & Run All;
# without it the validation scores are not comparable between runs.
SPLIT_SEED = 42
DATA_train, DATA_val = train_test_split(DATA, test_size=0.25, random_state=SPLIT_SEED)
print('DATA_train.shape: ', DATA_train.shape)
print('DATA_val.shape: ', DATA_val.shape)

DATA_train.shape:  (35150, 13)
DATA_val.shape:  (11717, 13)


In [0]:
# Model inputs and targets for the training split.
# Sentences become an (n, 1) object array: one raw sentence string per row.
# (optional truncation, kept for reference:
#  TOKEN_train = [' '.join(t.split()[0:MAX_LENGTH]) for t in TOKEN_train])
TOKEN_train = np.expand_dims(np.array(DATA_train['Sentences'], dtype=object), axis=1)
POSITION_train = DATA_train[['POSITION', 'TOTAL_LEN']].to_numpy()
LABEL_train = DATA_train.loc[:, 'BACKGROUND':'OTHERS'].to_numpy()

# The same three arrays for the validation split.
# (optional truncation, kept for reference:
#  TOKEN_val = [' '.join(t.split()[0:MAX_LENGTH]) for t in TOKEN_val])
TOKEN_val = np.expand_dims(np.array(DATA_val['Sentences'], dtype=object), axis=1)
POSITION_val = DATA_val[['POSITION', 'TOTAL_LEN']].to_numpy()
LABEL_val = DATA_val.loc[:, 'BACKGROUND':'OTHERS'].to_numpy()

# Shapes of each train/val pair, for a quick sanity check
print(TOKEN_train.shape, TOKEN_val.shape)
print(POSITION_train.shape, POSITION_val.shape)
print(LABEL_train.shape, LABEL_val.shape)

(35150, 1) (11717, 1)
(35150, 2) (11717, 2)
(35150, 6) (11717, 6)


In [0]:
# Peek at the first two rows of the text inputs (train and val) and of the
# training position features.
print(TOKEN_train[0:2])
print(TOKEN_val[0:2])
print(POSITION_train[0:2])

[['testing in continuous integration involves test case prioritization selection and execution at each cycle .']
 ['we propose a new task for grounding language in this environment given a natural language command e .g . click on the second article choose the correct element on the web page e .g . a hyperlink or text box .']]
[['our method improved results across all languages .']
 ['the proposed algorithm relies on abductive-inductive learning and comprises a scalable clause refinement methodology based on a compressive summarization of clause coverage in a stream of examples .']]
[[1 6]
 [2 7]]


## Load the model (testing)
ELMo is sensitive to
  * Punctuation
  * Tense
  * Plural/singular
  * Upper/lower case (https://github.com/tensorflow/hub/issues/215)
  * It also accepts numbers as input (in str form)

In [0]:
# Load the ELMo module (version 3) from TF-Hub with trainable=True.
elmo = hub.Module("https://tfhub.dev/google/elmo/3",trainable=True)

In [0]:
# test
# Feed two short strings through the "default" signature and keep the "elmo"
# entry of the returned output dict.
embeddings = elmo(['a.k.a.', 'e.g.'],
    signature="default", as_dict=True)["elmo"]

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [0]:
## The model can be loaded in two ways; I haven't figured out the difference between them yet
# elmo = hub.KerasLayer("https://tfhub.dev/google/elmo/3",trainable=True, signature="default",output_key="elmo")

## Test hub.KerasLayer: no need to specify the signature and output here
# embeddings = elmo(K.squeeze(K.cast(TOKEN_train[0:2], tf.string), axis=1))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [0]:
# run and check the result
# Initialize the graph's variables, then evaluate the embedding tensor and
# print its shape and values.
sess.run(tf.global_variables_initializer())
embedding = sess.run(embeddings)
print(embedding.shape)
print(embedding)

(2, 1, 1024)
[[[-0.37405166 -0.5001462   0.13569671 ... -0.64118576  0.5069206
   -0.63167405]]

 [[-0.51674646  0.02469666 -0.22523886 ... -0.32503623 -0.00995439
   -0.07440427]]]


In [0]:
# test with our data
# The first two training rows are cast to a string tensor and squeezed on
# axis 1 (from (2, 1) to (2,)) before being fed to the module.
embeddings = elmo(K.squeeze(K.cast(TOKEN_train[0:2], tf.string), axis=1),
    signature="default", as_dict=True)["elmo"]

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [0]:
# run and check the result
# Run the initializer again, evaluate the new embedding tensor and print its shape.
sess.run(tf.global_variables_initializer())
embedding = sess.run(embeddings)
print(embedding.shape)

(2, 19, 1024)


## Define the model
Create a custom layer that allows us to update weights (lambda layers do not have trainable parameters!)

In [0]:
# Every time you want to try something new (e.g. change the model structure), clear the session first
# so that nothing left over in the graph can affect the new model or its results.
# NOTE(review): the `elmo` module and `sess` from earlier cells were created before this
# reset -- presumably they belong to the old graph and should not be reused afterwards; confirm.
K.clear_session()

In [0]:
class ElmoEmbeddingLayer(Layer):
    """Keras layer wrapping the TF-Hub ELMo module so its weights can be trained.

    Expects a tensor with one sentence string per row (it is squeezed on
    axis 1) and returns the module's "elmo" output.
    """

    def __init__(self, **kwargs):
      self.dimensions = 1024  # last axis reported by compute_output_shape
      self.trainable = True
      super().__init__(**kwargs)

    def build(self, input_shape):
      """Create the hub module and add its trainable variables to this layer."""
      module_name = f"{self.name}_module"
      self.elmo = hub.Module('https://tfhub.dev/google/elmo/3',
                             trainable=self.trainable, name=module_name)

      # Collect the trainable variables whose names start with the module's
      # scope so Keras lists them as trainable weights of this layer.
      scope_pattern = f"^{module_name}/.*"
      self.trainable_weights += tf.trainable_variables(scope=scope_pattern)
      super().build(input_shape)

    def call(self, x, mask=None):
      """Embed the sentences in `x`; `mask` is accepted but not used."""
      sentences = K.squeeze(K.cast(x, tf.string), axis=1)
      outputs = self.elmo(sentences, signature='default', as_dict=True)
      return outputs['elmo']

    # Masking is left disabled: performance increased without it (reason not known yet).
    # def compute_mask(self, inputs, mask=None):
    #   return K.not_equal(inputs, '')

    def compute_output_shape(self, input_shape):
      """Return (batch, None, dimensions); the sequence axis is left unspecified."""
      batch = input_shape[0]
      return (batch, None, self.dimensions)

If we don't need to fine-tune the weights in ELMo, we can just use a Lambda layer

In [0]:
def ELMOWordsEmbedding(tokens_input):
  """Embed a tensor of sentence strings with the notebook-level `elmo` module.

  The input is cast to tf.string and squeezed on axis 1 before being passed
  to the module's "default" signature; the "elmo" output is returned.
  """
  sentences = K.squeeze(K.cast(tokens_input, tf.string), axis=1)
  outputs = elmo(sentences, as_dict=True, signature="default")
  return outputs["elmo"]

def compute_mask(tokens_input, mask=None):
  """Return a boolean tensor that is True where `tokens_input` is not ''.

  `mask` is accepted but ignored.
  """
  as_strings = K.cast(tokens_input, tf.string)
  return K.not_equal(as_strings, '')

Evaluation function

In [0]:
# Function to calculate F1_score
def F1_score(y_true, y_pred):
  """F1 of the thresholded predictions, with counts pooled over the whole batch.

  `y_pred` is binarized with a strict `> 0.5`. True positives, false positives
  and false negatives are counted per column and then summed over columns
  before precision, recall and F1 are computed. Divisions by zero yield 0.
  """
  float_type = tf.float32
  cutoff = 0.5

  predicted = tf.cast(y_pred > cutoff, float_type)

  def _total(pred_value, true_value):
    # Cells where prediction == pred_value and label == true_value,
    # counted per column and then summed into one scalar.
    hits = tf.math.logical_and(tf.math.equal(predicted, pred_value),
                               tf.math.equal(y_true, true_value))
    per_column = tf.math.count_nonzero(hits, axis=0)
    return tf.math.reduce_sum(tf.cast(per_column, float_type), axis=0)

  tp = _total(1.0, 1.0)
  fp = _total(1.0, 0.0)
  fn = _total(0.0, 1.0)

  precision = tf.math.divide_no_nan(tp, tp + fp)
  recall = tf.math.divide_no_nan(tp, tp + fn)

  return tf.math.divide_no_nan(2 * (precision * recall), (precision + recall))

In [0]:
# Function to build model
def build_model(gru_units=256, dropout=0.5, num_labels=6, learning_rate=0.001):
  """Build, compile and return the ELMo + BiGRU multi-label classifier.

  Architecture: sentence string -> ElmoEmbeddingLayer -> bidirectional GRU
  (last state only) -> concatenated with the 2 position features ->
  sigmoid Dense layer with one unit per label.

  Args:
    gru_units: units of the GRU in each direction.
    dropout: input dropout rate of the GRU.
    num_labels: number of sigmoid outputs.
    learning_rate: learning rate of the Adam optimizer.

  Returns:
    The compiled Keras model (binary cross-entropy loss, F1_score metric).
    The model summary is printed as a side effect.
  """
  input_text = layers.Input(shape=(1,), name='Text_input', dtype=tf.string)
  input_position = layers.Input(shape=(2,), name='Sentence_position', dtype=tf.float32)

  # Using Lambda layer - this will result in a fixed ELMO embedding layer
  #embedding = layers.Lambda(ELMOWordsEmbedding, output_shape=(None, 1024), mask=compute_mask)(input_text)
  # ELMO embedding with trainable weight
  embedding = ElmoEmbeddingLayer(name="ElmoEmbed")(input_text)
  sentence_vector = layers.Bidirectional(
      layers.GRU(gru_units, dropout=dropout, return_sequences=False),
      name='BiGRU')(embedding)

  concat = layers.concatenate([sentence_vector, input_position], name='Merge')
  pred = layers.Dense(num_labels, activation='sigmoid')(concat)

  model = Model(inputs=[input_text, input_position], outputs=pred)
  adam = Adam(lr=learning_rate)

  model.compile(loss='binary_crossentropy', optimizer=adam, metrics=[F1_score])
  model.summary()

  return model

model = build_model()
# Snapshot the weights of the freshly built model so it can be reset to this
# starting point before retraining on the whole data set.
weights = model.get_weights()


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore




















Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.














Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
reduction_indices is deprecated, use axis instead


Instructions for updating:
reduction_indices is deprecated, use axis instead


Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Text_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
ElmoEmbed (ElmoEmbeddingLayer)  (None, None, 1024)   4           Text_input[0][0]                 
__________________________________________________________________________________________________
BiGRU (Bidirectional)           (None, 512)          1967616     ElmoEmbed[0][0]                  
__________________________________________________________________________________________________
Sentence_position (InputLayer)  (None, 2)            0                                            
____________________________________________________________________________________________






















## Training

In [0]:
# Monitor the performance
from keras.callbacks import EarlyStopping
# Stop once 'val_F1_score' (the F1_score metric on the validation data) has not
# improved for 3 epochs; mode='max' because a higher F1 is better.
early_stopping = EarlyStopping(monitor='val_F1_score', patience=3, mode='max')

# Train on the training split and evaluate on the validation split after every epoch.
history = model.fit([TOKEN_train,POSITION_train], LABEL_train,
          validation_data=([TOKEN_val,POSITION_val], LABEL_val),
          epochs=15, batch_size=BATCH_SIZE,
          callbacks=[early_stopping])













Train on 35150 samples, validate on 11717 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


## Evaluate

In [0]:
# Sigmoid outputs of the model for every validation row
result = model.predict([TOKEN_val,POSITION_val])

print(result.shape)
# NOTE(review): [-5:-1] shows four rows and leaves out the very last one;
# use [-5:] if the last five rows were intended.
print(result[-5:-1])

(11717, 6)
[[0.00588551 0.01260564 0.03800076 0.92279714 0.131506   0.00637618]
 [0.48207766 0.16796616 0.37107742 0.07503074 0.07100105 0.01586792]
 [0.7165873  0.04636222 0.06908572 0.05316025 0.01516324 0.00488138]
 [0.07452565 0.06215289 0.6697212  0.34838733 0.03677234 0.00340721]]


In [0]:
from sklearn.metrics import f1_score

# Binarize the predictions at 0.5 and score them against the true labels with
# micro-averaged F1.
# NOTE: this uses `>= 0.5`, while the Keras metric F1_score uses a strict `> 0.5`.
greater = (result>=0.5).astype(int)
print(LABEL_val.shape)
print(greater.shape)
print(f1_score(LABEL_val, greater, average='micro'))

(11717, 6)
(11717, 6)
0.6473844105039744


### Refit on the whole data with around 5 epochs

In [0]:
# Inputs and targets taken from the complete training frame (no validation hold-out).
# Sentences become an (n, 1) object array: one raw sentence string per row.
ALL_train = np.expand_dims(np.array(DATA['Sentences'], dtype=object), axis=1)
ALL_POSITION_train = DATA[['POSITION', 'TOTAL_LEN']].to_numpy()
ALL_LABEL = DATA.loc[:, 'BACKGROUND':'OTHERS'].to_numpy()

In [0]:
# Shapes of the full-data text inputs, position features and labels
print(ALL_train.shape)
print(ALL_POSITION_train.shape)
print(ALL_LABEL.shape)

(46867, 1)
(46867, 2)
(46867, 6)


In [0]:
# reload the model weight
# Put the model back to the weights captured right after build_model().
# NOTE(review): set_weights presumably leaves the optimizer state from the
# previous fit untouched -- confirm whether a fresh optimizer is wanted here.
model.set_weights(weights)
# train with best epoch = 5
history = model.fit([ALL_train,ALL_POSITION_train], ALL_LABEL,
          epochs=5, batch_size=BATCH_SIZE)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Predict

In [0]:
# Load the test set and shape its inputs the same way as the training inputs:
# an (n, 1) object array of sentences plus the two position features.
TEST_DATA = pd.read_csv('test_tokenize_nostem.csv')
test = np.expand_dims(np.array(TEST_DATA['Sentences'], dtype=object), axis=1)
POSITION_test = TEST_DATA[['POSITION', 'TOTAL_LEN']].to_numpy()

print(test.shape)
print(POSITION_test.shape)

(131166, 1)
(131166, 2)


In [0]:
# Sigmoid outputs of the refitted model for every test row
TEST_RESULT = model.predict([test,POSITION_test])

print(TEST_RESULT.shape)
# NOTE(review): [-5:-1] shows four rows and leaves out the very last one;
# use [-5:] if the last five rows were intended.
print(TEST_RESULT[-5:-1])

(131166, 6)
[[0.00370848 0.00314283 0.01640809 0.7887437  0.57728094 0.00105613]
 [0.9920742  0.02068135 0.01025519 0.00269032 0.00636393 0.00972047]
 [0.16082975 0.6936044  0.29678756 0.07848769 0.07863769 0.01464307]
 [0.02329394 0.14286119 0.21164942 0.43658984 0.16394943 0.08761445]]


In [0]:
# Wrap the predictions in a frame whose columns carry the same label names
# as the 'BACKGROUND'..'OTHERS' columns of the training frame.
RESULT = pd.DataFrame(TEST_RESULT, columns=DATA.loc[:,'BACKGROUND':'OTHERS'].columns)
RESULT.head()

Unnamed: 0,BACKGROUND,OBJECTIVES,METHODS,RESULTS,CONCLUSIONS,OTHERS
0,0.981662,0.046026,0.010622,0.000734,0.003867,0.007027
1,0.863804,0.289979,0.028492,0.018001,0.036511,0.005382
2,0.174576,0.405271,0.710111,0.044601,0.054216,0.005105
3,0.013194,0.735298,0.343895,0.105276,0.062246,0.001103
4,0.007108,0.01052,0.109356,0.668826,0.203339,0.023432


In [0]:
# save to csv file
# (fixed the stray quote in the file name, which was a syntax error);
# index=False keeps the row index out of the file.
RESULT.to_csv('test_result.csv', index=False)