## Import Libraries

In [4]:
import numpy as np
import pandas as pd
import re

## Read Data

In [5]:
# Load the labelled survey comments. The file is read with header=None, so
# its first line is treated as data and the column names are supplied here.
data_df = pd.read_csv(
    'sentiment_labels_training.csv',
    names=['QID', 'Comment', 'Sentiment'],
    header=None,
)

In [6]:
# Preview the first rows to check that the three named columns loaded as expected.
data_df.head()

Unnamed: 0,QID,Comment,Sentiment
0,Positive,all is good,1
1,Negative,Nothing,1
2,Negative,I believe that there is more favoritism toward...,0
3,Positive,The connection with each individual resident,1
4,Positive,no,0


In [9]:
# Translate each QID label into the survey prompt it stands for. The trailing
# space in each prompt is intentional: the comment text is appended directly
# after it in a later cell.
data_df['Question'] = data_df['QID'].replace({
    'Positive': 'What works well: ',
    'Negative': 'What needs improvement: ',
})

In [10]:
# Preview the frame again to confirm the new 'Question' column.
data_df.head()

Unnamed: 0,QID,Comment,Sentiment,Question
0,Positive,all is good,1,What works well:
1,Negative,Nothing,1,What needs improvement:
2,Negative,I believe that there is more favoritism toward...,0,What needs improvement:
3,Positive,The connection with each individual resident,1,What works well:
4,Positive,no,0,What works well:


In [11]:
# Build the model input by prefixing every comment with its question prompt.
# A missing comment (NaN) would make the concatenated value NaN as well, and
# the tokenisation cell further down calls .split() on every 'Text' value,
# which fails on a float. Missing comments are therefore treated as empty
# strings so that 'Text' is always a str.
data_df['Text'] = data_df['Question'] + data_df['Comment'].fillna('').astype(str)

In [12]:
# Preview the frame again to confirm the combined 'Text' column.
data_df.head()

Unnamed: 0,QID,Comment,Sentiment,Question,Text
0,Positive,all is good,1,What works well:,What works well: all is good
1,Negative,Nothing,1,What needs improvement:,What needs improvement: Nothing
2,Negative,I believe that there is more favoritism toward...,0,What needs improvement:,What needs improvement: I believe that there i...
3,Positive,The connection with each individual resident,1,What works well:,What works well: The connection with each indi...
4,Positive,no,0,What works well:,What works well: no


## Split Data Into Train and Test Sets

In [24]:
from sklearn.model_selection import train_test_split

# Hold out 5% of the rows for evaluation. The split is seeded so that the
# same rows land in train/test on every run; without a fixed random_state the
# partition (and every downstream metric) changes each time the cell runs.
train_df, test_df = train_test_split(
    data_df[['Text', 'Sentiment']],
    test_size=0.05,
    random_state=42,
)

## Build ELMo Embedding Layer

In [25]:
import tensorflow as tf
import tensorflow_hub as hub
from keras import backend as K
from keras.models import Model, load_model
import keras.layers as layers
from keras.engine import Layer


In [26]:
# Create a custom layer that allows us to update weights

class ElmoEmbeddingLayer(Layer):
    """Keras layer that wraps the TF-Hub ELMo v2 module and emits per-token embeddings.

    Input:  a string tensor of shape (batch, 1) holding one sentence per row.
    Output: the module's 'elmo' tensor, shape (batch, max_tokens, 1024), where
            max_tokens is the token count of the longest sentence in the batch.

    The layer also produces a (batch, max_tokens) boolean mask marking real
    tokens, so that mask-aware layers downstream (e.g. an LSTM) skip padding.
    """

    def __init__(self, **kwargs):
        self.dimensions = 1024  # width of each ELMo token vector
        self.trainable=True
        super(ElmoEmbeddingLayer, self).__init__(**kwargs)
        # Filled in by call(); holds the per-token mask for the latest output.
        self._token_mask = None

    def build(self, input_shape):
        """Instantiate the hub module and register its trainable variables with Keras."""
        self.elmo = hub.Module('https://tfhub.dev/google/elmo/2', trainable=self.trainable,
                               name="{}_module".format(self.name))

        # The module's variables live under "<layer name>_module/"; add them to
        # this layer's weights so the optimizer can update them. `tf` is used
        # directly (it is imported by the notebook) instead of reaching through
        # the Keras backend's `K.tf` attribute.
        self.trainable_weights += tf.trainable_variables(scope="^{}_module/.*".format(self.name))
        super(ElmoEmbeddingLayer, self).build(input_shape)

    def call(self, x, mask=None):
        """Run ELMo on a (batch, 1) string tensor and return the 'elmo' embeddings."""
        # Drop the singleton axis: the module's 'default' signature takes a
        # 1-D batch of sentences.
        sentences = K.squeeze(K.cast(x, tf.string), axis=1)
        outputs = self.elmo(sentences,
                            as_dict=True,
                            signature='default',
                            )
        embeddings = outputs['elmo']

        # Build the per-token mask from the module's reported sentence lengths,
        # padded out to the time dimension of the returned embeddings.
        self._token_mask = tf.sequence_mask(outputs['sequence_len'],
                                            maxlen=tf.shape(embeddings)[1])
        return embeddings

    def compute_mask(self, inputs, mask=None):
        """Return the (batch, max_tokens) token mask built during call().

        The previous implementation compared the whole input sentence with
        '--PAD--', producing a (batch, 1) mask that does not line up with the
        (batch, max_tokens, 1024) output handed to the next layer.
        """
        return self._token_mask

    def compute_output_shape(self, input_shape):
        """Report (batch, None, 1024): the token axis varies from batch to batch."""
        # input_shape[1] is the singleton sentence axis (1), not the number of
        # tokens, so it must not be reported as the time dimension.
        return (input_shape[0], None, self.dimensions)

In [27]:
# Create datasets. Each text is cut to its first 100 whitespace-separated
# tokens to limit memory use.
def _as_text_column(frame):
    """Return frame['Text'] as an (n, 1) object array of texts cut to 100 tokens."""
    clipped = []
    for raw in frame['Text'].tolist():
        tokens = raw.split()
        clipped.append(' '.join(tokens[:100]))
    return np.array(clipped, dtype=object)[:, np.newaxis]


train_text = _as_text_column(train_df)
train_label = train_df['Sentiment'].tolist()

test_text = _as_text_column(test_df)
test_label = test_df['Sentiment'].tolist()

## Build Model

In [28]:
# One raw sentence per example enters the graph as a string tensor.
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)

# Bidirectional LSTM over the ELMo embeddings, followed by a single sigmoid
# unit that outputs the binary sentiment prediction.
recurrent = layers.LSTM(1024, activation='relu')
lstm = layers.Bidirectional(recurrent)(embedding)
pred = layers.Dense(1, activation='sigmoid')(lstm)

model = Model(inputs=[input_text], outputs=pred)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

INFO:tensorflow:Initialize variable elmo_embedding_layer_2_module/aggregation/scaling:0 from checkpoint b'/var/folders/w5/qbqnlpz92_34zc6s3vc_f48m0000gn/T/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with aggregation/scaling
INFO:tensorflow:Initialize variable elmo_embedding_layer_2_module/aggregation/weights:0 from checkpoint b'/var/folders/w5/qbqnlpz92_34zc6s3vc_f48m0000gn/T/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with aggregation/weights
INFO:tensorflow:Initialize variable elmo_embedding_layer_2_module/bilm/CNN/W_cnn_0:0 from checkpoint b'/var/folders/w5/qbqnlpz92_34zc6s3vc_f48m0000gn/T/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/CNN/W_cnn_0
INFO:tensorflow:Initialize variable elmo_embedding_layer_2_module/bilm/CNN/W_cnn_1:0 from checkpoint b'/var/folders/w5/qbqnlpz92_34zc6s3vc_f48m0000gn/T/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/

INFO:tensorflow:Initialize variable elmo_embedding_layer_2_module/bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/projection/kernel:0 from checkpoint b'/var/folders/w5/qbqnlpz92_34zc6s3vc_f48m0000gn/T/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/projection/kernel
INFO:tensorflow:Initialize variable elmo_embedding_layer_2_module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias:0 from checkpoint b'/var/folders/w5/qbqnlpz92_34zc6s3vc_f48m0000gn/T/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias
INFO:tensorflow:Initialize variable elmo_embedding_layer_2_module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel:0 from checkpoint b'/var/folders/w5/qbqnlpz92_34zc6s3vc_f48m0000gn/T/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel
INFO

## Train Model

In [29]:
# Train inside an explicit TF session that is registered with the Keras
# backend; the session is closed when the `with` block exits.
with tf.Session() as session:
    K.set_session(session)

    # Variables and lookup tables must be initialised before fitting.
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())

    # The held-out split is passed as validation data for per-epoch metrics.
    history = model.fit(
        train_text,
        train_label,
        validation_data=(test_text, test_label),
        epochs=5,
        batch_size=6,
    )

    # Save the weights while the session that owns them is still open.
    model.save_weights('./elmo-model.h5')

Train on 426 samples, validate on 23 samples
Epoch 1/5

KeyboardInterrupt: 