# Implementing LSTM models for Aspect term polarity detection

**Paper Reference:** [Effective LSTMs for Target-Dependent Sentiment Classification](https://www.aclweb.org/anthology/C16-1311.pdf)

In [1]:
import os

# Keras reads KERAS_BACKEND once, at the moment it is first imported, so the
# variable must be set before any module that pulls in keras is loaded.
# The "Using TensorFlow backend." banner is printed by this very cell, which
# suggests dl_utils / utils import keras themselves.
os.environ['KERAS_BACKEND'] = 'tensorflow'

from IPython.display import Image
from dl_utils import prepare_data_for_dl
from utils import f1


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


# Define the various global variables

## For the input data

In [2]:
# Length of every review index sequence; passed as max_input_len when the data
# is prepared and used as the shape of the model input layers.
MAX_INPUT_LENGTH = 80
# Passed as max_aspect_len when the data is prepared.
MAX_ASPECT_LENGTH = 5
# Passed as embed_dim when the training data is prepared.
EMBEDDING_DIM = 200


## Prepare the training and test data

Various experiments can be run by changing the arguments passed to `prepare_data_for_dl`:

1. For GloVe embeddings, choose `embed_dim` from 50, 100, 200 or 300
2. Choose the embedding type, i.e. glove, google (300d), restaurants or laptops
3. Choose a combination of embeddings, e.g. double embeddings with glove and restaurants
4. Choose whether to concatenate POS tags or not

In [3]:
# Sequence lengths are the same for the train and the test split.
seq_len_kwargs = {
    'max_input_len': MAX_INPUT_LENGTH,
    'max_aspect_len': MAX_ASPECT_LENGTH,
}

# Training split: the returned dict also carries the tokenizer and the
# embedding matrix that the models below rely on.
train_data = prepare_data_for_dl(
    'restaurants', 'train',
    embed_dim=EMBEDDING_DIM,
    embed_type=['glove.twitter', 'restaurants'],
    concat_pos_tag=True,
    **seq_len_kwargs
)
tokenizer = train_data['tokenizer']
embedding_matrix = train_data['embedding_matrix']

# Test split: prepared with the tokenizer taken from the training data.
test_data = prepare_data_for_dl(
    'restaurants', 'test',
    tokenizer=tokenizer,
    **seq_len_kwargs
)

Word vectors found for 93.10% of vocabulary
Word vectors found for 100.00% of vocabulary


In [24]:
# Quick look at the shape of the training review index array.
train_data['reviews_raw_idx'].shape

(4728, 80)

In [25]:
# Inputs are the review index sequences; targets are the polarity labels
# (presumably one-hot encoded, given the 'polarity_ohe' key - confirm).
X_train = train_data['reviews_raw_idx']
y_train = train_data['polarity_ohe']

X_test = test_data['reviews_raw_idx']
y_test = test_data['polarity_ohe']

In [6]:
# Raw POS-tag data for each split.
postags_train, postags_test = train_data['postags_raw'], test_data['postags_raw']

In [7]:
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical



In [8]:
from keras.models import Model
from keras.layers import Input, Dense, Activation, LSTM, Embedding, GlobalMaxPool1D, Conv1D, Dropout
from keras.utils import plot_model
from keras.optimizers import Adam

## Simple LSTM

![](../reports/Simple_LSTM.png)

In [9]:
import numpy as np
# embedding_matrix = np.zeros(shape=(3755, 406))


In [10]:
from keras import backend as K
# Reset the Keras backend session so that state left over from earlier model
# builds in this kernel does not accumulate.
K.clear_session()

In [11]:
from keras.layers import Concatenate, Conv1D, Bidirectional, GlobalMaxPool1D, MaxPooling1D, Flatten
from keras.layers import SpatialDropout1D, Average, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from keras.initializers import RandomUniform

In [12]:
from keras.utils.vis_utils import plot_model

In [13]:
# One uniform initializer and one L2 penalty, applied to the kernel, recurrent
# and bias weights of every LSTM configured with LSTM_PARAMS.
init = RandomUniform(minval=-0.003, maxval=0.003)
reg = l2(0.001)

# Keyword arguments that are unpacked into each LSTM layer built below.
LSTM_PARAMS = dict(
    units=200,
    activation='tanh',
    recurrent_activation='sigmoid',
    kernel_initializer=init,
    recurrent_initializer=init,
    bias_initializer=init,
    kernel_regularizer=reg,
    recurrent_regularizer=reg,
    bias_regularizer=reg,
    dropout=0,
    recurrent_dropout=0,
)

In [15]:
# Simple LSTM baseline: embedded review -> LSTM -> softmax over 3 polarity classes.

# Input layer: one sequence of MAX_INPUT_LENGTH token indices per review
inputs = Input(shape=(MAX_INPUT_LENGTH, ))

# Embedding layer, frozen and initialised from the pre-computed matrix.
# The vector width is read from embedding_matrix itself so that it always
# matches the supplied weights, whichever embedding combination was prepared.
# mask_zero=True makes the downstream LSTM skip positions holding index 0.
x = Embedding(input_dim=len(tokenizer.word_index) + 1,
              output_dim=embedding_matrix.shape[1],
              input_length=MAX_INPUT_LENGTH,
              weights=[embedding_matrix],
              mask_zero=True,
              trainable=False)(inputs)

# LSTM layer: only the final hidden state is passed on
x = LSTM(**LSTM_PARAMS)(x)

# Finally compute the class probabilities
preds = Dense(3, activation='softmax')(x)

# Specify the input and the output
model = Model(inputs, preds)
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.01), metrics=['acc', f1])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 80)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 80, 306)           1343646   
_________________________________________________________________
lstm_1 (LSTM)                (None, 200)               405600    
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 603       
Total params: 1,749,849
Trainable params: 406,203
Non-trainable params: 1,343,646
_________________________________________________________________


In [26]:
# Save a diagram of the model graph, with tensor shapes and layer names, to SimpleLSTM.png.
plot_model(model, to_file="SimpleLSTM.png", show_shapes=True, show_layer_names=True)

In [4]:
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation metrics are test-set metrics, not a separate
# held-out estimate.
model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=128, epochs=10)

## Target dependent LSTM

![](../reports/Target_dep_LSTM.png)

In [73]:
# TD-LSTM takes two inputs per review: the left and the right context,
# each including the aspect. X_train / X_test become two-element lists.
td_input_keys = ('reviews_left_with_aspects_idx', 'reviews_right_with_aspects_idx')
X_train = [train_data[key] for key in td_input_keys]
X_test = [test_data[key] for key in td_input_keys]

In [78]:
# Target-dependent LSTM (TD-LSTM).
# We need two inputs, the left side and the right side of the aspect
# (including the aspect in both).
left_input = Input(shape=(MAX_INPUT_LENGTH,))
right_input = Input(shape=(MAX_INPUT_LENGTH,))

# A single frozen embedding layer shared by both inputs.
# The vector width is read from embedding_matrix itself so that it always
# matches the supplied weights, whichever embedding combination was prepared.
# NOTE(review): padding is not masked here (mask_zero=False), whereas the
# Simple LSTM model masks it - confirm that this difference is intentional.
Embedding_Layer = Embedding(input_dim=len(tokenizer.word_index) + 1,
              output_dim=embedding_matrix.shape[1],
              input_length=MAX_INPUT_LENGTH,
              mask_zero=False,
              weights=[embedding_matrix],
              trainable=False)

# Obtain the vectors from the embedding layer for
# the left and right sequences
left_x = Embedding_Layer(left_input)
right_x = Embedding_Layer(right_input)

# Pass both through separate LSTMs; the right-hand sequence is processed
# in reverse order (go_backwards=True)
left_x = LSTM(**LSTM_PARAMS)(left_x)
right_x = LSTM(**LSTM_PARAMS, go_backwards=True)(right_x)

# Concatenate the final hidden states
x = Concatenate()([left_x, right_x])

# Finally compute the class probabilities
preds = Dense(3, activation='softmax')(x)

# Specify the inputs and the output
model = Model(inputs=[left_input, right_input], outputs=preds)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01), metrics=['acc', f1])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 80)           0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            (None, 80)           0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 80, 300)      1126500     input_7[0][0]                    
                                                                 input_8[0][0]                    
__________________________________________________________________________________________________
lstm_4 (LSTM)                   (None, 200)          400800      embedding_5[0][0]                
__________

In [79]:
# Save a diagram of the model graph, with tensor shapes and layer names, to TD-LSTM.png.
plot_model(model, to_file="TD-LSTM.png", show_shapes=True, show_layer_names=True)

In [80]:
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation metrics are test-set metrics, not a separate
# held-out estimate.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64)

Train on 3608 samples, validate on 1120 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd9c97844e0>

In [67]:
# Predicted class = index of the highest softmax probability in each row.
class_probs = model.predict(X_test)
y_pred = class_probs.argmax(axis=1)


In [68]:
from sklearn.metrics import classification_report, f1_score, accuracy_score

In [69]:
# Micro-averaged F1 on the test split. With exactly one label per sample,
# micro-F1 is the same number as accuracy.
f1_score(y_test.argmax(axis=1), y_pred, average='micro')

0.7517857142857143

In [70]:
# Fraction of test samples whose predicted class matches the true class.
accuracy_score(y_test.argmax(axis=1), y_pred)

0.7517857142857143

In [71]:
# Per-class precision, recall and F1 on the test split.
y_true = y_test.argmax(axis=1)
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.50      0.31      0.38       196
           1       0.83      0.90      0.86       728
           2       0.60      0.65      0.63       196

    accuracy                           0.75      1120
   macro avg       0.64      0.62      0.62      1120
weighted avg       0.73      0.75      0.74      1120

