# Attention Model in KERAS - SENTIMENT ANALYSIS

## Why are we using a package?
#### The difficulty I ran into when I first used attention was loading the saved model. Attention isn't a layer, it is a mathematical operation. When we try to load the model again, it will throw an error saying there is no Attention layer. When loading the saved model, we'll use the get_config method present in the package.

In [1]:
!pip install keras_self_attention
!pip install nlppreprocess
## Uncomment this to use glove embedding.
# !wget http://nlp.stanford.edu/data/glove.42B.300d.zip

In [2]:
import os

# I don't have an NVIDIA GPU, which is why I used plaidml to train models on my AMD GPU.
# The backend variable is assigned before the first keras import below so that
# it is already in place when keras is imported.
os.environ['KERAS_BACKEND'] = "plaidml.keras.backend"

from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Dense, Embedding, Input
from keras.layers import LSTM, Bidirectional, Dropout
from keras_self_attention import SeqWeightedAttention
import pandas as pd
import numpy as np
# Module-level placeholder for the text tokenizer; the training cell pickles
# whatever this name refers to.
tokenizer = None

Using plaidml.keras.backend backend.


In [23]:
def BidLstm(maxlen, max_features, embed_size, embedding_matrix, num_classes=11):
    """Build a bidirectional-LSTM classifier with sequence-weighted attention.

    Architecture: frozen pre-trained embedding -> BiLSTM (256 units per
    direction, returning the full sequence) -> SeqWeightedAttention, which
    reduces the sequence to a single vector per sample -> Dense(256, relu)
    -> Dropout -> softmax output.

    Parameters
    ----------
    maxlen : int
        Length of every (padded) input token sequence.
    max_features : int
        Vocabulary size, i.e. the number of rows of the embedding table.
        Must equal ``embedding_matrix.shape[0]`` for the weights to load.
    embed_size : int
        Dimensionality of each embedding vector.
    embedding_matrix : array-like, shape (max_features, embed_size)
        Pre-trained embedding weights; they are loaded and kept frozen.
    num_classes : int, optional
        Number of output classes. Defaults to 11, the value that was
        previously hard-coded (presumably ratings 0-10 after one-hot
        encoding -- confirm against the label column).

    Returns
    -------
    keras.models.Model
        The uncompiled model. Its summary is printed as a side effect.
    """
    inp = Input(shape=(maxlen, ))
    # trainable=False keeps the pre-trained vectors fixed during training.
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],
                  trainable=False)(inp)
    # return_sequences=True: the attention layer needs every timestep.
    x = Bidirectional(LSTM(256, return_sequences=True, dropout=0.25,
                           recurrent_dropout=0.25))(x)
    x = SeqWeightedAttention()(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.25)(x)
    # Single-label, multi-class output: softmax yields a proper probability
    # distribution, which is what categorical cross-entropy expects
    # (independent sigmoids are meant for multi-label problems).
    x = Dense(num_classes, activation="softmax")(x)
    model = Model(inputs=inp, outputs=x)
    model.summary()

    return model

In [13]:
import pandas as pd
from keras.preprocessing import text, sequence

def make_df(train_path, max_features, maxlen, list_classes):
    """Load a tab-separated dataset and turn it into model-ready arrays.

    Parameters
    ----------
    train_path : str
        Path to the tab-separated training file.
    max_features : int
        Keep only the ``max_features`` most frequent words when tokenizing.
    maxlen : int
        Every tokenized review is padded/truncated to this length.
    list_classes : sequence of str
        ``list_classes[0]`` is the text column, ``list_classes[1]`` the
        label column.

    Returns
    -------
    X_t : numpy.ndarray
        Padded integer token sequences, one row per review.
    y : numpy.ndarray
        One-hot encoded labels.
    word_index : dict
        Word -> integer index mapping learned by the tokenizer.

    Side effects
    ------------
    The fitted tokenizer is stored in the module-level name ``tokenizer`` so
    that it can be persisted next to the model; previously it was a local
    that was discarded when the function returned.
    """
    global tokenizer

    # Load dataset
    train = pd.read_csv(train_path, delimiter='\t')
    text_col, label_col = list_classes[0], list_classes[1]

    # Load all reviews; missing reviews become empty strings so the
    # tokenizer never receives a float NaN.
    sentences = train[text_col].fillna("").astype(str).values

    # Tokenizer restricted to the max_features most frequent words
    tokenizer = text.Tokenizer(num_words=max_features)
    tokenizer.fit_on_texts(list(sentences))

    list_tokenized_train = tokenizer.texts_to_sequences(sentences)

    # Pad (with 0) or truncate so that all sequences have length maxlen.
    X_t = sequence.pad_sequences(list_tokenized_train, maxlen=maxlen)

    # to_categorical turns the integer labels into a dense one-hot matrix,
    # the target format categorical cross-entropy expects.
    y = to_categorical(train[label_col].values)

    word_index = tokenizer.word_index

    return X_t, y, word_index

In [20]:
import numpy as np
from smart_open import open

def make_glovevec(glovepath, max_features, embed_size, word_index, veclen=300):
    """Build an embedding matrix from a GloVe text file.

    Each line of the GloVe file is ``<token> <v1> ... <v_veclen>``. The token
    itself may contain spaces, so the vector is taken from the *last*
    ``veclen`` fields and everything before them is treated as the token.

    Parameters
    ----------
    glovepath : str
        Path to the GloVe text file.
    max_features : int
        Upper bound on the number of rows of the returned matrix.
    embed_size : int
        Number of columns of the returned matrix. Should equal ``veclen``;
        otherwise copying a vector into the matrix fails.
    word_index : dict
        Word -> integer index mapping. Indices are expected to start at 1
        (row 0 is left as zeros for padding).
    veclen : int, optional
        Number of float components per line in the GloVe file.

    Returns
    -------
    numpy.ndarray, shape (min(max_features, len(word_index) + 1), embed_size)
        Row ``i`` holds the GloVe vector of the word with index ``i``; rows
        of words absent from the GloVe file stay all-zero.
    """
    embeddings_index = {}
    # Context manager so the handle is released even if parsing fails.
    with open(glovepath, encoding="utf8") as f:
        for line in f:
            values = line.split()
            if len(values) <= veclen:
                # Blank or malformed line: no token left after the vector.
                continue
            word = ' '.join(values[:-veclen])
            embeddings_index[word] = np.asarray(values[-veclen:],
                                                dtype='float32')

    # +1 because word indices are 1-based: the largest index equals
    # len(word_index) and must still be a valid row.
    nb_words = min(max_features, len(word_index) + 1)
    embedding_matrix = np.zeros((nb_words, embed_size))
    for word, i in word_index.items():
        if i >= nb_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix

In [15]:
# !wget https://archive.ics.uci.edu/ml/machine-learning-databases/00462/drugsCom_raw.zip
# !unzip drugsCom_raw.zip

In [None]:
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint
# Fix NumPy's global seed so NumPy-based randomness is repeatable.
np.random.seed(7)


if __name__ == "__main__":
    # --- Configuration -----------------------------------------------------
    max_features = 30000   # vocabulary size kept by the tokenizer
    maxlen = 100           # tokens per review after padding/truncation
    embed_size = 300       # dimensionality of the GloVe vectors
    list_classes = ['review', 'rating']   # [text column, label column]

    # Data preparation and embedding construction run as part of this cell so
    # that it works on a fresh kernel; with these steps commented out, xtr, y
    # and embedding_vector were undefined unless they leaked from an earlier
    # session.
    print("Preparing Data...")
    xtr, y, word_index = make_df("data/train.tsv",
                                 max_features, maxlen, list_classes)

    print("Creating Glove Embedding...")
    embedding_vector = make_glovevec("glove.42B.300d/glove.42B.300d.txt",
                                     max_features, embed_size, word_index)

    print("Creating Model...")
    model = BidLstm(maxlen, max_features, embed_size, embedding_vector)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    file_path = ".model.hdf5"
    # Keep only the weights with the best validation loss seen so far.
    ckpt = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
                           save_best_only=True, mode='min')
    # NOTE(review): patience=3 cannot trigger while epochs=2 -- raise epochs
    # if early stopping is meant to be effective.
    early = EarlyStopping(monitor="val_loss", mode="min", patience=3)

    print("Starting Model Training...")
    model.fit(xtr, y, batch_size=16, epochs=2, validation_split=0.01, callbacks=[ckpt, early])

    model.save('saved_model.model')

    import pickle
    # `tokenizer` is the module-level name. Do not write a pickle of None and
    # then report success: that file would be useless at inference time.
    if tokenizer is None:
        print("tokenizer is None - nothing was saved")
    else:
        with open('tokenizer.pkl','wb') as file:
            pickle.dump(tokenizer, file)
        print("tokenizer saved")

Creating Model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 100)               0         
_________________________________________________________________
embedding_6 (Embedding)      (None, 100, 300)          9000000   
_________________________________________________________________
bidirectional_6 (Bidirection (None, 100, 512)          1140736   
_________________________________________________________________
seq_weighted_attention_6 (Se (None, 512)               513       
_________________________________________________________________
dense_11 (Dense)             (None, 256)               131328    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 11)                282

INFO:plaidml:Analyzing Ops: 1833 of 21341 operations complete
INFO:plaidml:Analyzing Ops: 4956 of 21341 operations complete
INFO:plaidml:Analyzing Ops: 7174 of 21341 operations complete
INFO:plaidml:Analyzing Ops: 10455 of 21341 operations complete
INFO:plaidml:Analyzing Ops: 14176 of 21341 operations complete
INFO:plaidml:Analyzing Ops: 17615 of 21341 operations complete
INFO:plaidml:Analyzing Ops: 20564 of 21341 operations complete


   44/53228 [..............................] - ETA: 32:27:27 - loss: 2.2556 - acc: 0.2273