
#   TEXT CLASSIFICATION USING ATTENTION MECHANISM


###  POC using Keras 

### Implementing an attention mechanism for sentence-level sentiment analysis on a dataset collected from the University of California Irvine Machine Learning Repository

![alt text](Attention.png)

In [7]:
import csv
import os
import pandas as pd
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# Encoder for the rating labels; it is instantiated again right before it is
# actually used in the label-encoding cell further down.
label_encoder = LabelEncoder()
# NOTE: '__file__' is a string literal here, not the __file__ variable, so
# realpath() resolves it against the current working directory and DIR_PATH
# ends up being that directory.
DIR_PATH = os.path.dirname(os.path.realpath('__file__'))
# NOTE(review): DATASETS is not used when file_path is built below — confirm
# whether the data folder is meant to live underneath it.
DATASETS = "datasets"
SENTIMENT_ANALYSIS = "sentiment labelled sentences"
file_path = os.path.join(DIR_PATH,SENTIMENT_ANALYSIS,"merged_dataset.txt")
# The file is read as tab-separated with no header row; the two columns are
# named explicitly on the next line.
df_sentiment = pd.read_csv(file_path, sep="\t", header=None)
df_sentiment.columns = ["review", "rating"]

In [8]:
# Preview the first rows to sanity-check the review/rating columns.
df_sentiment.head()

Unnamed: 0,review,rating
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [9]:
# Print column dtypes and non-null counts for the loaded frame.
df_sentiment.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 2 columns):
review    2000 non-null object
rating    2000 non-null int64
dtypes: int64(1), object(1)
memory usage: 31.3+ KB


## Data preparation using keras preprocessing

In [10]:
# Fit a Keras Tokenizer on every review to build the word -> integer index
# vocabulary (exposed afterwards as t.word_index).
t=Tokenizer()
t.fit_on_texts(df_sentiment["review"])
# Encode each review as a list of word indices; the lists still have
# different lengths at this point.
text_matrix=t.texts_to_sequences(df_sentiment["review"])

In [11]:
# Token count of every encoded review. Not referenced again in the visible
# cells; presumably inspected by hand to choose the padding length.
len_mat = [len(seq) for seq in text_matrix]

In [28]:
label_encoder = LabelEncoder()
# Bring every encoded review to exactly 32 tokens: shorter ones get zeros
# appended ('post'); longer ones are cut using pad_sequences' default
# truncation side.
text_pad = pad_sequences(text_matrix, maxlen=32, padding='post')
# Map the raw rating values to consecutive integer class ids.
integer_encode = label_encoder.fit_transform(df_sentiment["rating"])
# scikit-learn renamed `sparse` to `sparse_output` in 1.2 and removed the old
# keyword in 1.4. Try the current name first and fall back for older
# installations so the cell runs on both.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
# OneHotEncoder expects a 2-D column, one row per sample.
integer_encoded = integer_encode.reshape(-1, 1)
# Dense one-hot target matrix, one column per rating class.
Y = onehot_encoder.fit_transform(integer_encoded)
print(Y.shape)
text_pad.shape

(2000, 2)


In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


(2000, 32)

## Model creation using Functional API of Keras

In [40]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer


In [34]:
# Baseline classifier: embedding -> LSTM (final hidden state only) -> dense
# head with two sigmoid units.
inputs1 = Input(shape=(32,))
x1 = Embedding(
    # +1 leaves room for index 0, which the tokenizer never assigns to a word
    # and which is used as the padding value.
    input_dim=len(t.word_index) + 1,
    output_dim=32,
    input_length=32,
    embeddings_regularizer=keras.regularizers.l2(.001),
)(inputs1)
x1 = LSTM(100, dropout=0.3, recurrent_dropout=0.2)(x1)
outputs1 = Dense(2, activation='sigmoid')(x1)
model1 = Model(inputs1, outputs1)

In [35]:
# Print the layer-by-layer shapes and parameter counts of the baseline model.
model1.summary()

Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         [(None, 32)]              0         
_________________________________________________________________
embedding_7 (Embedding)      (None, 32, 32)            66304     
_________________________________________________________________
lstm_7 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_6 (Dense)              (None, 2)                 202       
Total params: 119,706
Trainable params: 119,706
Non-trainable params: 0
_________________________________________________________________


In [36]:
# Train the baseline LSTM model.
# NOTE(review): the output layer is Dense(2, sigmoid) trained with
# binary_crossentropy on one-hot targets; softmax + categorical_crossentropy
# is the usual pairing for mutually exclusive classes — confirm this is
# intended before comparing accuracies.
model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
# validation_split=0.2 holds out the LAST 20% of the rows (Keras takes the
# split before shuffling); shuffle=True only reorders the training part.
model1.fit(x=text_pad,y=Y,batch_size=100,epochs=10,verbose=1,shuffle=True,validation_split=0.2)

Train on 1600 samples, validate on 400 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x13f8225f8>

In [37]:
def build(self, input_shape):
    """Create the attention weights for a layer instance.

    Standalone copy of the ``build`` method that the ``attention`` class
    defines later in this file; ``self`` must therefore be an ``attention``
    instance.

    Args:
        input_shape: Shape of the incoming tensor. Index 1 is used as the
            number of timesteps and the last index as the feature size.
    """
    # One scoring weight per input feature.
    self.W = self.add_weight(
        name="att_weight",
        shape=(input_shape[-1], 1),
        initializer="normal",
    )
    # One bias per timestep, so the sequence length has to be known here.
    self.b = self.add_weight(
        name="att_bias",
        shape=(input_shape[1], 1),
        initializer="zeros",
    )
    # Explicit two-argument super(): this function lives outside the class
    # body, where the zero-argument form is not available.
    super(attention, self).build(input_shape)

In [38]:
def call(self, x):
    """Collapse the time axis of ``x`` into one attention-weighted vector.

    Standalone copy of the ``call`` method that the ``attention`` class
    defines later in this file.

    Args:
        x: Sequence tensor; axis 1 is summed away, the last axis is kept.

    Returns:
        The sum over axis 1 of ``x`` scaled by its softmax-normalised scores.
    """
    # Score each timestep: tanh(x . W + b), then drop the trailing size-1 axis.
    projected = K.dot(x, self.W) + self.b
    scores = K.squeeze(K.tanh(projected), axis=-1)
    # Normalise the scores over the (now last) time axis and restore the
    # trailing axis so the weights broadcast across the features of x.
    weights = K.expand_dims(K.softmax(scores), axis=-1)
    weighted = x * weights
    return K.sum(weighted, axis=1)

In [41]:
class attention(Layer):
    """Keras layer that pools a sequence into one vector via learned weights.

    Each timestep is scored with ``tanh(x . W + b)``, the scores are
    softmax-normalised across time, and the input is summed over the time
    axis using those weights. The bias holds one value per timestep, so the
    sequence length must be fixed when the layer is built.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring weight ``W`` and the per-timestep bias ``b``."""
        # One scoring weight per input feature.
        self.W = self.add_weight(
            name="att_weight",
            shape=(input_shape[-1], 1),
            initializer="normal",
        )
        # One bias per timestep.
        self.b = self.add_weight(
            name="att_bias",
            shape=(input_shape[1], 1),
            initializer="zeros",
        )
        super().build(input_shape)

    def call(self, x):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        # Score each timestep and drop the trailing size-1 axis.
        projected = K.dot(x, self.W) + self.b
        scores = K.squeeze(K.tanh(projected), axis=-1)
        # Softmax over time, then restore the trailing axis for broadcasting.
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        weighted = x * weights
        return K.sum(weighted, axis=1)

    def compute_output_shape(self, input_shape):
        """Output keeps the first and last input dimensions only."""
        first_dim, last_dim = input_shape[0], input_shape[-1]
        return (first_dim, last_dim)

    def get_config(self):
        """Return the base layer config; this layer adds no options of its own."""
        return super().get_config()

In [44]:
# Attention classifier: embedding -> LSTM emitting every timestep ->
# attention pooling -> dense head with two sigmoid units.
inputs = Input((32,))
x = Embedding(
    # +1 leaves room for index 0, which the tokenizer never assigns to a word
    # and which is used as the padding value.
    input_dim=len(t.word_index) + 1,
    output_dim=32,
    input_length=32,
    embeddings_regularizer=keras.regularizers.l2(.001),
)(inputs)
# return_sequences=True keeps the per-timestep outputs the attention layer
# needs in order to weight them.
att_in = LSTM(
    100,
    return_sequences=True,
    dropout=0.3,
    recurrent_dropout=0.2,
)(x)
att_out = attention()(att_in)
outputs = Dense(2, activation='sigmoid', trainable=True)(att_out)
model = Model(inputs, outputs)
model.summary()


Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 32)]              0         
_________________________________________________________________
embedding_9 (Embedding)      (None, 32, 32)            66304     
_________________________________________________________________
lstm_8 (LSTM)                (None, 32, 100)           53200     
_________________________________________________________________
attention (attention)        (None, 100)               132       
_________________________________________________________________
dense_7 (Dense)              (None, 2)                 202       
Total params: 119,838
Trainable params: 119,838
Non-trainable params: 0
_________________________________________________________________


In [45]:
# Train the attention model with the same loss, optimizer, batch size, epoch
# count and validation split as the baseline cell, so the two runs can be
# compared.
# NOTE(review): the output layer is Dense(2, sigmoid) trained with
# binary_crossentropy on one-hot targets; softmax + categorical_crossentropy
# is the usual pairing for mutually exclusive classes — confirm this is
# intended before comparing accuracies.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
# validation_split=0.2 holds out the LAST 20% of the rows (Keras takes the
# split before shuffling); shuffle=True only reorders the training part.
model.fit(x=text_pad,y=Y,batch_size=100,epochs=10,verbose=1,shuffle=True,validation_split=0.2)

Train on 1600 samples, validate on 400 samples
Epoch 1/10


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "




  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1416d4a90>