Approach to emulate:

1.Input Layer

2.Embedding layer

3.BLSTM layer, with element-wise sum of forward/backward pass outputs\

Classification should have dropout  applied on the embedding layer, LSTM layer and penultimate layer. as Well as L2 regularization

4.Attention Layer

5.Output Layer

In [None]:
import os
os.environ["KERAS_BACKEND"]='theano'
os.environ["KERAS_BACKEND"]='tensorflow'
import keras
keras.backend.backend()
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional, GRU, Input, TimeDistributed
from keras.models import Sequential, Model, load_model
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers
import lmdb
from lmdb_embeddings.reader import LmdbEmbeddingsReader
import lmdb_embeddings.exceptions as exceptions
from keras.utils import to_categorical, np_utils
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.stem.wordnet import WordNetLemmatizer
import string
import re
import spacy
import tensorflow as tf
nlp=spacy.load('en_core_web_sm')

In [None]:
#Theano
class AttLayer(Layer):
    def __init__(self, **kwargs):
        self.init = initializers.get('normal')
        #self.input_spec = [InputSpec(ndim=3)]
        super(AttLayer, self).__init__(** kwargs)

    def build(self, input_shape):
        assert len(input_shape)==3
        self.W = self.add_weight(name='kernel', 
        shape=(input_shape[-1],),
        initializer='normal',
        trainable=True)
        super(AttLayer, self).build(input_shape)  
        
    def call(self, x, mask=None):
        eij = K.tanh(K.dot(x, self.W))

        ai = K.exp(eij)
        weights = ai/K.sum(ai, axis=1).dimshuffle(0,'x')

        weighted_input = x*weights.dimshuffle(0,1,'x')
        return weighted_input.sum(axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])

In [255]:
#Tensorflow
class AttLayer(Layer):
    def __init__(self, **kwargs):
        self.init = initializers.get('normal')
        #self.input_spec = [InputSpec(ndim=3)]
        super(AttLayer, self).__init__(** kwargs)

    def build(self, input_shape):
        assert len(input_shape)==3
        self.W = self.add_weight(name='kernel', 
        shape=(input_shape[-1],),
        initializer='normal',
        trainable=True)
        super(AttLayer, self).build(input_shape)  
        
    def call(self, x, mask=None):
        eij = K.tanh(np.dot(x, self.W))
        ai = K.exp(eij)
        product=tf.expand_dims(K.sum(ai, axis=1), 1)
        weights = ai/product 
        weighted_input = x*weights
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])

In [None]:
classifier=Sequential()
classifier.add(Bidirectional(GRU(units=25, return_sequences=True), input_shape=(6,300)))
classifier.add(AttLayer())
classifier.add(Dropout(0.3))
classifier.add(Dense(units=4, activation='softmax'))
classifier.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

In [None]:
classifier=load_model('models/emotions_blstm_att_tf.h5', custom_objects={'AttLayer': AttLayer()})

In [20]:
embeddings=LmdbEmbeddingsReader('data/lmdb_databases')
encoder=LabelEncoder()

In [82]:
data=pd.read_csv('data/isear_plus_semeval.csv')

In [17]:
negative = ['not', 'no', 'neither', 'nor', 'but', 'however', 'although', 'nonetheless', 'despite', 'except',
                         'even though', 'yet']
stop = list(set(stopwords.words('english')))
for neg in negative:
    for stopword in stop:
        if stopword==neg:
            stop.remove(stopword)
rm=['don\'t', 'shouldn\'t', 'doesn\'t', 'didn\'t']
for r in rm:
    stop.remove(r)
exclude = set(string.punctuation)
exclude.add('\n')

In [21]:
def clean(doc):
    lemma=WordNetLemmatizer()
    stop_free = " ".join([i for i in doc.lower().split() if i not in stop])
    punc_free = "".join([ch for ch in stop_free if ch not in exclude])
    re.sub(r'\n', '', punc_free)
    normalized = " ".join([lemma.lemmatize(word) for word in punc_free.split()])
    return normalized

In [22]:
def vec_words(li):
    total_vecs=[]
    for word in li:
        try:
            vector = embeddings.get_word_vector(word)
        except exceptions.MissingWordError:
            # 'google' is not in the database.
            vector= np.zeros(300, dtype='float32')
        total_vecs.append(vector)
    return np.array(total_vecs)

In [23]:
def transform_y(y):
    encoder.fit(y)
    y=encoder.transform(y)
    y_1=np_utils.to_categorical(y)
    #y_1=np.reshape(y_1, (-1, 4, 1))
    return y_1

In [24]:
def word_splits(series):
    word_splits=series.str.split(' ')
    return word_splits

In [None]:
def transform_6(X,y=None):
    X=pd.Series(X).apply(clean).apply(input_duplicator_train)
    splits=word_splits(X)
    numbers_series=splits.apply(vec_words)
    num_docs=len(numbers_series)
    X_1=[]
    y_1=[]
    for index in range(0, num_docs):
        doc=numbers_series.iloc[index]
        print(len(doc))
        for i in range(6, len(doc)):
            X_1.append(doc[i-6:i])
            if y is not None:
                y_1.append(y.iloc[index])
                y_1=transform_y(y_1)
    if y is not None:
        return np.array(X_1), np.array(y_1)
    else:
        return np.array(X_1)

In [None]:
def input_duplicator_train(text):
    splits=text.split(' ')
    while len(splits)<7:
        orig_doc=splits.copy()
        for word in orig_doc:
            splits.append(word)
    return ' '.join(splits)

In [None]:
def input_duplicator(text):
    X=pd.Series(text).apply(clean)
    splits=word_splits(X)
    numbers_series=splits
    num_docs=len(numbers_series)
    for index, doc in enumerate(numbers_series):
        while len(doc)<7:
            orig_doc=doc.copy()
            orig_doc=list(orig_doc)
            doc=list(doc)
            for word in orig_doc:
                doc.append(word)
                #doc=np.insert(doc,(len(doc)),word, axis=0)
                #doc=np.append(doc, word, axis=1)
            modified=True
        numbers_series.iloc[index]=np.array(doc)
    X_1 = []
    if num_docs>1:
        for index in range(0, num_docs):
            doc=numbers_series.iloc[index]
            for i in range(6, len(doc)):
                X_1.append(doc[i-6:i])
    else:
        doc=numbers_series.iloc[0]
        print(doc.shape)
        for i in range(6, len(doc)):
                X_1.append(doc[i-6:i])
    return X_1

In [152]:
def predict(classifier, text):
    X=transform(text)
    prediction=classifier.predict(X)
    prediction=np.mean(prediction, axis=0)
    return prediction

In [None]:
X_1, y_1=transform(data['1'], data['0'])

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_1, y_1, test_size = 0.2, random_state = 0)

In [None]:
classifier.fit(X_train, y_train, epochs=5)

In [None]:
#Previous best with theano
classifier.evaluate(X_test, y_test)
#Theano training is many, many times slower than tf

In [None]:
#With Tensorflow
classifier.evaluate(X_test, y_test)
#Comparable results to without attention. This needs better implementation, closer to the papers.

In [None]:
predict('''I do not happy''')

In [None]:
classifier.save('models/emotions_blstm_att_tf.h5')

<h2>Hierachical Attention Network With Buckets</h2>

In [None]:
#Sentence Segmentation
text='Sentence #one... I hope it picks this up. Sentence LMFAO two! Sentence three?'
tokens=nlp(text)
for s in tokens.sents:
    print(s)

In [98]:
from bucketed_sequence import BucketedSequence
from keras.preprocessing.sequence import pad_sequences
from absl import app

UNK = np.zeros(300)
#FLAGS = flags.FLAGS

'''flags.DEFINE_integer('batch_size', 64, 'Batch size')
flags.DEFINE_integer('epochs', 20, 'Number of epochs to train')
flags.DEFINE_integer('lstm_units', 50, 'Number of LSTM units in RNN')
flags.DEFINE_integer('dense_breadth', 64, 'Number of neurons in the dense ' +
                     'layer')

flags.DEFINE_integer('dataset_size', 4726, 'Size of training dataset')
flags.DEFINE_integer('val_size', 1182, 'Size of validation set')
flags.DEFINE_integer('buckets', 4, 'Number of buckets to use (run with ' +
                     '0 to disable)')'''

'''flags.DEFINE_integer('seqlen_mean', 50, 'Sequence length mean (drawn ' +
                     'from normal distribution)')
flags.DEFINE_integer('seqlen_stddev', 200, 'Sequence length standard ' +
                     'deviation (drawn from normal distribution)')'''

batch_size=64
epochs=100
lstm_units=25
dense_breadth=64
buckets=4

In [258]:
classifier=Sequential()
classifier.add(Bidirectional(GRU(units=25, return_sequences=True), input_shape=(54,300)))
classifier.add(LSTM(50, return_sequences=True))
classifier.add(AttLayer())
classifier.add(Dropout(0.3))
classifier.add(Dense(units=4, activation='softmax'))
classifier.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

In [238]:
def pad(seqs, maxlen, UNK):
    # NOTE: prepends data
    padded = np.array(pad_sequences(seqs, maxlen=maxlen, value=UNK, 
                                    dtype=seqs[0].dtype))
    return np.vstack([np.expand_dims(x, axis=0) for x in padded])

In [107]:
def main(model):
    # Prepare data
    X,y=transform(data['1'], data['0'])
    len_train=[x.shape[0] for x in X[:4000]]
    len_val=[x.shape[0] for x in X[4000:]]
    sequence_lengths = [x.shape[0] for x in X]
    X = pad(X, np.max(sequence_lengths))
    X_train=X[:4000]
    X_test=X[4000:]
    y_train=y[:4000]
    y_test=y[4000:]
    if buckets > 0:
        # Create Sequence objects
        train_generator = BucketedSequence(buckets, batch_size,
                                           len_train, X_train, y_train)
        val_generator = BucketedSequence(buckets, batch_size,
                                         len_val, X_test, y_test)

        model.fit_generator(train_generator, epochs=epochs,
                            validation_data=val_generator,
                            shuffle=False, verbose=True)
    else:
        # No bucketing
        model.fit(x=x_train, y=y_train, epochs=epochs,
                  validation_data=(x_val, y_val),
                  batch_size=batch_size, verbose=True, shuffle=True)

In [104]:
X,y=transform(data['1'], data['0'])
len_train=[x.shape[0] for x in X[:4000]]
len_val=[x.shape[0] for x in X[4000:]]
sequence_lengths = [x.shape[0] for x in X]
X = pad(X, np.max(sequence_lengths))
X_train=X[:4000]
X_test=X[4000:]
y_train=y[:4000]
y_test=y[4000:]

In [212]:
X=pad(X,54)

In [175]:
X[0].shape

(54, 300)

In [167]:
y=np.expand_dims(y, axis=1)

In [248]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

<h3>Negations</h3>

In [268]:
negations=pd.read_csv('data/constructed_negations.csv')

In [269]:
isear_semeval_negs=pd.concat([data, negations], axis=0)

In [271]:
data=isear_semeval_negs.sample(frac=1).reset_index(drop=True)

In [None]:
from imblearn.over_sampling import SMOTE
X,y=transform(data['1'], data['0'])

In [282]:
sm=SMOTE(random_state=1, ratio='auto', k_neighbors=5, m_neighbors=10, 
         out_step=0.5, kind='regular', n_jobs=-1)

In [None]:
X=pad(X,54, np.zeros(300))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [289]:
X_1=[]
y_1=[]
for i in range(len(X_train)):
    X_a, y_a=sm.fit_sample(np.ravel(X_train[i]), y_train[i])
    X_1.append(X_a)
    y_1.append(y_a)

ValueError: Expected 2D array, got 1D array instead:
array=[0.     0.     0.     ... 0.2891 0.1951 0.1152].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [274]:
data['0'].value_counts()

joy        3240
sadness    2334
anger      1484
fear       1458
Name: 0, dtype: int64

In [259]:
classifier.fit(X_train, y_train, epochs=20)

Epoch 1/20
Epoch 2/20


Epoch 3/20


Epoch 4/20


Epoch 5/20


Epoch 6/20


Epoch 7/20


Epoch 8/20


Epoch 9/20


Epoch 10/20


Epoch 11/20


Epoch 12/20


Epoch 13/20


Epoch 14/20


Epoch 15/20


Epoch 16/20


Epoch 17/20


Epoch 18/20


Epoch 19/20


Epoch 20/20




<keras.callbacks.History at 0x7733f2e8>

In [260]:
classifier.evaluate(X_test,y_test)



[0.862387182143739, 0.7461928939052846]

In [263]:
classifier.predict(pad(transform("I am not happy"),54, UNK=np.zeros(300)))

array([[0.0033361 , 0.00180081, 0.9900378 , 0.0048253 ]], dtype=float32)

In [235]:
X_train.shape

(4726, 54, 300)

In [177]:
model.predict(pad(transform('I am not happy'),54))

array([[0.00484337, 0.00183919, 0.96562874, 0.02768859]], dtype=float32)

In [106]:
X_train.shape

(4000, 54, 300)

In [242]:
def transform(X,y=None):
    X=pd.Series(X).apply(clean)
    splits=word_splits(X)
    numbers_series=splits.apply(vec_words)
    num_docs=len(numbers_series)
    X_1=[]
    y_1=[]
    for index in range(0, num_docs):
        doc=numbers_series.iloc[index]
        X_doc=[]
        for word in doc:
            X_doc.append(word)
        X_1.append(np.array(X_doc))
        if y is not None:
            y_1.append(y.iloc[index])
            
    if y is not None:
        y_1=transform_y(y_1)
        return np.array(X_1), np.array(y_1)
    else:
        return np.array(X_1)

In [77]:
sequence_lengths = [x.shape[0] for x in X]
padded_x = pad(X, 54)
padded_x.shape

(5908, 54, 300)

In [162]:
bucket_seqlen

[15, 28, 41, 54]

In [111]:
bucket_sizes, bucket_ranges = np.histogram(sequence_lengths,
                                                   bins=4)

In [133]:
bucket_seqlen

[15, 28, 41, 54]

In [134]:
actual_bucketsizes

[4716, 1082, 97, 13]

In [138]:
[14,4716]+list(input_shape)

[14, 4716, 300]

In [137]:
list(input_shape)

[300]

In [153]:
predict(model, 'I am not happy')

array([[0.04738307, 0.02606172, 0.56835777, 0.3581974 ],
       [0.00389095, 0.00138126, 0.9091637 , 0.08556406]], dtype=float32)

In [160]:
np.mean(model.predict(transform("I am not happy")), axis=1)

array([[0.02563701, 0.01372149, 0.7387607 , 0.22188073]], dtype=float32)

In [243]:
X, y=transform(data['1'], data['0'])

In [246]:
X=pad(X,54, UNK=np.zeros(300))

In [215]:
y

array([list(['joy', 'joy', 'joy', 'joy', 'joy', 'joy', 'joy', 'joy', 'joy', 'joy', 'joy', 'joy']),
       list(['sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness']),
       list(['anger', 'anger', 'anger', 'anger', 'anger', 'anger', 'anger', 'anger', 'anger', 'anger', 'anger', 'anger']),
       ...,
       list(['fear', 'fear', 'fear', 'fear', 'fear', 'fear', 'fear']),
       list(['sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness', 'sadness']),
       list(['joy', 'joy', 'joy', 'joy'])], dtype=object)

In [203]:
y_1=[]
mappings={'anger': np.array([1,0,0,0]), 'fear': np.array([0,1,0,0]), 'joy': np.array([0,0,1,0]), 'sadness': np.array([0,0,0,1])}
for entry in y:
    y_entry=[]
    for emot in entry:
        y_entry.append(mappings[emot])
    y_1.append(np.array(y_entry))
y_1=np.array(y_1)
y_1 
y_2=[]
for doc in y_1:
    UNK=doc[0]
    y_doc=pad(doc, 54, UNK)
    y_2.append(np.array(y_doc))
y_2=np.array(y_2)
y_2

array([array([[0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0]]),
       array([[0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 0, 1]]),
       array([[1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0]]),
       ...,
       array([[0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0]]),
       array([[0, 0, 0, 1],


In [None]:
#sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
#sentence_input = Input(shape=(None,), dtype='int32')
#embedded_sequences = embedding_layer(sentence_input)
#l_lstm = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
sentence_input= Input(shape=(None, 300))
l_lstm = Bidirectional(GRU(100, return_sequences=True))(sentence_input)
l_dense = TimeDistributed(Dense(200))(l_lstm)
l_att = AttLayer()(l_dense)
sentEncoder = Model(sentence_input, l_att)
 
#review_input = Input(shape=(MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')
review_input = Input(shape=(7,None), dtype='int32')
review_encoder = TimeDistributed(sentEncoder)(review_input)
l_lstm_sent = Bidirectional(GRU(100, return_sequences=True))(review_encoder)
l_dense_sent = TimeDistributed(Dense(200))(l_lstm_sent)
l_att_sent = AttLayer()(l_dense_sent)
preds = Dense(2, activation='softmax')(l_att_sent)
model = Model(review_input, preds)

In [None]:
han=Sequential()
han.add(Bidirectional(GRU(units=100, return_sequences=True), input_shape=(6,300)))
han.add(TimeDistributed(Dense(200)))
han.add(AttLayer())
classifier.add(Dropout(0.3))
classifier.add(Dense(units=4, activation='softmax'))
classifier.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

<h1>anger, fear, joy, sadness</h1>