## 1. Load the IMDB dataset

In [14]:
# !pip install numpy==1.16.1
import numpy as np
np.__version__
old = np.load
np.load = lambda *a,**k: old(*a,**k,allow_pickle=True)

In [32]:
from tensorflow.keras.datasets import reuters
from tensorflow.keras.layers import Input , Dense
from tensorflow.keras import Model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding # New!
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Activation, LSTM
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.utils import to_categorical

In [44]:
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=5000)

In [58]:
# len([len(curr) for curr in x_train if len(curr) > 400])

In [59]:
word_index = reuters.get_word_index()

In [60]:
text_to_index = reuters.get_word_index()
index_to_text = dict((text_to_index[k], k) for k in text_to_index)

In [61]:
index_to_text[2]

'of'

In [72]:
len(word_index)

30979

In [63]:
max_features = 5000 # size of vocab
input_length = 400 # max length of review (must PAD reviews)

batch_size = 32
# (5000 one-hot -> 50 embedded features)
embedding_dims = 50 # size of embedding layer 

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=input_length)
x_test = sequence.pad_sequences(x_test, maxlen=input_length)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Pad sequences (samples x time)
x_train shape: (8982, 400)
x_test shape: (2246, 400)


In [64]:
y_train_c = to_categorical(y_train)
y_test_c = to_categorical(y_test)

In [65]:
# len( np.unique(y_train) ) , len( np.unique(y_test) )

In [66]:
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
import time

epochs=1
batch_size=64
tensorboard = TensorBoard(log_dir='./logs/%d' % time.time())
earlystopping = EarlyStopping(patience=3)


In [37]:
x_train.shape
from tensorflow.keras.layers import Flatten, RepeatVector,Permute , Multiply,Lambda 
from tensorflow.keras import backend

In [78]:
from tensorflow.keras.optimizers import Adam

In [67]:
# Hyper Parameters

TIME_STEPS = input_length
OUTPUT_UNITS = len(set(np.unique(y_train).tolist() + np.unique(y_test).tolist()))
OUTPUT_UNITS

46

# Functional LSTM API

In [85]:
inputs = Input(shape=(TIME_STEPS,))

# Encoder
emb = Embedding(max_features, 128)(inputs)
encoder_level1 = LSTM(128, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(emb)
encoder = LSTM(128, dropout=0.2, recurrent_dropout=0.2)(encoder_level1)

# Decoder
dense_out = Dense(OUTPUT_UNITS, activation='softmax')(encoder)
news_decoder = Model(inputs=inputs, outputs=dense_out)

news_decoder.compile(loss='categorical_crossentropy', 
#                    optimizer='adam',
                     optimizer=Adam(learning_rate=0.01),
                   metrics=['accuracy'])
news_decoder.summary()
news_decoder.fit(x_train, y_train_c, 
               batch_size=batch_size,
               epochs=1,
               validation_data=(x_test, y_test_c),
               callbacks=[tensorboard, earlystopping])

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 400)]             0         
_________________________________________________________________
embedding_3 (Embedding)      (None, 400, 128)          640000    
_________________________________________________________________
lstm_3 (LSTM)                (None, 400, 128)          131584    
_________________________________________________________________
lstm_4 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense_3 (Dense)              (None, 46)                5934      
Total params: 909,102
Trainable params: 909,102
Non-trainable params: 0
_________________________________________________________________
Train on 8982 samples, validate on 2246 samples


<tensorflow.python.keras.callbacks.History at 0x7efeac5bd5f8>

In [86]:
news_decoder.fit(x_train, y_train_c, 
               batch_size=batch_size*3,
               epochs=5,
               validation_data=(x_test, y_test_c),
               callbacks=[tensorboard, earlystopping])

Train on 8982 samples, validate on 2246 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7eff004c4e10>

In [87]:
news_decoder.fit(x_train, y_train_c, 
               batch_size=batch_size*3,
               epochs=10,
               validation_data=(x_test, y_test_c),
               callbacks=[tensorboard, earlystopping])

Train on 8982 samples, validate on 2246 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


<tensorflow.python.keras.callbacks.History at 0x7eff004c4160>

In [89]:
news_decoder.fit(x_train, y_train_c, 
               batch_size=batch_size*3,
               epochs=20,
               validation_data=(x_test, y_test_c),
               callbacks=[tensorboard, earlystopping])

Train on 8982 samples, validate on 2246 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


<tensorflow.python.keras.callbacks.History at 0x7efeac5bd630>

In [90]:
news_decoder.save('reuters_92acc_big.h5')

# Functional LSTM API - v2

In [None]:
# V2
TIME_STEPS = input_length
inputs = Input(shape=(TIME_STEPS,))

# Encoder
emb = Embedding(max_features, 128)(inputs)
enccode_layer1 = LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(emb)
encoder = LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(enccode_layer1)

# Self-Attention Layer

input_dim = encoder.shape
print("Encoder",input_dim)
attention = Dense(1, activation='tanh')(encoder)
print("Dense", attention.shape)
attention = Flatten()(attention)
print("flatten",attention.shape)
attention = Activation('softmax')(attention)
print("softmax", attention.shape)
units = 100
attention = RepeatVector(units)(attention)
print("repear vector",attention.shape)
attention = Permute([2, 1])(attention)
print("Permute 2 1", attention.shape)
attention = Multiply()([encoder, attention])
print("Multiply enc,att",attention.shape)
# attention = Lambda(lambda x: tf.reduce_mean(x, axis=1, keepdims=True))(attention)
attention = Lambda(lambda x: backend.sum(x, axis=1, keepdims=False))(attention)
#attention = backend.sum(attention, axis=1, keepdims=False)
print("backend.sum",attention.shape)
# Decoder
"""
Previous:
Encoder shape
(?, 100)
"""
dense_out = Dense(OUTPUT_UNITS, activation='softmax')(attention)
sentiment_decoder = Model(inputs=inputs, outputs=dense_out)

sentiment_decoder.compile(loss='categorical_crossentropy', 
                     optimizer=Adam(learning_rate=0.01),
                   metrics=['accuracy'])
sentiment_decoder.summary()
sentiment_decoder.fit(x_train, y_train_c, 
               batch_size=batch_size,
               epochs=epochs,
               validation_data=(x_test, y_test_c),
               callbacks=[tensorboard, earlystopping])

Encoder (?, 400, 100)
Dense (?, 400, 1)
flatten (?, 400)
softmax (?, 400)
repear vector (?, 100, 400)
Permute 2 1 (?, 400, 100)
Multiply enc,att (?, 400, 100)
backend.sum (?, 100)
Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 400)]        0                                            
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 400, 128)     640000      input_7[0][0]                    
__________________________________________________________________________________________________
lstm_9 (LSTM)                   (None, 400, 100)     91600       embedding_6[0][0]                
__________________________________________________________________________________________________
lstm_10 (LS

In [95]:
batch_size = 64
epochs=5
sentiment_decoder.fit(x_train, y_train_c, 
               batch_size=batch_size,
               epochs=epochs,
               validation_data=(x_test, y_test_c),
               callbacks=[tensorboard, earlystopping])

Train on 8982 samples, validate on 2246 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7efb4fa33b00>

In [96]:
sentiment_decoder.save('reuters_v2.h5')

In [None]:
layer_outputs = [layer.output for layer in sentiment_decoder.layers[1:]] 
# Extracts the outputs of the top 12 layers
activation_model = Model(inputs=sentiment_decoder.input, outputs=layer_outputs) # Creates a model that will return these outputs, given the model input

In [None]:
activations = activation_model.predict(test_reviews)
for act,layername in zip(activations,layer_outputs):
    print(layername)
    print(act.shape)

In [None]:
sentiment_decoder.save('imdb_lstm-v2.h5')

In [None]:
sentiment_decoder.summary()

In [None]:
# sentiment_decoder.metrics[0]
# sentiment_decoder.

In [106]:
score, acc = sentiment_decoder.evaluate(x_test, y_test_c, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Test score: 1.0838982986745724
Test accuracy: 0.7413179


## Predit

In [58]:
# Predict
# Source: https://www.imdb.com/title/tt3104988/reviews

test1 = """I was so excited when I first learned that Kevin Kwan's "Crazy Rich Asians" was going to become a film! The book was way more appealing than I had first imagined it would be, and I'm happy to report that Jon Chu's screen version has surpassed my hopeful-but-wary expectations. Not to make it sound too simplistic, the movie was beautiful and very, very funny. Go see it!
Yes, it is a romantic comedy - but this has such intriguing social and cultural undercurrents that it tempts even the fairly observant watcher away from taking the "Cinderella" story at its glitzy face value. While the numerous characters had to have their backstories compressed to fit into just two hours, we are given enough great dialogue, effervescent or slightly evil portrayals, and sumptuous visual clues to make the friends and family members in Singapore come alive.
All the aunties, cousins and ladies-in-waiting may be slightly overwhelming for people who haven't read the book, but anyone with wacky friends and pompous relatives should get it, even if they are not Asian. 
I liked film's especially clever use of graphics, as well as the smooth-to-rocking score, the lush and verdant locations, the perfect designer costuming, and pretty much everything else. One of my favorite lines was about having attended Cal State Fullerton; but you must to watch it for yourself. I honestly have not laughed out loud during a film this much in decades. Oh, and I rather liked Chris Pang, too. A totally hot actor, even though I'm old enough to be his mother.
As soon as Crazy Rich Asians officially opens, I'm going to catch it again. The preview was not enough, and there were so many little moments that deserve a second or third look. Now we must all hope that a sequel with the same talented cast and Chu in charge is coming our way before too long. Thank you all, you fabulous Asian actors, crew, writers and backers who made this possible. And no, I'm not of even a little bit Asian ancestry.
"""
test2 = """what a boring movie. This was a very boring film. I fell asleep in the cinema. This movie deserves no attention! I do not recommend this movie because it's a waste of time."""

def clean_and_get_sequence(text):
    # https://keras.io/preprocessing/text/#text_to_word_sequence
    from tensorflow.keras.preprocessing.text import text_to_word_sequence

    test_sequence = text_to_word_sequence(text, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~\n   ',
                                          lower=True, split=' ')
    # print('before cleaning:', test_sequence)

    # drop words not in vocab
    test_sequence_cleaned = [s for s in test_sequence if s in text_to_index]
    #print('after cleaning:', len(test_sequence_cleaned))

    # words that got dropped
    #print('dropped words:', set(test_sequence) - set(test_sequence_cleaned))

    # map to indices
    test_sequence_index = [text_to_index[s] for s in test_sequence_cleaned]
    #print('as index\n', sequence_index)

    # filter out top 5000
    test_sequence_index_5000 = [i for i in test_sequence_index if i <= 5000]
    print('as index (top 5000 only)\n', test_sequence_index_5000)

    # look at review
    test_review = ' '.join([index_to_text[i] for i in test_sequence_index_5000])
    print('as words (top 5000 only)\n', test_review)
    
    return test_sequence_index_5000

test1_index = clean_and_get_sequence(test1)
test2_index = clean_and_get_sequence(test2)

as index (top 5000 only)
 [10, 13, 35, 2226, 51, 10, 83, 2048, 12, 1839, 929, 1023, 13, 167, 5, 410, 3, 19, 1, 271, 13, 93, 50, 2273, 71, 10, 66, 83, 3789, 9, 59, 27, 2, 143, 651, 5, 4433, 12, 2633, 265, 307, 44, 58, 18, 1395, 21, 5, 94, 9, 478, 96, 4271, 1, 17, 13, 304, 2, 52, 52, 160, 137, 64, 9, 419, 9, 6, 3, 728, 209, 18, 11, 44, 138, 1771, 1028, 2, 2664, 12, 9, 57, 1, 1015, 242, 36, 653, 1, 2376, 62, 30, 91, 390, 1104, 134, 1, 1939, 102, 66, 5, 25, 65, 5, 1180, 80, 40, 104, 631, 72, 23, 345, 192, 84, 411, 39, 1073, 442, 2, 1111, 3623, 5, 94, 1, 366, 2, 220, 1063, 8, 213, 1236, 29, 1, 2, 1911, 8, 1061, 200, 27, 1073, 3978, 15, 81, 34, 771, 329, 1, 271, 18, 256, 16, 4943, 366, 2, 4781, 141, 76, 9, 57, 45, 33, 23, 21, 2185, 10, 420, 595, 259, 1093, 358, 4, 2873, 14, 70, 14, 1, 3554, 5, 600, 1, 2, 1976, 1, 401, 2, 181, 73, 282, 331, 28, 4, 58, 511, 408, 13, 41, 257, 1107, 18, 22, 212, 5, 103, 9, 15, 621, 10, 1249, 25, 21, 1495, 43, 1289, 312, 3, 19, 11, 73, 8, 2737, 446, 2, 10, 244, 4

In [59]:
# from keras.preprocessing import sequence

test_reviews = [test1_index, test2_index]

print('Pad sequences (samples x time)')
test_reviews = sequence.pad_sequences(test_reviews, maxlen=input_length)
print('test_reviews shape:', test_reviews.shape)

Pad sequences (samples x time)
test_reviews shape: (2, 400)


In [101]:
# x_test[0], y_test[0]

(array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0, 

In [104]:
# tests = [test1, test2]
# tests = [x_test[0], x_test[1]]
pred_prob = sentiment_decoder.predict([x_test[0]])

# for text, probability in zip(tests, pred_prob):
#     print(text)
#     print('positive', 'probability', probability)
#     print('-------')

ValueError: Error when checking input: expected input_7 to have shape (400,) but got array with shape (1,)

In [118]:
# x_test[0].shape
pred = sentiment_decoder.predict(x_test[0:1])
np.argmax(pred) , y_test[0]

(3, 3)

In [119]:
# x_test[0].shape
pred = sentiment_decoder.predict(x_test[3:4])
np.argmax(pred) , y_test[3]

(4, 4)

In [108]:
sentiment_decoder.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 400)]        0                                            
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 400, 128)     640000      input_7[0][0]                    
__________________________________________________________________________________________________
lstm_9 (LSTM)                   (None, 400, 100)     91600       embedding_6[0][0]                
__________________________________________________________________________________________________
lstm_10 (LSTM)                  (None, 400, 100)     80400       lstm_9[0][0]                     
____________________________________________________________________________________________