In [3]:
import matplotlib.pyplot as plt
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, Dense, GRU, LSTM, Bidirectional, Dropout, Flatten, ReLU, BatchNormalization
from keras.optimizers import Adam, Lion
from keras.callbacks import EarlyStopping

from keras.utils import custom_object_scope

In [5]:
max_features = 10000
maxlen = 500

In [6]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


Передпроцесинг

In [7]:
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

###Simple RNN

In [11]:
model_rnn1 = Sequential()
model_rnn1.add(Embedding(max_features, 32))
model_rnn1.add(SimpleRNN(128, return_sequences=True))
model_rnn1.add(Dropout(0.2))
model_rnn1.add(SimpleRNN(64, return_sequences=False))
model_rnn1.add(Dropout(0.2))
model_rnn1.add(Dense(1, activation='sigmoid'))

model_rnn1.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
model_rnn1.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, None, 32)          320000    
                                                                 
 simple_rnn (SimpleRNN)      (None, None, 128)         20608     
                                                                 
 dropout_1 (Dropout)         (None, None, 128)         0         
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 64)                12352     
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 353025 (1.35 MB)
Trainable params: 35302

In [7]:
model_rnn1.fit(x_train, y_train, epochs=5, batch_size=128, validation_data=(x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7dc775c7f580>

In [8]:
test_loss, test_acc = model_rnn1.evaluate(x_test, y_test)
print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc * 100))

Test Loss: 0.4179951846599579
Test Accuracy: 81.25600218772888


In [14]:
model_rnn_3 = Sequential()
model_rnn_3.add(Embedding(max_features, 64))
model_rnn_3.add(SimpleRNN(128, return_sequences=True))
model_rnn_3.add(Dropout(0.5))
model_rnn_3.add(SimpleRNN(64, return_sequences=False))
model_rnn_3.add(Dropout(0.5))

model_rnn_3.add(Dense(1, activation='sigmoid'))

with custom_object_scope({'Lion': Lion}):
  model_rnn_3.compile(optimizer='Lion', loss='binary_crossentropy', metrics=['accuracy'])

# Додайте EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_rnn_3.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, None, 64)          640000    
                                                                 
 simple_rnn_3 (SimpleRNN)    (None, None, 128)         24704     
                                                                 
 dropout_3 (Dropout)         (None, None, 128)         0         
                                                                 
 simple_rnn_4 (SimpleRNN)    (None, 64)                12352     
                                                                 
 dropout_4 (Dropout)         (None, 64)                0         
                                                                 
 dense_3 (Dense)             (None, 1)                 65        
                                                                 
Total params: 677121 (2.58 MB)
Trainable params: 67712

In [14]:
model_rnn_3.fit(x_train, y_train, epochs=10, batch_size=100, validation_data=(x_test, y_test), callbacks=[early_stopping])

test_loss, test_acc = model_rnn_3.evaluate(x_test, y_test)
print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc * 100))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Test Loss: 0.33838602900505066
Test Accuracy: 86.27200126647949


Висновки: найкращою виявилася model_rnn_3 з 2-ма SimpleRNN шарами та оптимізатором Lion і застосуванням early_stopping. Найпростіша RNN-модель без Dropout і з одним шаром показала найгірші результати (72%).

###LSTM model

model_lstm_1 - максимально проста

In [15]:
model_lstm_1 = Sequential()
model_lstm_1.add(Embedding(max_features, 64))
model_lstm_1.add(LSTM(64))
model_lstm_1.add(Dense(1, activation='sigmoid'))

model_lstm_1.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
model_lstm_1.summary()


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, None, 64)          640000    
                                                                 
 lstm (LSTM)                 (None, 64)                33024     
                                                                 
 dense_3 (Dense)             (None, 1)                 65        
                                                                 
Total params: 673089 (2.57 MB)
Trainable params: 673089 (2.57 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
model_lstm_1.fit(x_train, y_train, epochs=5, batch_size=128, validation_data=(x_test, y_test))

test_loss, test_acc = model_lstm_1.evaluate(x_test, y_test, verbose=0)
print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc * 100))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.3035857379436493
Test Accuracy: 87.74799704551697


model_lstm_2 - 2 LSTM-шари

In [22]:
model_lstm_2 = Sequential()
model_lstm_2.add(Embedding(max_features, 32))
model_lstm_2.add(LSTM(128, return_sequences=True))
model_lstm_2.add(Dropout(0.2))
model_lstm_2.add(LSTM(64))
model_lstm_2.add(Dropout(0.2))

model_lstm_2.add(Dense(1, activation='sigmoid'))

model_lstm_2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

model_lstm_2.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, None, 32)          320000    
                                                                 
 lstm_5 (LSTM)               (None, None, 128)         82432     
                                                                 
 dropout_8 (Dropout)         (None, None, 128)         0         
                                                                 
 lstm_6 (LSTM)               (None, 64)                49408     
                                                                 
 dropout_9 (Dropout)         (None, 64)                0         
                                                                 
 dense_7 (Dense)             (None, 1)                 65        
                                                                 
Total params: 451905 (1.72 MB)
Trainable params: 45190

In [24]:
model_lstm_2.fit(x_train, y_train, epochs=5, batch_size=200, validation_data=(x_test, y_test), callbacks=[early_stopping])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7dc778a261d0>

In [25]:
test_loss, test_acc = model_lstm_2.evaluate(x_test, y_test, verbose=0)
print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc * 100))

Test Loss: 0.3547706604003906
Test Accuracy: 86.66399717330933


Висновки: model_lstm_1 з rmsprop оптимізатором та early_stopping виявилася точнішою за model_lstm_2 і навчалася значно швидше. Отже, нема сенсу ускладнювати LSMT-модель додатковими LSTM-шарами.

#Bidirectional

Проста Bidirectional-модель

In [26]:
model_bi_1 = Sequential()
model_bi_1.add(Embedding(max_features, 32))
model_bi_1.add(Bidirectional(LSTM(64)))
model_bi_1.add(Dropout(0.2))

model_bi_1.add(Dense(1, activation='sigmoid'))

model_bi_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_bi_1.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_7 (Embedding)     (None, None, 32)          320000    
                                                                 
 bidirectional (Bidirection  (None, 128)               49664     
 al)                                                             
                                                                 
 dropout_10 (Dropout)        (None, 128)               0         
                                                                 
 dense_8 (Dense)             (None, 1)                 129       
                                                                 
Total params: 369793 (1.41 MB)
Trainable params: 369793 (1.41 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
model_bi_1.fit(x_train, y_train, epochs=5, batch_size=256, validation_data=(x_test, y_test), callbacks=[early_stopping])
test_loss, test_acc = model_bi_1.evaluate(x_test, y_test, verbose=0)
print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc * 100))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.33300507068634033
Test Accuracy: 87.11199760437012


Bidirectional-модель на 2 шари

In [None]:
model_bi_2 = Sequential()
model_bi_2.add(Embedding(max_features, 32))
model_bi_2.add(Bidirectional(LSTM(128, return_sequences=True)))
model_bi_2.add(BatchNormalization())
model_bi_2.add(Bidirectional(LSTM(64)))
model_bi_2.add(BatchNormalization())


model_bi_2.add(Dense(1, activation='sigmoid'))

with custom_object_scope({'Lion': Lion}):
  model_bi_2.compile(optimizer='Lion', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_bi_2.summary()

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_17 (Embedding)    (None, None, 32)          160000    
                                                                 
 bidirectional_8 (Bidirecti  (None, None, 256)         164864    
 onal)                                                           
                                                                 
 batch_normalization (Batch  (None, None, 256)         1024      
 Normalization)                                                  
                                                                 
 bidirectional_9 (Bidirecti  (None, 128)               164352    
 onal)                                                           
                                                                 
 batch_normalization_1 (Bat  (None, 128)               512       
 chNormalization)                                    

In [None]:
model_bi_2.fit(x_train, y_train, epochs=10, batch_size=256, validation_data=(x_test, y_test), callbacks=[early_stopping])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


<keras.src.callbacks.History at 0x7b4ac5996da0>

In [None]:
test_loss, test_acc = model_bi_2.evaluate(x_test, y_test, verbose=0)
print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc * 100))

Test Loss: 0.3862709105014801
Test Accuracy: 83.00399780273438


Висновки: model_bi_1 з одним Bidirectional-леєром та оптимізатором Адам виявилася максимально точною та досить швидкою у навчанні. Отже навряд чи є сенс додавати дотакові шари.

DEEP

In [None]:
model_deep_3 = Sequential()


model_deep_3.add(Embedding(max_features, 32, input_length=maxlen))
model_deep_3.add(Bidirectional(LSTM(64, return_sequences=True)))
model_deep_3.add(Dropout(0.2))
model_deep_3.add(Bidirectional(LSTM(32, return_sequences=True)))
model_deep_3.add(Dropout(0.2))
model_deep_3.add(LSTM(16))
model_deep_3.add(Dropout(0.2))

model_deep_3.add(Dense(1, activation='sigmoid'))

model_deep_3.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model_deep_3.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_16 (Embedding)    (None, 200, 32)           160000    
                                                                 
 bidirectional_6 (Bidirecti  (None, 200, 128)          49664     
 onal)                                                           
                                                                 
 dropout_43 (Dropout)        (None, 200, 128)          0         
                                                                 
 bidirectional_7 (Bidirecti  (None, 200, 64)           41216     
 onal)                                                           
                                                                 
 dropout_44 (Dropout)        (None, 200, 64)           0         
                                                                 
 lstm_38 (LSTM)              (None, 16)              

In [None]:
model_deep_3.fit(x_train, y_train,
          batch_size=64,
          epochs=10,
          validation_data=(x_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7b4aac1dc340>

In [None]:
test_loss, test_acc = model_deep_3.evaluate(x_test, y_test, verbose=0)
print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc * 100))

Test Loss: 0.5142635703086853
Test Accuracy: 84.87600088119507


Загалом для даного датасету мабуть недоцільно використовувати Deep-моделі, навчання яких триває дуже довго, а результат виходить навіть нижчим ніж в RNN та LSTM-моделях. Найкраще показали себе двошарова RNN-модель з early_stopping та Lion. Дві LSTM-моделі на 1 та 2 шари дали непоагний результат (87 та 86%).
Частково я використовувала deep-моделі в LSTM та Bidirectional, прагнучи отримати кращі показники.