In [1]:
from typing import Tuple, Callable

import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Activation, Dense, Input, SimpleRNN, LSTM, GRU
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras.layers.pooling import AveragePooling2D

from tf_utils.imdbDataAdvanced import IMDB

In [2]:
# Seed NumPy's global RNG and TensorFlow's global RNG with the same fixed
# value so that repeated runs start from the same random state.
SEED = 0
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def create_rnn_model(input_shape: Tuple[int, int], num_classes: int) -> Model:
    """Build, compile, and summarize a SimpleRNN-based classifier.

    Architecture: Input -> SimpleRNN(80) -> Dense(80) -> ReLU ->
    Dense(num_classes) -> softmax. The model is compiled with Adam
    (learning rate 1e-4), categorical cross-entropy loss, and accuracy as
    the reported metric. The layer summary is printed as a side effect.

    Args:
        input_shape: Shape of a single input sample, forwarded unchanged to
            ``Input(shape=...)``.
        num_classes: Number of units in the final Dense layer, i.e. the size
            of the softmax output.

    Returns:
        The compiled Keras ``Model``.
    """
    sequence_in = Input(shape=input_shape)
    # return_sequences=False: the recurrent layer emits only its final
    # output instead of one output per timestep (many-to-one).
    features = SimpleRNN(units=80, return_sequences=False)(sequence_in)
    features = Dense(units=80)(features)
    features = Activation("relu")(features)
    logits = Dense(units=num_classes)(features)
    probabilities = Activation("softmax")(logits)

    classifier = Model(inputs=[sequence_in], outputs=[probabilities])
    classifier.compile(
        loss="categorical_crossentropy",
        optimizer=Adam(learning_rate=1e-4),
        metrics=["accuracy"],
    )
    classifier.summary()
    return classifier

In [4]:
def create_lstm_model(input_shape: Tuple[int, int], num_classes: int) -> Model:
    """Build, compile, and summarize an LSTM-based classifier.

    Architecture: Input -> LSTM(80) -> Dense(80) -> ReLU ->
    Dense(num_classes) -> softmax. The model is compiled with Adam
    (learning rate 1e-4), categorical cross-entropy loss, and accuracy as
    the reported metric. The layer summary is printed as a side effect.

    Args:
        input_shape: Shape of a single input sample, forwarded unchanged to
            ``Input(shape=...)``.
        num_classes: Number of units in the final Dense layer, i.e. the size
            of the softmax output.

    Returns:
        The compiled Keras ``Model``.
    """
    sequence_in = Input(shape=input_shape)
    # return_sequences=False: the recurrent layer emits only its final
    # output instead of one output per timestep (many-to-one).
    features = LSTM(units=80, return_sequences=False)(sequence_in)
    features = Dense(units=80)(features)
    features = Activation("relu")(features)
    logits = Dense(units=num_classes)(features)
    probabilities = Activation("softmax")(logits)

    classifier = Model(inputs=[sequence_in], outputs=[probabilities])
    classifier.compile(
        loss="categorical_crossentropy",
        optimizer=Adam(learning_rate=1e-4),
        metrics=["accuracy"],
    )
    classifier.summary()
    return classifier

In [5]:
def create_gru_model(input_shape: Tuple[int, int], num_classes: int) -> Model:
    """Build, compile, and summarize a GRU-based classifier.

    Architecture: Input -> GRU(80) -> Dense(80) -> ReLU ->
    Dense(num_classes) -> softmax. The model is compiled with Adam
    (learning rate 1e-4), categorical cross-entropy loss, and accuracy as
    the reported metric. The layer summary is printed as a side effect.

    Args:
        input_shape: Shape of a single input sample, forwarded unchanged to
            ``Input(shape=...)``.
        num_classes: Number of units in the final Dense layer, i.e. the size
            of the softmax output.

    Returns:
        The compiled Keras ``Model``.
    """
    sequence_in = Input(shape=input_shape)
    # return_sequences=False: the recurrent layer emits only its final
    # output instead of one output per timestep (many-to-one).
    features = GRU(units=80, return_sequences=False)(sequence_in)
    features = Dense(units=80)(features)
    features = Activation("relu")(features)
    logits = Dense(units=num_classes)(features)
    probabilities = Activation("softmax")(logits)

    classifier = Model(inputs=[sequence_in], outputs=[probabilities])
    classifier.compile(
        loss="categorical_crossentropy",
        optimizer=Adam(learning_rate=1e-4),
        metrics=["accuracy"],
    )
    classifier.summary()
    return classifier

In [6]:
# --- Data ---------------------------------------------------------------
vocab_size = 20_000
sequence_length = 80
imdb_data = IMDB(vocab_size, sequence_length)
train_dataset = imdb_data.get_train_set()
val_dataset = imdb_data.get_val_set()
test_dataset = imdb_data.get_test_set()
# Shape handed to each model's Input layer: `sequence_length` steps with
# one value per step.
input_shape = (sequence_length, 1)
num_classes = imdb_data.num_classes

# --- Training configuration ---------------------------------------------
# `batch_size` is intentionally NOT passed to fit()/evaluate() below. The
# train/val/test sets are given as `x` without separate targets, so they
# must yield (inputs, targets) batches themselves (dataset/generator-style
# input), and Keras documents that `batch_size` must not be specified for
# such inputs. The name is kept in case other cells reference it.
# NOTE(review): if a batch size of 512 is really intended, it presumably
# has to be configured where IMDB builds its datasets -- confirm.
batch_size = 512
epochs = 10

# --- Model comparison ---------------------------------------------------
# Maps a display name to the factory that builds and compiles that model.
model_fns = {
    "RNN": create_rnn_model,
    "LSTM": create_lstm_model,
    "GRU": create_gru_model,
}

# Train every architecture on the same data for the same number of epochs,
# then report its result on the held-out test set.
for name, model_fn in model_fns.items():
    print(f"Model: {name}")
    model = model_fn(input_shape, num_classes)
    model.fit(
        x=train_dataset,
        verbose=1,
        epochs=epochs,
        validation_data=val_dataset,
    )
    # evaluate() returns the loss followed by the compiled metrics.
    score = model.evaluate(x=test_dataset, verbose=0)
    print(f"Test performance: {score}")

Model: RNN
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 80, 1)]           0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 80)                6560      
                                                                 
 dense (Dense)               (None, 80)                6480      
                                                                 
 activation (Activation)     (None, 80)                0         
                                                                 
 dense_1 (Dense)             (None, 2)                 162       
                                                                 
 activation_1 (Activation)   (None, 2)                 0         
                                                                 
Total params: 13,202
Trainable params: 13,202
Non-