# GRU model for sentiment analysis

## model development

In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Load the IMDb dataset

max_words = 20000  # Vocabulary cap handed to load_data as num_words
max_len = 200  # Common sequence length used when padding the reviews

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

# Bring both splits to the same fixed length (max_len) in one pass

X_train, X_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (X_train, X_test)
)

In [None]:
def build_gru_model():
    """Assemble and compile the stacked-GRU binary classifier.

    The network is an Embedding layer (vocabulary size ``max_words``,
    128-dimensional vectors, input length ``max_len``), followed by five
    sequence-returning GRU layers of 128, 64, 32, 16 and 8 units, each
    followed by Dropout(0.2), then a final GRU with 4 units that returns
    only its last output, and a single sigmoid unit.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, binary
        cross-entropy loss, accuracy metric).
    """
    network = models.Sequential()
    network.add(
        layers.Embedding(input_dim=max_words, output_dim=128, input_length=max_len)
    )

    # Intermediate recurrent layers keep the full sequence so that the next
    # GRU in the stack has a time dimension to consume.
    for units in (128, 64, 32, 16, 8):
        network.add(layers.GRU(units, return_sequences=True))
        network.add(layers.Dropout(0.2))

    # The last GRU collapses the sequence to one vector for the classifier.
    network.add(layers.GRU(4, return_sequences=False))
    network.add(layers.Dense(1, activation='sigmoid'))

    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network


gru_model = build_gru_model()

In [None]:
# Early stopping: watch val_loss, allow 2 epochs of patience, and restore the
# best weights when training stops

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

# Checkpointing: also driven by val_loss (lower is better); with
# save_best_only=True only the best model so far is kept at the given path

checkpoint_callback = ModelCheckpoint(
    filepath='models/gru_model_sa.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train the GRU model
#
# Validation uses a slice held out from the training data (validation_split)
# instead of the test set. Both callbacks pick the model on val_loss, so
# validating on (X_test, y_test) would select the checkpoint and the stopping
# epoch on the same data that is afterwards used to report accuracy, making
# that figure optimistic.

history_gru = gru_model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2,  # fraction of the training data held out for val_loss
    callbacks=[checkpoint_callback, early_stopping],
    verbose=1
)

In [None]:
# Score the trained GRU model on the test split and report its accuracy

gru_loss, gru_accuracy = gru_model.evaluate(
    x=X_test,
    y=y_test,
    verbose=1,
)
print(f"GRU Model Accuracy: {gru_accuracy * 100:.2f}%")

## model utilization

In [4]:
# load the saved model

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import load_model

# Restore the model from the path the checkpoint callback writes to
best_gru_model = load_model(filepath='models/gru_model_sa.keras')

In [None]:
# Maps each word to its frequency rank (1 = most frequent). These are raw
# ranks, NOT the ids found in the sequences returned by imdb.load_data.
word_index = imdb.get_word_index()

def preprocess_query(query, max_len=200, num_words=20000):
    """Encode a raw text query the same way the training reviews were encoded.

    ``imdb.load_data`` (called with its default settings apart from
    ``num_words``) reserves id 0 for padding, 1 for the start-of-sequence
    marker and 2 for out-of-vocabulary words, and shifts every word rank by
    3. Ids that reach ``num_words`` are replaced by the out-of-vocabulary id.
    This function reproduces that scheme so that each word hits the embedding
    row it was trained on.

    Args:
        query: Free-form text to classify.
        max_len: Length the encoded sequence is padded/truncated to. Defaults
            to 200, the sequence length the model was trained with.
        num_words: Vocabulary size used at training time; ids at or above it
            are mapped to the out-of-vocabulary id.

    Returns:
        The array produced by ``pad_sequences`` for the single encoded query
        (one row of ``max_len`` ids).
    """
    # Reserved ids / offset used by imdb.load_data's defaults.
    start_id, oov_id, index_offset = 1, 2, 3
    # Punctuation glued to a word ("movie,", "fantastic!") would otherwise
    # miss the vocabulary lookup. NOTE(review): assumes the vocabulary holds
    # bare lowercase words - confirm against word_index.
    edge_punctuation = ".,!?;:\"()[]{}"

    encoded = [start_id]
    for token in query.lower().split():
        token = token.strip(edge_punctuation)
        if not token:
            continue
        rank = word_index.get(token)
        token_id = oov_id if rank is None else rank + index_offset
        # Words rarer than the training vocabulary were seen as OOV in training.
        encoded.append(token_id if token_id < num_words else oov_id)

    return pad_sequences([encoded], maxlen=max_len)

In [None]:
def predict_sentiment(query):
    """Classify a text query with the loaded GRU model.

    The query is encoded by ``preprocess_query`` and passed to
    ``best_gru_model.predict``; the first value of the first output row is
    used as the score.

    Args:
        query: Text to classify.

    Returns:
        A ``(label, score)`` tuple where ``label`` is ``"POSITIVE"`` when the
        score is greater than 0.5 and ``"NEGATIVE"`` otherwise, and ``score``
        is the raw model output value.
    """
    model_input = preprocess_query(query)
    score = best_gru_model.predict(model_input)[0][0]

    if score > 0.5:
        label = "POSITIVE"
    else:
        label = "NEGATIVE"
    return label, score

# A few hand-written reviews to try the loaded model on
custom_queries = [
    "I loved this movie, it was fantastic!",
    "This movie was terrible and boring.",
    "The plot was interesting but the acting was bad.",
]

# Print the query, its predicted label with the score, and a separator line
for query in custom_queries:
    sentiment, confidence = predict_sentiment(query)
    print(
        f"Query: {query}",
        f"Sentiment: {sentiment}, Confidence: {confidence:.4f}",
        "-------------------------",
        sep="\n",
    )