In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import time
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

In [3]:
# Load the IMDb dataset provided by Keras
dataset = keras.datasets.imdb

# Load data, keeping only the most frequent words (e.g., 10,000 words)
num_words = 10000
(x_train, y_train), (x_test, y_test) = dataset.load_data(num_words=num_words)

# Check dataset dimensions
print(f"Training set size: {x_train.shape}, Test set size: {x_test.shape}")

# Decode an example to understand the dataset format
word_index = dataset.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}

def decode_review(text):
    return ' '.join([reverse_word_index.get(i - 3, '?') for i in text])

print("Example of decoded review:")
print(decode_review(x_train[0]))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1us/step
Training set size: (25000,), Test set size: (25000,)
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1us/step
Example of decoded review:
? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really crie

In [4]:
# Data preprocessing
max_len = 500  # Maximum sequence length
x_train = pad_sequences(x_train, maxlen=max_len, padding='post', truncating='post')
x_test = pad_sequences(x_test, maxlen=max_len, padding='post', truncating='post')

In [5]:
# Create the model with an embedding layer trained from scratch
embedding_dim = 100

model = Sequential([
    Embedding(input_dim=num_words, output_dim=embedding_dim, input_length=max_len),
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Display model summary
model.summary()

# Train the model
batch_size = 32
epochs = 10
model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, y_test))


2025-03-17 19:51:01.049426: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m315s[0m 399ms/step - accuracy: 0.5050 - loss: 0.6939 - val_accuracy: 0.5047 - val_loss: 0.6940
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m335s[0m 428ms/step - accuracy: 0.5112 - loss: 0.6933 - val_accuracy: 0.5053 - val_loss: 0.6930
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 398ms/step - accuracy: 0.5104 - loss: 0.6926 - val_accuracy: 0.5028 - val_loss: 0.6923
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 398ms/step - accuracy: 0.5123 - loss: 0.6895 - val_accuracy: 0.5050 - val_loss: 0.6951
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 398ms/step - accuracy: 0.5284 - loss: 0.6749 - val_accuracy: 0.5078 - val_loss: 0.6990
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m312s[0m 398ms/step - accuracy: 0.5392 - loss: 0.6528 - val_accuracy: 0.5143 - val_loss: 0.7160
Epoc

<keras.src.callbacks.history.History at 0x7e9c93abd0d0>

In [None]:
# Create the model with an embedding layer trained from scratch
embedding_dim = 100

model = Sequential([
    Embedding(input_dim=num_words, output_dim=embedding_dim, input_length=max_len),
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Display model summary
model.summary()

# Train the model with time tracking
batch_size = 32
epochs = 10

start_time = time.time()  # ⏱️ Start time

history = model.fit(
    x_train, y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, y_test)
)

end_time = time.time()  # ⏱️ End time

elapsed_time = end_time - start_time
print(f"\nTraining time: {elapsed_time:.2f} seconds")
