In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from app.preprocess import load_data, preprocess_data

# Load and preprocess data
file_path = 'data/IMDB Dataset.csv'
reviews, labels = load_data(file_path)
data, tokenizer = preprocess_data(reviews)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Build the model
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=100))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=5, batch_size=128, validation_split=0.2, verbose=1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

# Save the model and tokenizer
model.save('app/sentiment_analysis_model.h5')

import pickle
with open('app/tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)


Epoch 1/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 1s/step - accuracy: 0.6936 - loss: 0.5549 - val_accuracy: 0.8655 - val_loss: 0.3208
Epoch 2/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 1s/step - accuracy: 0.9012 - loss: 0.2567 - val_accuracy: 0.8750 - val_loss: 0.3110
Epoch 3/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m216s[0m 865ms/step - accuracy: 0.9275 - loss: 0.1920 - val_accuracy: 0.8755 - val_loss: 0.3019
Epoch 4/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 990ms/step - accuracy: 0.9445 - loss: 0.1530 - val_accuracy: 0.8786 - val_loss: 0.3191
Epoch 5/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 833ms/step - accuracy: 0.9628 - loss: 0.1103 - val_accuracy: 0.8664 - val_loss: 0.4445
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 113ms/step - accuracy: 0.8640 - loss: 0.4529




Test Accuracy: 0.87
