In [1]:
pip install tensorflow numpy matplotlib




In [6]:
# Importing necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Load dataset (Using IMDB dataset available in Keras for demonstration)
from tensorflow.keras.datasets import imdb

# Hyperparameters
vocab_size = 10000  # Vocabulary size (adjustable)
embedding_dim = 64  # Embedding layer size
max_length = 200  # Maximum length of input sequences
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"
batch_size = 64
epochs = 10

# Load and preprocess data
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Padding sequences to ensure consistent input shape
x_train = pad_sequences(x_train, maxlen=max_length, padding=padding_type, truncating=trunc_type)
x_test = pad_sequences(x_test, maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Define the LSTM model
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    LSTM(64, return_sequences=True),
    Dropout(0.2),
    LSTM(32),
    Dense(24, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification (positive/negative)
])

model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
print(model.summary())

# Train the model
history = model.fit(x_train, y_train, epochs=epochs, validation_data=(x_test, y_test), batch_size=batch_size)

# Plotting training history
def plot_history(history):
    plt.figure(figsize=(8, 4))
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Model Accuracy')
    plt.show()

    plt.figure(figsize=(8, 4))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Model Loss')
    plt.show()

plot_history(history)

# Prediction on new text
def predict_sentiment(model, tokenizer, text, max_length=max_length):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding=padding_type, truncating=trunc_type)
    prediction = model.predict(padded_sequence)
    sentiment = "Positive" if prediction >= 0.5 else "Negative"
    print(f"Text: {text}")
    print(f"Sentiment: {sentiment} (Confidence: {prediction[0][0]:.2f})")

# Sample prediction



None
Epoch 1/10


KeyboardInterrupt: 

In [7]:


sample_text = "not good."
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(imdb.get_word_index())
predict_sentiment(model, tokenizer, sample_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 321ms/step
Text: not good.
Sentiment: Negative (Confidence: 0.49)
