# Model 13: LSTM (Long Short-Term Memory) – Step-by-Step
Train an **LSTM** for **text sentiment analysis** using the IMDB dataset.

You will learn:
1) Why LSTM is better than vanilla RNN
2) How to prepare sequence data
3) How to build an LSTM model
4) Train, evaluate, and test on new text


In [None]:
# If TensorFlow is missing, uncomment the next line:
# !pip -q install tensorflow

import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np

# Report which TensorFlow build the notebook is running against.
print(f'TensorFlow version: {tf.__version__}')

## 1) Load IMDB dataset

In [None]:
# Vocabulary size handed to the IMDB loader; the same value is reused later
# as the Embedding layer's `input_dim`.
num_words = 10000

# load_data returns a (train, test) pair of (reviews, labels) tuples.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(
    num_words=num_words,
)

print(f'Train samples: {len(x_train)}')
print(f'Test samples: {len(x_test)}')

## 2) Pad sequences

In [None]:
# Every review is brought to this fixed number of token ids so the reviews
# can be stacked into one 2-D array.
maxlen = 200

# Apply the same padding call to both splits and rebind the original names.
x_train, x_test = (
    tf.keras.preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)

print(f'Train shape: {x_train.shape}')
print(f'Test shape : {x_test.shape}')

## 3) Build the LSTM model

In [None]:
# Sentiment classifier: token ids -> 64-d embeddings -> LSTM(64) -> sigmoid.
#
# The sequence length is declared with an explicit Input layer instead of the
# Embedding `input_length` argument. Keras 3 (bundled with TensorFlow >= 2.16)
# deprecates `input_length` and ignores it, which leaves the Sequential model
# unbuilt, so `model.summary()` cannot report output shapes or parameter
# counts. An Input layer works on both Keras 2 and Keras 3.
model = models.Sequential([
    layers.Input(shape=(maxlen,), dtype='int32'),
    layers.Embedding(input_dim=num_words, output_dim=64),
    layers.LSTM(64),
    layers.Dense(1, activation='sigmoid'),
])

# Binary labels with a single sigmoid output -> binary cross-entropy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

## 4) Train

In [None]:
# Fit on the padded training reviews; 20% of the training data is held out
# for validation via `validation_split`. The returned object is kept in
# `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=3,
    verbose=1,
    validation_split=0.2,
)

## 5) Evaluate

In [None]:
# Score the trained model on the held-out test split. evaluate() yields the
# loss followed by the metrics passed to compile() (here: accuracy).
loss, acc = model.evaluate(x=x_test, y=y_test, verbose=0)
print(f'Test accuracy: {acc}')
print(f'Test loss: {loss}')

## 6) Try your own text

In [None]:
# Word -> integer index lookup for the IMDB vocabulary; encode_review() reads
# it with `.get(word)` to translate raw text into token ids.
word_index = tf.keras.datasets.imdb.get_word_index()

def encode_review(text, maxlen=200, vocab_size=None):
    """Encode raw review text into a padded batch of IMDB token ids.

    The encoding follows the defaults of ``tf.keras.datasets.imdb.load_data``
    so that new text looks like the training data: every sequence starts with
    the start marker (1), each word's ``word_index`` entry is shifted by 3,
    and any word that is unknown or falls outside the vocabulary is replaced
    by the out-of-vocabulary marker (2).

    Args:
        text: Review text. It is lower-cased and split on whitespace;
            punctuation attached to the start or end of a word is stripped.
        maxlen: Length the id sequence is padded or truncated to.
        vocab_size: Exclusive upper bound for emitted ids. Defaults to the
            notebook-level ``num_words`` used to load the dataset and to size
            the Embedding layer.

    Returns:
        The output of ``pad_sequences`` for a one-review batch, i.e. a single
        row of ``maxlen`` token ids.
    """
    import string  # local import: only this helper needs it

    if vocab_size is None:
        vocab_size = num_words

    # Reserved ids used by load_data's defaults (start_char, oov_char,
    # index_from).
    start_id, oov_id, index_offset = 1, 2, 3

    encoded = [start_id]
    for raw_word in text.lower().split():
        # "movie," or "slow." would otherwise miss the vocabulary entirely.
        w = raw_word.strip(string.punctuation)
        if not w:
            continue
        idx = word_index.get(w)
        token = oov_id if idx is None else idx + index_offset
        # Ids >= vocab_size are out of range for the Embedding layer.
        if token >= vocab_size:
            token = oov_id
        encoded.append(token)
    return tf.keras.preprocessing.sequence.pad_sequences([encoded], maxlen=maxlen)

def predict_sentiment(text):
    """Classify a review as positive or negative.

    Args:
        text: Raw review text, encoded with ``encode_review``.

    Returns:
        A ``(label, prob)`` tuple where ``prob`` is the model's output as a
        Python float and ``label`` is ``'positive'`` when ``prob >= 0.5``,
        otherwise ``'negative'``.
    """
    batch = encode_review(text)
    scores = model.predict(batch, verbose=0)
    # One review in, one sigmoid unit out: take the single scalar.
    prob = float(scores[0][0])
    if prob >= 0.5:
        return 'positive', prob
    return 'negative', prob

# Two hand-written reviews to sanity-check the trained model.
examples = [
    'this movie was amazing and inspiring',
    'terrible movie boring and slow',
]

# For each review print the text, the prediction line and a separator rule.
for t in examples:
    label, prob = predict_sentiment(t)
    print(
        t,
        f'Prediction: {label} | Probability: {prob}',
        '-' * 60,
        sep='\n',
    )