# Introduction
This notebook uses functions defined in the `train_lstm.py` script to train an LSTM model on the Sentiment140 dataset, working from tokenized and padded text sequences.

### Setup

In [None]:
import sys

# Make the directory that contains train_lstm.py importable.
# The membership check keeps this cell idempotent: re-running it does not
# append a duplicate entry to sys.path each time.
# NOTE: the path is relative, so it is resolved against the kernel's current
# working directory when the import happens.
if '../../src/models/' not in sys.path:
    sys.path.append('../../src/models/')

In [None]:
from train_lstm import (
    load_data, tokenize_and_pad, build_lstm_attention_model,
    train_lstm_attention_model, evaluate_model, save_model_and_tokenizer
)

### Load the cleaned data

In [None]:
# Read the processed CSV and discard rows whose `clean_text` is missing,
# as one chained expression so `df` is only ever bound to the filtered frame.
df = load_data('../../data/processed/cleaned_data.csv').dropna(subset=['clean_text'])

### Tokenize and pad sequences

In [None]:
# Turn the cleaned frame into model inputs using the project helper.
# The helper returns three values, unpacked here as X, y and tokenizer
# (presumably padded sequences, labels and the fitted tokenizer — confirm
# against train_lstm.tokenize_and_pad).
X, y, tokenizer = tokenize_and_pad(df)

### Build LSTM Model

In [None]:
# Build the model. No arguments are passed, so whatever defaults the helper
# defines (vocabulary size, sequence length, etc., if any) are used as-is.
model = build_lstm_attention_model()

In [None]:
# Show the model's summary before training
# (presumably the Keras layer/parameter table — confirm the model type).
model.summary()

### Train the LSTM Model

In [None]:
# Train on the full X, y produced by the tokenization cell. The helper returns
# a model and a history object; `model` is rebound to the returned instance.
# NOTE(review): any train/validation split would have to happen inside the
# helper — nothing is split in this notebook. Confirm in train_lstm.py.
model, history = train_lstm_attention_model(X, y, model)

### Evaluate the Model

In [None]:
# Score the trained model and print the accuracy plus the classification report.
# NOTE(review): X, y here are the same arrays that were passed to the training
# call, so this is not a held-out metric unless evaluate_model (or the training
# helper) splits the data internally — confirm before quoting these numbers.
accuracy, report = evaluate_model(model, X, y)
print(f"Model Accuracy on Full Dataset: {accuracy}")
print("\nClassification Report:\n", report)

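### Visualization of training history (e.g., loss and accuracy over epochs)

**TODO:** this section is currently empty — plot the per-epoch curves from the `history` object returned by the training cell.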

### Save the model and tokenizer

In [None]:
# Persist the trained model and the tokenizer to the project's models directory.
# Arguments are positional: model, tokenizer, model path, tokenizer path.
# NOTE(review): the extensions suggest HDF5 for the model and pickle for the
# tokenizer — confirm the actual formats in save_model_and_tokenizer. Both
# paths are relative to the kernel's working directory.
save_model_and_tokenizer(
    model, tokenizer,
    '../../models/lstm_model.h5',
    '../../models/tokenizer_lstm.pkl'
)