# Twitter Sentiment Analysis (RNN Family)
This notebook compares SimpleRNN, LSTM, and BiLSTM on a small sample dataset.
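To run on your full dataset, point `DATA_PATH` at the directory containing `twitter_training.csv`; the CSV must provide `text` and `label` columns.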

In [None]:
import re, numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Bidirectional, Dense
from pathlib import Path

# Directory holding the dataset CSV; '.' resolves against the working directory.
DATA_PATH = Path('.')
# Load the tweets; the 'text' and 'label' columns of this frame are used below.
df = pd.read_csv(DATA_PATH.joinpath('twitter_training.csv'))
def clean(t):
    """Normalise a raw tweet string for tokenisation.

    The text is lower-cased, then three substitutions run in order:
    URLs (``http...`` / ``www...``) are removed, every run of characters
    other than ``a-z``, digits, whitespace, ``#``, ``@`` and apostrophes is
    replaced by one space, and whitespace runs are collapsed to one space.
    Leading and trailing whitespace is stripped from the result.
    """
    # (pattern, replacement) pairs; order matters because the URL pattern
    # must see the text before punctuation such as ':' and '/' is blanked.
    substitutions = (
        (r'http\S+|www\S+', ''),
        (r"[^a-z0-9\s#@'’]+", " "),
        (r"\s+", " "),
    )
    text = t.lower()
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()
# Missing tweets would otherwise be stringified by astype(str) into the
# literal token "nan"; map them to empty strings before cleaning.
df['text'] = df['text'].fillna('').astype(str).map(clean)

# Encode the label column as integer class ids (le.classes_ keeps the names).
le = LabelEncoder()
y = le.fit_transform(df['label'])

# Pick the train/test rows BEFORE fitting the tokenizer so the vocabulary is
# learned from training tweets only; fitting on every row lets test-set text
# influence the word index. test_size, random_state and stratification are
# unchanged from the previous direct split of X.
idx_tr, idx_te = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42, stratify=y
)

tok = Tokenizer(num_words=8000, oov_token='<OOV>')
tok.fit_on_texts(df['text'].iloc[idx_tr])

# Convert every tweet to token ids and pad/truncate each sequence to 50 ids.
X = pad_sequences(tok.texts_to_sequences(df['text']), maxlen=50)
X_tr, X_te = X[idx_tr], X[idx_te]
y_tr, y_te = y[idx_tr], y[idx_te]

def build(model_type='bilstm', *, vocab_size=8000, embed_dim=64, max_len=50,
          rnn_units=32, num_classes=None):
    """Build and compile a small recurrent text classifier.

    Args:
        model_type: 'simplernn' or 'lstm' select that recurrent layer; any
            other value (including the default 'bilstm') selects a
            bidirectional LSTM.
        vocab_size: Size of the embedding vocabulary. Should match the
            tokenizer's ``num_words``.
        embed_dim: Dimension of the embedding vectors.
        max_len: Length of the padded input sequences. Should match the
            ``maxlen`` used when padding.
        rnn_units: Number of units in the recurrent layer.
        num_classes: Number of output classes. When None, it is taken from
            the module-level label encoder ``le``.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    if num_classes is None:
        # Fall back to the label encoder fitted at module level.
        num_classes = len(le.classes_)

    m = Sequential()
    m.add(Embedding(vocab_size, embed_dim, input_length=max_len))
    if model_type == 'simplernn':
        m.add(SimpleRNN(rnn_units))
    elif model_type == 'lstm':
        m.add(LSTM(rnn_units))
    else:
        # Unrecognised names intentionally fall back to the BiLSTM variant.
        m.add(Bidirectional(LSTM(rnn_units)))
    m.add(Dense(32, activation='relu'))
    m.add(Dense(num_classes, activation='softmax'))
    # Sparse loss: labels are integer class ids, not one-hot vectors.
    m.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return m

# Train each architecture with identical settings and record its accuracy on
# the held-out test split, keyed by model type.
results = {}
for t in ('simplernn', 'lstm', 'bilstm'):
    m = build(t)
    # 20% of the training split is set aside for validation during fitting.
    m.fit(
        X_tr,
        y_tr,
        epochs=3,
        batch_size=16,
        validation_split=0.2,
        verbose=0,
    )
    loss, acc = m.evaluate(X_te, y_te, verbose=0)
    results.update({t: {'test_acc': float(acc)}})
# Bare expression so the notebook cell displays the comparison.
results