### Imports


In [10]:
import re

import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
from nltk.tokenize import word_tokenize
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences


### Preprocessing 

In [4]:
def tokenize(text):
    """Convert raw text into a list of lowercase tokens.

    Steps, in order:
      1. Delete URL-like runs (anything starting with ``http``, ``www`` or
         ``https`` up to the next whitespace).
      2. Delete every character that is not ``a-z``, ``A-Z``, whitespace
         or ``.``.
      3. Lowercase the remainder and split it with ``word_tokenize``.
      4. Keep only tokens that are purely alphabetic or exactly ``'.'``.

    Args:
        text: The string to tokenize.

    Returns:
        A list of token strings.
    """
    # NOTE(review): word_tokenize presumably needs NLTK tokenizer data to be
    # installed already - confirm the environment provides it.
    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', text)
    letters_only = re.sub(r'[^a-zA-Z\s.]', '', without_urls)

    kept = []
    for token in word_tokenize(letters_only.lower()):
        if token == '.' or token.isalpha():
            kept.append(token)
    return kept

### Load and Prepare Dataset

In [None]:
# Single source of truth for the classes kept and their integer ids.
label2idx = {'Positive': 0, 'Negative': 1, 'Neutral': 2}

# Column names are supplied explicitly, so pandas treats the first CSV row as data.
df = pd.read_csv('twitter_training.csv', names=['id', 'game', 'label', 'text'], quotechar='"', skipinitialspace=True)
df = df[df['label'].isin(list(label2idx))]
df = df.drop_duplicates(subset=['text'])
# .copy() gives an independent frame so the column assignments below do not
# write into a slice of the filtered data (avoids SettingWithCopyWarning).
df = df.dropna(subset=['text']).copy()

df['tokens'] = df['text'].apply(tokenize)

all_words = [word for tokens in df['tokens'] for word in tokens]
# sorted() makes the word -> index mapping deterministic. A bare set has an
# arbitrary iteration order that can differ between kernel sessions, which
# would silently change every index fed to the model.
vocab = sorted(set(all_words))
word2idx = {word: idx + 1 for idx, word in enumerate(vocab)}  # index 0 is reserved for padding
vocab_size = len(word2idx) + 1  # +1 accounts for the reserved padding index
df['indices'] = df['tokens'].apply(lambda x: [word2idx[w] for w in x if w in word2idx])

# Every sequence is padded (at the end) or truncated to this many tokens.
max_length = 20
X = pad_sequences(df['indices'].tolist(), maxlen=max_length, padding='post')

y = df['label'].map(label2idx).to_numpy()
y_one_hot = tf.keras.utils.to_categorical(y, num_classes=len(label2idx))

### RNN Model

In [6]:
# Seed Python, NumPy and TensorFlow before any weights are created so that
# weight initialisation and batch shuffling are repeatable across runs.
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

# NOTE(review): index 0 is the padding index; consider mask_zero=True on the
# Embedding so the RNN skips padded steps (this would change results).
model = Sequential([
    # Explicit input spec replaces the deprecated Embedding(input_length=...) argument.
    tf.keras.Input(shape=(max_length,)),
    Embedding(input_dim=vocab_size, output_dim=32),
    SimpleRNN(64, return_sequences=False),  # only the final hidden state is passed on
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')  # Output layer for 3 classes
])


# Targets are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])




### Train the Model

In [7]:
# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)

# Training hyperparameters, named so they are easy to find and tune.
EPOCHS = 3
BATCH_SIZE = 32
history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))

Epoch 1/3
1433/1433 ━━━━━━━━━━━━━━━━━━━━ 32s 20ms/step - accuracy: 0.5579 - loss: 0.9069 - val_accuracy: 0.7988 - val_loss: 0.5204
Epoch 2/3
1433/1433 ━━━━━━━━━━━━━━━━━━━━ 27s 19ms/step - accuracy: 0.8793 - loss: 0.3383 - val_accuracy: 0.8537 - val_loss: 0.3954
Epoch 3/3
1433/1433 ━━━━━━━━━━━━━━━━━━━━ 27s 18ms/step - accuracy: 0.9445 - loss: 0.1618 - val_accuracy: 0.8601 - val_loss: 0.4078


In [8]:
def predict_label(text):
    """Predict the sentiment label for a single piece of text.

    The text is tokenized with ``tokenize``, encoded with the module-level
    ``word2idx`` vocabulary, padded/truncated to ``max_length`` and passed
    through the module-level ``model``.

    Args:
        text: The raw string to classify.

    Returns:
        The key of ``label2idx`` whose index has the highest predicted
        probability.
    """
    tokens = tokenize(text)
    # Drop out-of-vocabulary words, exactly as the training-time encoding
    # does. Mapping them to 0 would insert the padding index in the middle
    # of the sequence, a pattern the model never saw during training.
    sequence = [word2idx[w] for w in tokens if w in word2idx]
    padded_sequence = pad_sequences([sequence], maxlen=max_length, padding='post')
    prediction = model.predict(padded_sequence, verbose=0)
    label_idx = int(np.argmax(prediction, axis=-1)[0])
    # Invert label2idx rather than hard-coding the names a second time, so
    # the two mappings cannot drift apart.
    idx2label = {idx: label for label, idx in label2idx.items()}
    return idx2label[label_idx]

### Evaluate the Model

In [11]:
# Overall loss/accuracy on the held-out validation split.
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f"\nValidation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

# Collapse predicted probabilities and one-hot targets back to class indices.
y_val_pred = model.predict(X_val, verbose=0)
y_val_pred_labels = np.argmax(y_val_pred, axis=1)
y_val_true_labels = np.argmax(y_val, axis=1)

# Derive class names (ordered by index) from label2idx instead of repeating
# them by hand. Passing `labels` explicitly pins each name to its index even
# if some class happens to be absent from the validation split.
class_names = sorted(label2idx, key=label2idx.get)
class_report = classification_report(
    y_val_true_labels,
    y_val_pred_labels,
    labels=[label2idx[name] for name in class_names],
    target_names=class_names,
)
print("\nClassification Report:")
print(class_report)

359/359 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8597 - loss: 0.4119

Validation Loss: 0.4078
Validation Accuracy: 0.8601

Classification Report:
              precision    recall  f1-score   support

    Positive       0.84      0.88      0.86      3770
    Negative       0.89      0.86      0.87      4176
     Neutral       0.85      0.84      0.85      3513

    accuracy                           0.86     11459
   macro avg       0.86      0.86      0.86     11459
weighted avg       0.86      0.86      0.86     11459



In [12]:
# Hand-written sample sentences to run through predict_label.
test_sentences = [
    "I love playing Borderlands 2 so much!",
    "Borderlands 3 is a complete disappointment",
    "Just finished a Borderlands session today",
]

print("\nPredictions:")
# map() is lazy, so each sentence is classified right before its line is printed.
for sentence, prediction in zip(test_sentences, map(predict_label, test_sentences)):
    print(f"Sentence: '{sentence}' -> Label: {prediction}")


Predictions:
Sentence: 'I love playing Borderlands 2 so much!' -> Label: Positive
Sentence: 'Borderlands 3 is a complete disappointment' -> Label: Negative
Sentence: 'Just finished a Borderlands session today' -> Label: Positive
