
# RNN_CNN_Models

In [None]:
# Mount Google Drive at /content/drive so the dataset and feature files read
# later in this notebook are reachable. Colab-only: `google.colab` is provided
# by the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import pickle
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Locations of the raw review table and the pre-computed feature pickle on Drive.
DATASET_CSV = '/content/drive/MyDrive/Data Science/Arabic Sentiment/Datasets/arabic_sentiment_reviews.csv'
FEATURES_PKL = "/content/drive/MyDrive/Data Science/Arabic Sentiment/Features/training_data.pkl"

# Review table (its 'label' column supplies the targets).
df = pd.read_csv(DATASET_CSV)

# Pre-computed features are stored as a pickled dict.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load pickles you produced yourself.
with open(FEATURES_PKL, 'rb') as f:
    training_data = pickle.load(f)

# FastText feature matrix.
# Presumably one row per review, in the same order as `df` -- TODO confirm.
X = training_data['X_fasttext']


In [None]:
# Map the raw labels to integer class ids, then expand them to one-hot rows.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['label'])
y_cat = tf.keras.utils.to_categorical(y)

# Two-stage stratified split with a fixed seed:
#   1) hold out 20% of all samples as the test set,
#   2) carve 10% of the remaining samples off as the validation set.
SPLIT_SEED = 42
X_temp, X_test, y_temp, y_test = train_test_split(
    X,
    y_cat,
    test_size=0.2,
    stratify=y_cat,
    random_state=SPLIT_SEED,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.1,
    stratify=y_temp,
    random_state=SPLIT_SEED,
)


In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import numpy as np # Import numpy
# Convert to tensors
def to_tensor(data, labels):
    """Bundle features and one-hot labels into a ``TensorDataset``.

    Args:
        data: array-like of features; converted to a float32 tensor.
        labels: 2-D one-hot (or score) array; each row is reduced to the index
            of its largest entry (argmax over axis 1) and stored as int64.

    Returns:
        TensorDataset yielding ``(features, class_index)`` pairs.
    """
    features = torch.tensor(data, dtype=torch.float32)
    class_ids = np.argmax(labels, axis=1)
    targets = torch.tensor(class_ids, dtype=torch.long)
    return TensorDataset(features, targets)

# One TensorDataset per split, built in train / val / test order.
train_dataset, val_dataset, test_dataset = (
    to_tensor(features, labels)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# Mini-batch loaders; only the training loader reshuffles its samples.
BATCH_SIZE = 64
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader, test_loader = (
    DataLoader(split, batch_size=BATCH_SIZE)
    for split in (val_dataset, test_dataset)
)


In [None]:
import torch.nn as nn

class RNN(nn.Module):
    """LSTM classifier over fixed-size feature vectors.

    Args:
        input_dim: size of each input feature vector.
        hidden_dim: size of the LSTM hidden state.
        output_dim: number of output logits (one per class).
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability. Applied between stacked LSTM layers
            (only when ``n_layers > 1``) and to the final hidden state before
            the linear head.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, dropout):
        super().__init__()
        # nn.LSTM only applies dropout *between* stacked layers and warns when a
        # non-zero value is passed with a single layer, so pass 0 in that case.
        lstm_dropout = dropout if n_layers > 1 else 0.0
        self.rnn = nn.LSTM(input_dim, hidden_dim, num_layers=n_layers,
                           batch_first=True, dropout=lstm_dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return logits of shape ``[batch, output_dim]``.

        ``x`` may be ``[batch, feat]`` (treated as a length-1 sequence) or an
        already sequenced ``[batch, seq, feat]`` tensor.
        """
        if x.dim() == 2:
            x = x.unsqueeze(1)  # [batch, feat] -> [batch, 1, feat]
        _, (hidden, _) = self.rnn(x)
        # hidden[-1] is the last layer's final hidden state; regularise it with
        # the dropout layer (a no-op in eval mode) before the linear head.
        return self.fc(self.dropout(hidden[-1]))

class CNN(nn.Module):
    """1-D convolutional classifier that slides along the feature axis.

    Each input vector is treated as a single-channel signal, passed through
    one convolution + ReLU, globally max-pooled to one value per filter, and
    classified by a linear layer after dropout.

    Args:
        input_dim: accepted but not used by this implementation (the adaptive
            pool collapses any length to 1).
        output_dim: number of output logits.
        dropout: dropout probability applied to the pooled features.
    """

    def __init__(self, input_dim, output_dim, dropout):
        super().__init__()
        n_filters = 100
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=n_filters,
                               kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(output_size=1)
        self.fc = nn.Linear(in_features=n_filters, out_features=output_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Map a ``[B, F]`` batch to ``[B, output_dim]`` logits."""
        signal = x.unsqueeze(1)                      # [B, 1, F]
        activated = self.relu(self.conv1(signal))    # [B, 100, F]
        pooled = self.pool(activated).squeeze(2)     # [B, 100]
        regularised = self.dropout(pooled)
        return self.fc(regularised)


In [None]:
def train(model, dataloader, optimizer, criterion):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: module to optimise; switched to train mode.
        dataloader: iterable yielding ``(inputs, targets)`` tensor batches.
        optimizer: optimiser bound to ``model``'s parameters.
        criterion: callable ``criterion(predictions, targets)`` returning a
            scalar loss tensor.

    Returns:
        float: the sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    # Move batches to wherever the model lives instead of relying on a
    # notebook-level `device` global that is defined in a later cell.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    total_loss = 0.0
    n_batches = 0  # counted by hand so length-less iterables work too
    for X_batch, y_batch in dataloader:
        X_batch, y_batch = X_batch.to(target_device), y_batch.to(target_device)
        optimizer.zero_grad()
        predictions = model(X_batch)
        loss = criterion(predictions, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        n_batches += 1

    if n_batches == 0:
        raise ValueError("dataloader yielded no batches")
    return total_loss / n_batches

def evaluate(model, dataloader, criterion):
    """Score ``model`` on ``dataloader`` without updating it.

    Args:
        model: module to evaluate; switched to eval mode.
        dataloader: iterable yielding ``(inputs, class_index_targets)`` batches.
        criterion: callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor.

    Returns:
        tuple ``(avg_loss, acc, mse, performance)`` where
        ``avg_loss`` is the sum of per-batch losses divided by the batch count,
        ``acc`` is the accuracy of the argmax predictions,
        ``mse`` is the mean squared error between predicted and true class
        indices, and ``performance`` is ``((1 - avg_loss) + acc) / 2``.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    # Move batches to wherever the model lives instead of relying on a
    # notebook-level `device` global that is defined in a later cell.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    total_loss = 0.0
    n_batches = 0  # counted by hand so length-less iterables work too
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch, y_batch = X_batch.to(target_device), y_batch.to(target_device)
            outputs = model(X_batch)
            total_loss += criterion(outputs, y_batch).item()
            n_batches += 1

            # Predicted class = index of the largest logit in each row.
            all_preds.extend(torch.argmax(outputs, dim=1).tolist())
            all_labels.extend(y_batch.tolist())

    if n_batches == 0:
        raise ValueError("dataloader yielded no batches")

    avg_loss = total_loss / n_batches
    acc = accuracy_score(all_labels, all_preds)
    # MSE is taken over class *indices*; with only two classes (0/1) this is
    # simply the error rate, 1 - acc.
    mse = mean_squared_error(all_labels, all_preds)
    # Ad-hoc summary score blending loss and accuracy; it is not bounded to
    # [0, 1] because the loss can exceed 1.
    performance = ((1 - avg_loss) + acc) / 2

    return avg_loss, acc, mse, performance


In [None]:
from sklearn.metrics import accuracy_score, mean_squared_error


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
INPUT_DIM = X.shape[1]
OUTPUT_DIM = y_cat.shape[1]
DROPOUT = 0.5
N_EPOCHS = 10
SEED = 42

# Seed torch so weight initialisation, dropout masks and the training loader's
# shuffle order are repeatable when this cell is re-run (some GPU kernels may
# still be non-deterministic).
torch.manual_seed(SEED)

criterion = nn.CrossEntropyLoss()


def run_experiment(name, model, optimizer, n_epochs=N_EPOCHS):
    """Train ``model`` for ``n_epochs`` and report metrics on every split.

    Prints the per-epoch training loss followed by one summary line each for
    the train, validation and test loaders.

    Args:
        name: short tag used in the printed messages (e.g. "RNN").
        model: module to train, already moved to its device.
        optimizer: optimiser bound to ``model``'s parameters.
        n_epochs: number of passes over ``train_loader``.

    Returns:
        tuple ``(train_eval, val_eval, test_eval)``; each element is the
        ``(loss, acc, mse, perf)`` tuple returned by ``evaluate``.
    """
    print(f"Training {name}...")
    for epoch in range(n_epochs):
        train_loss = train(model, train_loader, optimizer, criterion)
        print(f"[{name}] Epoch {epoch+1}: Train Loss = {train_loss:.4f}")

    print(f"\nFinal Evaluation for {name}:")
    results = tuple(
        evaluate(model, loader, criterion)
        for loader in (train_loader, val_loader, test_loader)
    )
    # Split labels are left-padded to 5 characters so the arrows line up.
    for split, (loss, acc, mse, perf) in zip(("Train", "Val", "Test"), results):
        print(f"{split:<5} → Loss: {loss:.4f}, Acc: {acc:.4f}, MSE: {mse:.4f}, Perf: {perf:.4f}")
    return results


# --- RNN ---
rnn_model = RNN(input_dim=INPUT_DIM, hidden_dim=128, output_dim=OUTPUT_DIM, n_layers=2, dropout=DROPOUT).to(device)
optimizer_rnn = torch.optim.Adam(rnn_model.parameters())
train_eval, val_eval, test_eval = run_experiment("RNN", rnn_model, optimizer_rnn)


# torch.save(rnn_model.state_dict(), '/content/drive/MyDrive/Data Science/rnn_model.pt')
# print("✅ RNN model saved.")

# --- CNN ---
cnn_model = CNN(input_dim=INPUT_DIM, output_dim=OUTPUT_DIM, dropout=DROPOUT).to(device)
optimizer_cnn = torch.optim.Adam(cnn_model.parameters())
train_eval, val_eval, test_eval = run_experiment("CNN", cnn_model, optimizer_cnn)



# torch.save(cnn_model.state_dict(), '/content/drive/MyDrive/Data Science/cnn_model.pt')
# print("✅ CNN model saved.")


Training RNN...
[RNN] Epoch 1: Train Loss = 0.4737
[RNN] Epoch 2: Train Loss = 0.4508
[RNN] Epoch 3: Train Loss = 0.4457
[RNN] Epoch 4: Train Loss = 0.4424
[RNN] Epoch 5: Train Loss = 0.4399
[RNN] Epoch 6: Train Loss = 0.4377
[RNN] Epoch 7: Train Loss = 0.4355
[RNN] Epoch 8: Train Loss = 0.4326
[RNN] Epoch 9: Train Loss = 0.4303
[RNN] Epoch 10: Train Loss = 0.4288

Final Evaluation for RNN:
Train → Loss: 0.4180, Acc: 0.8073, MSE: 0.1927, Perf: 0.6946
Val   → Loss: 0.4187, Acc: 0.8073, MSE: 0.1927, Perf: 0.6943
Test  → Loss: 0.4241, Acc: 0.8033, MSE: 0.1967, Perf: 0.6896
✅ RNN model saved.
Training CNN...
[CNN] Epoch 1: Train Loss = 0.6840
[CNN] Epoch 2: Train Loss = 0.6757
[CNN] Epoch 3: Train Loss = 0.6733
[CNN] Epoch 4: Train Loss = 0.6719
[CNN] Epoch 5: Train Loss = 0.6715
[CNN] Epoch 6: Train Loss = 0.6711
[CNN] Epoch 7: Train Loss = 0.6707
[CNN] Epoch 8: Train Loss = 0.6702
[CNN] Epoch 9: Train Loss = 0.6706
[CNN] Epoch 10: Train Loss = 0.6701

Final Evaluation for CNN:
Train → Lo

In [None]:
!pip install praw

Collecting praw
  Downloading praw-7.8.1-py3-none-any.whl.metadata (9.4 kB)
Collecting prawcore<3,>=2.4 (from praw)
  Downloading prawcore-2.4.0-py3-none-any.whl.metadata (5.0 kB)
Collecting update_checker>=0.18 (from praw)
  Downloading update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Downloading praw-7.8.1-py3-none-any.whl (189 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.3/189.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading prawcore-2.4.0-py3-none-any.whl (17 kB)
Downloading update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Installing collected packages: update_checker, prawcore, praw
Successfully installed praw-7.8.1 prawcore-2.4.0 update_checker-0.18.0
