# Multi-Modal Model Development

In [99]:
# Cell 1: Import necessary libraries (fixed order and added missing imports)
import os
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer
from torchvision.models import resnet50, ResNet50_Weights
from sklearn.model_selection import train_test_split



In [100]:
# Verify early CUDA availability and set device
print(f"CUDA available: {torch.cuda.is_available()}")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


CUDA available: False


In [101]:
# Cell 2: Path Configuration
PROJECT_ROOT = os.path.dirname(os.getcwd())  # Assuming notebook is in notebooks/
DATASET_PATH = os.path.join(PROJECT_ROOT, "notebooks/dataset")
EMBEDDINGS_PATH = os.path.join(PROJECT_ROOT, "notebooks/embeddings/embeddings_0001_50samples.pt")

In [102]:
# Multimodal Dataset Class
import ast

class MultiModalDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe
        self.valid_indices = self._validate_samples()

    def _validate_samples(self):
        valid = []
        for idx in range(len(self.data)):
            try:
                row = self.data.iloc[idx]
                img_path = os.path.join(DATASET_PATH, "transformed_images", f"{idx}.pt")
                if os.path.exists(img_path):
                    valid.append(idx)
            except:
                continue
        return valid

    def __len__(self):
        return len(self.valid_indices)

    def __getitem__(self, index):
        idx = self.valid_indices[index]
        row = self.data.iloc[idx]

        input_ids_path = os.path.join(DATASET_PATH, row['input_ids'])
        attention_mask_path = os.path.join(DATASET_PATH, row['attention_mask'])
        image_path = os.path.join(DATASET_PATH, "transformed_images", f"{idx}.pt")

        input_ids = torch.load(input_ids_path).squeeze(0)  # [1, 128] → [128]
        attention_mask = torch.load(attention_mask_path).squeeze(0) # [1, 128] → [128]
        # Load image tensor directly from the file
        image = torch.load(image_path)
        labels = torch.tensor(row['labels'], dtype=torch.float)
        # Return labels to tensor
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'image': image,
            'labels': labels
        }


In [103]:
# Cell 4: Collate Function: Handles padding and batching
def collate_fn(batch):
    batch = [item for item in batch if item is not None]
    if not batch:
        return {
            'input_ids': torch.zeros((1, 128), dtype=torch.long),
            'attention_mask': torch.zeros((1, 128), dtype=torch.long),
            'image': torch.zeros((1, 3, 224, 224)),
            'labels': torch.zeros((1, 6))
        }

    return {
        'input_ids': torch.stack([item['input_ids'] for item in batch]),
        'attention_mask': torch.stack([item['attention_mask'] for item in batch]),
        'image': torch.stack([item['image'] for item in batch]),
        'labels': torch.stack([item['labels'] for item in batch])
    }


In [104]:
# Cell 5: Data Preparation
df = pd.read_csv(os.path.join(DATASET_PATH, "dataset_transformed.csv"))
df['labels'] = df['labels'].apply(eval)

# Split data
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [105]:
# Cell 6: Dataset Verification
print("First 5 samples validation:")
for idx in range(5):
    sample_path = os.path.join(DATASET_PATH, "transformed_images", f"{idx}.pt")
    print(f"Sample {idx}: {os.path.exists(sample_path)}")

First 5 samples validation:
Sample 0: True
Sample 1: True
Sample 2: True
Sample 3: True
Sample 4: True


In [106]:
# Cell 7: Dataloader Initialization
train_dataset = MultiModalDataset(train_df)
val_dataset = MultiModalDataset(val_df)

train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=0
)

val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    collate_fn=collate_fn,
    num_workers=0
)

In [107]:
# HateSpeechClassifier Model

class HateSpeechClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.text_encoder = BertModel.from_pretrained("bert-base-uncased")
        self.img_encoder = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
        self.img_encoder = nn.Sequential(*list(self.img_encoder.children())[:-1])
        
        self.classifier = nn.Sequential(
            nn.Linear(768 + 2048, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 6)
        )

    def forward(self, batch):
        text_features = self.text_encoder(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask']
        ).last_hidden_state[:, 0, :]  # [CLS] token

        img_features = self.img_encoder(batch['image']).squeeze(-1).squeeze(-1)  # [B, 2048, 1, 1] → [B, 2048]
        combined = torch.cat((text_features, img_features), dim=1)  # [B, 768+2048]

        return self.classifier(combined)


In [None]:
# Cell 9: Training Setup

from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import classification_report
import numpy as np

EPOCHS = 2
BATCH_SIZE = 32
PATIENCE = 3

model = HateSpeechClassifier().to(device)
optimizer = optim.AdamW(model.parameters(), lr=2e-5)

total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=total_steps
)

# If class is more imbalanced, we will use pos_weighted loss
"""
pos_weight = torch.tensor([1.0, 2.0, 3.0, 3.0, 4.0, 3.0]).to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
"""
criterion = nn.BCEWithLogitsLoss()



'\ndevice = torch.device("cuda" if torch.cuda.is_available() else "cpu")\nmodel = HateSpeechClassifier().to(device)\noptimizer = optim.AdamW(model.parameters(), lr=2e-5)\ncriterion = nn.BCEWithLogitsLoss()\n'

In [None]:
# Cell 10: Training Loop

# Hate Speech Labels
LABELS = [
    "Non-Hate",
    "Racist",
    "Sexist",
    "Homophobic",
    "Religion",     
    "Other Hate"   
]

best_val_loss = float('inf')
wait = 0

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0.0
    
    for batch in train_loader:
        optimizer.zero_grad()
        inputs = {k: v.to(device) for k, v in batch.items()}
        outputs = model(inputs)
        loss = criterion(outputs, inputs['labels'])
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        total_loss += loss.item()

    # Validation
    model.eval()
    val_loss = 0.0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in val_loader:
            inputs = {k: v.to(device) for k, v in batch.items()}
            outputs = model(inputs)
            loss = criterion(outputs, inputs['labels'])
            val_loss += loss.item()

            preds = torch.sigmoid(outputs).cpu().numpy() > 0.5
            all_preds.append(preds)
            all_labels.append(inputs['labels'].cpu().numpy())

    avg_train_loss = total_loss / len(train_loader)
    avg_val_loss = val_loss / len(val_loader)

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {avg_train_loss:.4f}")
    print(f"Val Loss: {avg_val_loss:.4f}")

    # Metrics
    preds_all = np.vstack(all_preds)
    labels_all = np.vstack(all_labels)
    print("Classification Report:")
    print(classification_report(labels_all, preds_all, target_names=LABELS, zero_division=0))

    # Save best model
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        wait = 0
        torch.save(model.state_dict(), os.path.join(PROJECT_ROOT, "app/best_model_weights.pth"))
        print("✅ Best model saved.")
    else:
        wait += 1
        if wait >= PATIENCE:
            print("⛔ Early stopping triggered.")
            break




Epoch 1/2
Train Loss: 0.6956
Val Loss: 0.6639
Classification Report:
              precision    recall  f1-score   support

    Non-Hate       0.89      0.89      0.89         9
      Racist       0.00      0.00      0.00         4
      Sexist       1.00      0.50      0.67         2
  Homophobic       0.20      1.00      0.33         1
    Religion       0.00      0.00      0.00         0
  Other Hate       0.20      1.00      0.33         1

   micro avg       0.48      0.65      0.55        17
   macro avg       0.38      0.56      0.37        17
weighted avg       0.61      0.65      0.59        17
 samples avg       0.51      0.72      0.56        17

✅ Best model saved.
Epoch 2/2
Train Loss: 0.6480
Val Loss: 0.6434
Classification Report:
              precision    recall  f1-score   support

    Non-Hate       0.90      1.00      0.95         9
      Racist       0.00      0.00      0.00         4
      Sexist       0.00      0.00      0.00         2
  Homophobic       0.33     

'\n\nfor epoch in range(5):\n    model.train()\n    total_loss = 0.0\n    \n    for batch in train_loader:\n        optimizer.zero_grad()\n        inputs = {k: v.to(device) for k, v in batch.items()}\n        outputs = model(inputs)\n        loss = criterion(outputs, inputs[\'labels\'])\n        loss.backward()\n        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n        optimizer.step()\n        total_loss += loss.item()\n\n    # Validation\n    model.eval()\n    val_loss = 0.0\n    with torch.no_grad():\n        for batch in val_loader:\n            inputs = {k: v.to(device) for k, v in batch.items()}\n            outputs = model(inputs)\n            val_loss += criterion(outputs, inputs[\'labels\']).item()\n    \n\n    print(f"Epoch {epoch+1}/5")\n    print(f"Train Loss: {total_loss/len(train_loader):.4f}")\n    print(f"Val Loss: {val_loss/len(val_loader):.4f}\n")\n\n\n'