# Multi-Modal Model Development

In [None]:
# Cell 1: Import necessary libraries (fixed order and added missing imports)
import os
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer
from torchvision.models import resnet50, ResNet50_Weights
from sklearn.model_selection import train_test_split



In [None]:
# Verify early CUDA availability and set device
print(f"CUDA available: {torch.cuda.is_available()}")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



CUDA available: False


In [43]:
# Cell 2: Path Configuration
PROJECT_ROOT = os.path.dirname(os.getcwd())  # Assuming notebook is in notebooks/
DATASET_PATH = os.path.join(PROJECT_ROOT, "notebooks/dataset")
EMBEDDINGS_PATH = os.path.join(PROJECT_ROOT, "notebooks/embeddings/embeddings_0000_50samples.pt")

In [None]:
# Multimodal Dataset Class
import ast

class MultiModalDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe
        self.valid_indices = self._validate_samples()

    def _validate_samples(self):
        valid = []
        for idx in range(len(self.data)):
            try:
                row = self.data.iloc[idx]
                img_path = os.path.join(DATASET_PATH, "transformed_images", f"{idx}.pt")
                if os.path.exists(img_path):
                    valid.append(idx)
            except:
                continue
        return valid

    def __len__(self):
        return len(self.valid_indices)

    def __getitem__(self, index):
        idx = self.valid_indices[index]
        row = self.data.iloc[idx]

        input_ids_path = os.path.join(DATASET_PATH, row['input_ids'])
        attention_mask_path = os.path.join(DATASET_PATH, row['attention_mask'])
        image_path = os.path.join(DATASET_PATH, "transformed_images", f"{idx}.pt")

        input_ids = torch.load(input_ids_path).squeeze(0)  # [1, 128] → [128]
        attention_mask = torch.load(attention_mask_path).squeeze(0) # [1, 128] → [128]
        # Load image tensor directly from the file
        image = torch.load(image_path)
        labels = torch.tensor(row['labels'], dtype=torch.float)
        # Return labels to tensor
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'image': image,
            'labels': labels
        }


In [45]:
# Cell 4: Collate Function: Handles padding and batching
def collate_fn(batch):
    batch = [item for item in batch if item is not None]
    if not batch:
        return {
            'input_ids': torch.zeros((1, 128), dtype=torch.long),
            'attention_mask': torch.zeros((1, 128), dtype=torch.long),
            'image': torch.zeros((1, 3, 224, 224)),
            'labels': torch.zeros((1, 3))
        }

    return {
        'input_ids': torch.stack([item['input_ids'] for item in batch]),
        'attention_mask': torch.stack([item['attention_mask'] for item in batch]),
        'image': torch.stack([item['image'] for item in batch]),
        'labels': torch.stack([item['labels'] for item in batch])
    }


In [46]:
# Cell 5: Data Preparation
df = pd.read_csv(os.path.join(DATASET_PATH, "dataset_transformed.csv"))
df['labels'] = df['labels'].apply(eval)

# Split data
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [47]:
# Cell 6: Dataset Verification
print("First 5 samples validation:")
for idx in range(5):
    sample_path = os.path.join(DATASET_PATH, "transformed_images", f"{idx}.pt")
    print(f"Sample {idx}: {os.path.exists(sample_path)}")

First 5 samples validation:
Sample 0: True
Sample 1: True
Sample 2: True
Sample 3: True
Sample 4: True


In [48]:
# Cell 7: Dataloader Initialization
train_dataset = MultiModalDataset(train_df)
val_dataset = MultiModalDataset(val_df)

train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=0
)

val_loader = DataLoader(
    val_dataset,
    batch_size=16,
    collate_fn=collate_fn,
    num_workers=0
)

In [None]:
# HateSpeechClassifier Model

class HateSpeechClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.text_encoder = BertModel.from_pretrained("bert-base-uncased")
        self.img_encoder = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
        self.img_encoder = nn.Sequential(*list(self.img_encoder.children())[:-1])
        
        self.classifier = nn.Sequential(
            nn.Linear(768 + 2048, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 3)
        )

    def forward(self, batch):
        text_features = self.text_encoder(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask']
        ).last_hidden_state[:, 0, :]  # [CLS] token

        img_features = self.img_encoder(batch['image']).squeeze(-1).squeeze(-1)  # [B, 2048, 1, 1] → [B, 2048]
        combined = torch.cat((text_features, img_features), dim=1)  # [B, 768+2048]

        return self.classifier(combined)


In [None]:
# Cell 9: Training Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HateSpeechClassifier().to(device)
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.BCEWithLogitsLoss()


In [None]:
# Cell 10: Training Loop

for epoch in range(5):
    model.train()
    total_loss = 0.0
    
    for batch in train_loader:
        optimizer.zero_grad()
        inputs = {k: v.to(device) for k, v in batch.items()}
        outputs = model(inputs)
        loss = criterion(outputs, inputs['labels'])
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        total_loss += loss.item()

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            inputs = {k: v.to(device) for k, v in batch.items()}
            outputs = model(inputs)
            val_loss += criterion(outputs, inputs['labels']).item()
    

    print(f"Epoch {epoch+1}/5")
    print(f"Train Loss: {total_loss/len(train_loader):.4f}")
    print(f"Val Loss: {val_loss/len(val_loader):.4f}\n")

Epoch 1/5
Train Loss: 0.6592
Val Loss: 0.6376

Epoch 2/5
Train Loss: 0.5801
Val Loss: 0.6258

Epoch 3/5
Train Loss: 0.4930
Val Loss: 0.6153

Epoch 4/5
Train Loss: 0.3358
Val Loss: 0.6143

Epoch 5/5
Train Loss: 0.2644
Val Loss: 0.6248



In [52]:
# Cell 11: Model Saving
torch.save(model.state_dict(), os.path.join(PROJECT_ROOT, "app/model_weights.pth"))
print("Model saved successfully!")

Model saved successfully!
