# Multi-Modal Model Development

In [11]:
# Cell 1: Import necessary libraries (fixed order and added missing imports)
import os
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer
from torchvision.models import resnet50, ResNet50_Weights
from sklearn.model_selection import train_test_split

In [12]:
# Cell 2: Verify early CUDA availability and set device
print(f"CUDA available: {torch.cuda.is_available()}")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

CUDA available: False


In [13]:
# Cell 3: MultiModalModel class (fixed input handling)
class MultiModalModel(nn.Module):
    def __init__(self, text_encoder, image_encoder, hidden_dim=512, num_classes=3):
        super().__init__()
        self.text_encoder = text_encoder
        self.image_encoder = image_encoder
        
        # Freeze encoders
        for param in self.text_encoder.parameters():
            param.requires_grad = False
        for param in self.image_encoder.parameters():
            param.requires_grad = False
            
        self.classifier = nn.Sequential(
            nn.Linear(768 + 2048, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, num_classes)
    
    # Modified to accept dictionary input
    def forward(self, batch):
        # Text features
        text_out = self.text_encoder(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask']
        ).last_hidden_state[:,0,:]
        
        # Image features
        img_out = self.image_encoder(batch['image']).flatten(1)
        
        # Combine and classify
        combined = torch.cat((text_out, img_out), dim=1)
        return self.classifier(combined)

SyntaxError: invalid syntax (582017593.py, line 22)

In [14]:
# Cell 4: Dataset Class (with path validation)
class HateSpeechDataset(Dataset):
    def __init__(self, dataframe, root_dir="dataset"):
        self.data = dataframe
        self.root_dir = root_dir
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        try:
            row = self.data.iloc[idx]
            
            # Validate paths exist
            if not all(os.path.exists(os.path.join(self.root_dir, row[col])) 
               for col in ['input_ids', 'attention_mask', 'transformed_image_path']]):
                raise FileNotFoundError("Missing data files")
            
            return {
                'input_ids': torch.load(os.path.join(self.root_dir, row['input_ids'])),
                'attention_mask': torch.load(os.path.join(self.root_dir, row['attention_mask'])),
                'image': torch.load(os.path.join(self.root_dir, row['transformed_image_path'])),
                'labels': torch.tensor(row['labels'], dtype=torch.float)
            }
        except Exception as e:
            print(f"Skipping sample {idx}: {str(e)}")
            return None

SyntaxError: closing parenthesis ']' does not match opening parenthesis '(' on line 15 (4237077814.py, line 16)

In [15]:
# Cell 5: Data Loading with Validation (fixed split handling)
# Load and verify dataframe
full_df = pd.read_csv("dataset/dataset_transformed.csv")
full_df['labels'] = full_df['labels'].apply(eval)  # Ensure labels are lists

# Split data
train_df, val_df = train_test_split(full_df, test_size=0.2, random_state=42)

# Create datasets
train_dataset = HateSpeechDataset(train_df)
val_dataset = HateSpeechDataset(val_df)

# Create dataloaders with proper collate_fn
def collate_fn(batch):
    batch = [item for item in batch if item is not None]
    return {
        'input_ids': torch.stack([item['input_ids'] for item in batch]),
        'attention_mask': torch.stack([item['attention_mask'] for item in batch]),
        'image': torch.stack([item['image'] for item in batch]),
        'labels': torch.stack([item['labels'] for item in batch])
    }

train_dataloader = DataLoader(
    train_dataset,
    batch_size=16,  # Reduced for stability
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=16,
    num_workers=0,
    collate_fn=collate_fn
)

In [16]:
# Cell 6: Model Initialization (with proper weight loading)
text_encoder = BertModel.from_pretrained("bert-base-uncased")
image_encoder = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
image_encoder = nn.Sequential(*list(image_encoder.children())[:-1])

model = MultiModalModel(text_encoder, image_encoder).to(device)

In [17]:
# Cell 7: Training Setup (with gradient clipping)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=2e-5, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=1)

In [18]:
# Cell 8: Training Loop (with proper validation)
best_val_loss = float('inf')
if __name__ == '__main__':
    for epoch in range(5):
        # Training
        model.train()
        train_loss = 0.0
        for batch in train_dataloader:
            optimizer.zero_grad()
            
            # Move data to device
            inputs = {k: v.to(device) for k, v in batch.items()}
            
            outputs = model(inputs)
            loss = criterion(outputs, inputs['labels'])
            loss.backward()
            
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            
            train_loss += loss.item()
        
        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in val_dataloader:
                inputs = {k: v.to(device) for k, v in batch.items()}
                outputs = model(inputs)
                val_loss += criterion(outputs, inputs['labels']).item()
        
        # Update scheduler
        avg_val_loss = val_loss / len(val_dataloader)
        scheduler.step(avg_val_loss)
        
        print(f"Epoch {epoch+1}/5")
        print(f"Train Loss: {train_loss/len(train_dataloader):.4f}")
        print(f"Val Loss: {avg_val_loss:.4f}")
        
        # Save best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), "best_model.pth")
            print("Saved new best model")

Error loading sample 3: [Errno 22] Invalid argument: 'tensor([  101, 24761,  6508,  6904, 13871,  4140,   102,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,\n            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,

RuntimeError: stack expects a non-empty TensorList

In [None]:
# Cell 9: Final Model Saving (Hugging Face compatible)
print("\nTraining completed. Saving final model...")
model.load_state_dict(torch.load("best_model.pth"))
torch.save({
    'model_state': model.state_dict(),
    'text_config': text_encoder.config,
    'image_config': image_encoder.state_dict()
}, "hate_speech_multimodal_model.pth")

print("Model saved successfully!")

## Loss & Optimizer

In [None]:

# Define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

## Model Training

In [None]:
# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for input_ids, attention_mask, images, labels in dataloader:
        # Move data to GPU if available
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        images = images.to(device)
        labels = labels.to(device)
        
        # Zero the gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(input_ids, attention_mask, images)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Update running loss
        running_loss += loss.item()
    
    # Print epoch loss
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader):.4f}")

NameError: name 'torch' is not defined

## Model Evaluation

In [None]:
model.eval()
val_loss = 0.0
with torch.no_grad():
    for input_ids, attention_mask, images, labels in dataloader:
        # Move data to GPU if available
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(input_ids, attention_mask, images)
        loss = criterion(outputs, labels)
        val_loss += loss.item()

# Print validation loss
print(f"Validation Loss: {val_loss/len(dataloader):.4f}")

NameError: name 'torch' is not defined