In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from transformers import RobertaTokenizer, RobertaModel
import pandas as pd
from PIL import Image


In [2]:
import pandas as pd
import pickle

# Load the Excel file
df = pd.read_excel('/kaggle/input/cyber-excel/Copy of Cyberbully_corrected_emotion_sentiment.xlsx')
df = df.drop(columns=['Unnamed: 10', 'Unnamed: 11'])
# Display the first few rows to ensure it is loaded correctly
# print(df)

df_cleaned = df.dropna()
df=df_cleaned
# print(df)
import os
import pandas as pd

# Assuming your DataFrame is called df and contains the 'img_id' column
# Assuming image paths are in a directory (img_dir) and filenames correspond to 'img_id'

img_dir = "/kaggle/input/multibully/bully_data"

# Define a function to check if the image size is zero
def is_zero_size(img_id, img_dir):
    img_path = os.path.join(img_dir, img_id)
    return os.path.exists(img_path) and os.path.getsize(img_path) == 0

# Filter out rows with zero-size images
df['is_zero_size'] = df['Img_Name'].apply(lambda img_id: is_zero_size(img_id, img_dir))
df_filtered = df[df['is_zero_size'] == False].drop(columns='is_zero_size')

# Now, df_filtered contains only rows with non-zero-size images
# print(df_filtered)
df=df_filtered
df_cleaned = df[df['Img_Name'] != '2644.jpg']
df=df_cleaned


In [3]:
df.shape

(3078, 10)

In [4]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [5]:
df.head(1)

Unnamed: 0,Img_Name,Img_Text,Img_Text_Label,Img_Label,Text_Label,Sentiment,Emotion,Sarcasm,Harmful_Score,Target
0,0.jpg,Shivam @shivamishraa Girls be named naina and ...,Bully,Nonbully,Bully,Negative,Disgust,Yes,Partially-Harmful,Individual


In [6]:
class MemeDataset(Dataset):
    def __init__(self, dataframe, transform):
        self.dataframe = dataframe
        self.transform = transform
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        self.img_folder = '/kaggle/input/multibully/bully_data'
        # Define label mappings
        self.text_label_mapping = {
            "Bully": 1,
            "Nonbully": 0
        }
        
        self.sentiment_mapping = {
            "Positive":1,
            "Neutral": 0,
            "Negative": 2
        }
        
        self.emotion_mapping = {
            "Disgust": 0,
            "Ridicule": 1,
            "Sadness": 2,
            "Surprise": 3,
            "Anticipation": 4,
            "Angry": 5,
            "Happiness": 6,
            "Other": 7,
            "Trust": 8,
            "Fear": 9
        }
        
        self.sarcasm_mapping = {
            "Yes": 1,
            "No": 0
        }
        
        self.harmful_score_mapping = {
            "Harmless": 0,
            "Partially-Harmful": 1,
            "Very-Harmful": 2
        }
        
        self.target_mapping = {
            "Individual": 0,
            "Society": 1,
            "Organization": 2,
            "Community": 3
        }
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        # Load image
        img_name = self.dataframe.iloc[idx]['Img_Name']
        img_path = os.path.join(self.img_folder, img_name)
        image = Image.open(img_path).convert('RGB')
        image = self.transform(image)
        
        # Load and tokenize text
        text = self.dataframe.iloc[idx]['Img_Text']
        inputs = self.tokenizer(text, return_tensors='pt', padding='max_length', truncation=True, max_length=128)
        
        # Get labels and apply mappings
        sentiment_label = torch.tensor(self.sentiment_mapping[self.dataframe.iloc[idx]['Sentiment']], dtype=torch.long)
        emotion_label = torch.tensor(self.emotion_mapping[self.dataframe.iloc[idx]['Emotion']], dtype=torch.long)
        sarcasm_label = torch.tensor(self.sarcasm_mapping[self.dataframe.iloc[idx]['Sarcasm']], dtype=torch.float)  # Binary sarcasm
        bully_label = torch.tensor(self.text_label_mapping[self.dataframe.iloc[idx]['Img_Label']], dtype=torch.long)  # Bully detection
        harmful_score_label = torch.tensor(self.harmful_score_mapping[self.dataframe.iloc[idx]['Harmful_Score']], dtype=torch.long)
        target_label = torch.tensor(self.target_mapping[self.dataframe.iloc[idx]['Target']], dtype=torch.long)
        
        return image, inputs['input_ids'].squeeze(), inputs['attention_mask'].squeeze(), sentiment_label, emotion_label, sarcasm_label, bully_label, harmful_score_label, target_label


In [7]:
class Image_text(nn.Module):
    def __init__(self):
        super(Image_text, self).__init__()
        
        # Visual branch (CNN)
        self.resnet = models.resnet50(pretrained=True)
        self.resnet.fc = nn.Identity()  # Remove the final classification layer
        
        # Textual branch (RoBERTa)
        self.roberta = RobertaModel.from_pretrained('roberta-base')
        
        # Shared fully connected layers
        self.fc_shared = nn.Sequential(
            nn.Linear(2048 + 768, 512),
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        
        # Task-specific heads
        self.sentiment_head = nn.Linear(512, 3)  # Sentiment: 3 classes
        self.emotion_head = nn.Linear(512, 10)    # Emotion: 6 classes
        self.sarcasm_head = nn.Linear(512, 1)    # Sarcasm: binary classification
        self.bully_head = nn.Linear(512, 2)      # Cyberbullying: 2 classes
        self.harmful_head = nn.Linear(512, 3)
        self.target_head = nn.Linear(512, 4)
    def forward(self, image, text_input_ids, text_attention_mask):
        # Visual features
        img_features = self.resnet(image)
        
        # Textual features
        text_outputs = self.roberta(input_ids=text_input_ids, attention_mask=text_attention_mask)
        text_features = text_outputs.pooler_output
        
        # Concatenate the visual and textual features
        combined_features = torch.cat((img_features, text_features), dim=1)
        
        # Shared layers
        shared_out = self.fc_shared(combined_features)
        
        # Task-specific outputs
        sentiment_out = self.sentiment_head(shared_out)
        emotion_out = self.emotion_head(shared_out)
        sarcasm_out = torch.sigmoid(self.sarcasm_head(shared_out))
        bully_out = self.bully_head(shared_out)
        harmful_out = self.harmful_head(shared_out)
        target_out = self.target_head(shared_out)
        
        return sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_out, target_out


In [8]:
class Image_text_emotion(nn.Module):
    def __init__(self):
        super(Image_text_emotion, self).__init__()
        
        # Visual branch (CNN)
        self.resnet = models.resnet50(pretrained=True)
        self.resnet.fc = nn.Identity()  # Remove the final classification layer
        
        # Textual branch (RoBERTa)
        self.roberta = RobertaModel.from_pretrained('roberta-base')
        
        # Shared fully connected layers
        self.fc_shared = nn.Sequential(
            nn.Linear(2048 + 768, 512),  # Concatenation of visual and textual features
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        
        # Task-specific heads
        self.sentiment_head = nn.Linear(512, 3)  # Sentiment: 3 classes
        self.emotion_head = nn.Linear(512, 10)  # Emotion: 10 classes
        self.sarcasm_head = nn.Linear(512, 1)  # Sarcasm: binary classification
        # self.bully_head = nn.Linear(512, 2)  # Bully: binary classification
        self.bully_fc = nn.Sequential(
            nn.Linear(10 + 512, 256),  # Input: all task outputs + shared features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 2)  # 2 classes for bully
        )
        # self.harmful_head = nn.Linear(512, 3)  # Harmful score: 3 classes
        self.harmful_fc = nn.Sequential(
            nn.Linear(10 + 512, 256),  # Input: all task outputs + shared features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 3)  # 3 classes for harmful
        )
        # Final target head
        self.target_fc = nn.Sequential(
            nn.Linear(10 + 512, 256),  # Input: all task outputs + shared features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 4)  # 4 classes for Target
        )
        
    def forward(self, image, text_input_ids, text_attention_mask):
        # Visual features
        img_features = self.resnet(image)
        
        # Textual features
        text_outputs = self.roberta(input_ids=text_input_ids, attention_mask=text_attention_mask)
        text_features = text_outputs.pooler_output
        
        # Concatenate the visual and textual features
        combined_features = torch.cat((img_features, text_features), dim=1)
        
        # Shared features
        shared_out = self.fc_shared(combined_features)
        
        # Task-specific predictions
        sentiment_out = self.sentiment_head(shared_out)
        emotion_out = self.emotion_head(shared_out)
        sarcasm_out = torch.sigmoid(self.sarcasm_head(shared_out))  # Binary
        # bully_out = self.bully_head(shared_out)
        # harmful_out = self.harmful_head(shared_out)
        
        # Concatenate all task outputs with shared features for target prediction
        aux_features = torch.cat((
            emotion_out,
            shared_out  # Shared features
        ), dim=1)
        
        # Final target prediction
        bully_out = self.bully_fc(aux_features)
        harmful_out = self.harmful_fc(aux_features)
        target_out = self.target_fc(aux_features)
        
        return sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_out, target_out


In [9]:
class Image_text_sentiment(nn.Module):
    def __init__(self):
        super(Image_text_sentiment, self).__init__()
        
        # Visual branch (CNN)
        self.resnet = models.resnet50(pretrained=True)
        self.resnet.fc = nn.Identity()  # Remove the final classification layer
        
        # Textual branch (RoBERTa)
        self.roberta = RobertaModel.from_pretrained('roberta-base')
        
        # Shared fully connected layers
        self.fc_shared = nn.Sequential(
            nn.Linear(2048 + 768, 512),  # Concatenation of visual and textual features
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        
        # Task-specific heads
        self.sentiment_head = nn.Linear(512, 3)  # Sentiment: 3 classes
        self.emotion_head = nn.Linear(512, 10)  # Emotion: 10 classes
        self.sarcasm_head = nn.Linear(512, 1)  # Sarcasm: binary classification
        # self.bully_head = nn.Linear(512, 2)  # Bully: binary classification
        self.bully_fc = nn.Sequential(
            nn.Linear(3 + 512, 256),  # Input: all task outputs + shared features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 2)  # 2 classes for bully
        )
        # self.harmful_head = nn.Linear(512, 3)  # Harmful score: 3 classes
        self.harmful_fc = nn.Sequential(
            nn.Linear(3 + 512, 256),  # Input: all task outputs + shared features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 3)  # 3 classes for harmful
        )
        # Final target head
        self.target_fc = nn.Sequential(
            nn.Linear(3 + 512, 256),  # Input: all task outputs + shared features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 4)  # 4 classes for Target
        )
        
    def forward(self, image, text_input_ids, text_attention_mask):
        # Visual features
        img_features = self.resnet(image)
        
        # Textual features
        text_outputs = self.roberta(input_ids=text_input_ids, attention_mask=text_attention_mask)
        text_features = text_outputs.pooler_output
        
        # Concatenate the visual and textual features
        combined_features = torch.cat((img_features, text_features), dim=1)
        
        # Shared features
        shared_out = self.fc_shared(combined_features)
        
        # Task-specific predictions
        sentiment_out = self.sentiment_head(shared_out)
        emotion_out = self.emotion_head(shared_out)
        sarcasm_out = torch.sigmoid(self.sarcasm_head(shared_out))  # Binary
        # bully_out = self.bully_head(shared_out)
        # harmful_out = self.harmful_head(shared_out)
        
        # Concatenate all task outputs with shared features for target prediction
        aux_features = torch.cat((
            sentiment_out,
            shared_out  # Shared features
        ), dim=1)
        
        # Final target prediction
        bully_out = self.bully_fc(aux_features)
        harmful_out = self.harmful_fc(aux_features)
        target_out = self.target_fc(aux_features)
        
        return sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_out, target_out


In [10]:
class Image_text_emotion_sentiment(nn.Module):
    def __init__(self):
        super(Image_text_emotion_sentiment, self).__init__()
        
        # Visual branch (CNN)
        self.resnet = models.resnet50(pretrained=True)
        self.resnet.fc = nn.Identity()  # Remove the final classification layer
        
        # Textual branch (RoBERTa)
        self.roberta = RobertaModel.from_pretrained('roberta-base')
        
        # Shared fully connected layers
        self.fc_shared = nn.Sequential(
            nn.Linear(2048 + 768, 512),  # Concatenation of visual and textual features
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        
        # Task-specific heads
        self.sentiment_head = nn.Linear(512, 3)  # Sentiment: 3 classes
        self.emotion_head = nn.Linear(512, 10)  # Emotion: 10 classes
        self.sarcasm_head = nn.Linear(512, 1)  # Sarcasm: binary classification
        # self.bully_head = nn.Linear(512, 2)  # Bully: binary classification
        self.bully_fc = nn.Sequential(
            nn.Linear(10 + 3 + 512, 256),  # Input: all task outputs + shared features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 2)  # 2 classes for bully
        )
        # self.harmful_head = nn.Linear(512, 3)  # Harmful score: 3 classes
        self.harmful_fc = nn.Sequential(
            nn.Linear(10 + 3+ 512, 256),  # Input: all task outputs + shared features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 3)  # 3 classes for harmful
        )
        # Final target head
        self.target_fc = nn.Sequential(
            nn.Linear(10 + 3 +  512, 256),  # Input: all task outputs + shared features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 4)  # 4 classes for Target
        )
        
    def forward(self, image, text_input_ids, text_attention_mask):
        # Visual features
        img_features = self.resnet(image)
        
        # Textual features
        text_outputs = self.roberta(input_ids=text_input_ids, attention_mask=text_attention_mask)
        text_features = text_outputs.pooler_output
        
        # Concatenate the visual and textual features
        combined_features = torch.cat((img_features, text_features), dim=1)
        
        # Shared features
        shared_out = self.fc_shared(combined_features)
        
        # Task-specific predictions
        sentiment_out = self.sentiment_head(shared_out)
        emotion_out = self.emotion_head(shared_out)
        sarcasm_out = torch.sigmoid(self.sarcasm_head(shared_out))  # Binary
        # bully_out = self.bully_head(shared_out)
        # harmful_out = self.harmful_head(shared_out)
        
        # Concatenate all task outputs with shared features for target prediction
        aux_features = torch.cat((
            emotion_out,
            sentiment_out,
            shared_out  # Shared features
        ), dim=1)
        
        # Final target prediction
        bully_out = self.bully_fc(aux_features)
        harmful_out = self.harmful_fc(aux_features)
        target_out = self.target_fc(aux_features)
        
        return sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_out, target_out


In [11]:
from torch.utils.data import random_split
from sklearn.metrics import accuracy_score, f1_score

In [12]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create the dataset and dataloader
dataset = MemeDataset(df, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
# Create data loaders
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# dataloader = DataLoader(dataset, batch_size=16, shuffle=True)  # Adjust batch size as needed




tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]



## Image and text

In [13]:
# Initialize the model and move it to the GPU
model= Image_text()
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)  # Use DataParallel if multiple GPUs are available
model.to(device) 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# Loss functions
loss_fn_sentiment = nn.CrossEntropyLoss().to(device)
loss_fn_emotion = nn.CrossEntropyLoss().to(device)
loss_fn_sarcasm = nn.BCEWithLogitsLoss().to(device)
loss_fn_bully = nn.CrossEntropyLoss().to(device)
loss_fn_harmful_score = nn.CrossEntropyLoss().to(device)
loss_fn_target = nn.CrossEntropyLoss().to(device)

# Training loop
for epoch in range(15):  # Set epochs accordingly
    model.train()
    
    total_loss = 0  # Initialize total loss for the epoch
    
    for images, text_input_ids, text_attention_mask, sentiment_labels, emotion_labels, sarcasm_labels, bully_labels, harmful_score_labels, target_labels in train_dataloader:
        # Move data to the GPU
    # for images, text_input_ids, text_attention_mask, bully_labels in train_dataloader:
  
        images = images.to(device)
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        sentiment_labels = sentiment_labels.to(device)
        emotion_labels = emotion_labels.to(device)
        sarcasm_labels = sarcasm_labels.to(device)
        bully_labels = bully_labels.to(device)
        harmful_score_labels = harmful_score_labels.to(device)
        target_labels = target_labels.to(device)
        
        optimizer.zero_grad()  # Clear gradients at the start of each batch
        
        # Forward pass
        sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_score_out, target_out = model(images, text_input_ids, text_attention_mask)
        
        # Compute loss for each task
        loss_sentiment = loss_fn_sentiment(sentiment_out, sentiment_labels)
        loss_emotion = loss_fn_emotion(emotion_out, emotion_labels)
        loss_sarcasm = loss_fn_sarcasm(sarcasm_out.squeeze(), sarcasm_labels.float())  # Squeeze if necessary
        loss_bully = loss_fn_bully(bully_out, bully_labels)
        loss_harmful_score = loss_fn_harmful_score(harmful_score_out, harmful_score_labels)
        loss_target = loss_fn_target(target_out, target_labels)
        
        # Total loss (sum or weigh the losses as needed)
        total_loss_batch = loss_sentiment + loss_emotion + loss_sarcasm + loss_bully + loss_harmful_score + loss_target
        
        # Backward pass and optimization
        total_loss_batch.backward()
        optimizer.step()  # Update model parameters
        
        total_loss += total_loss_batch.item()  # Accumulate loss for the epoch

    # Optionally clear cache at the end of each epoch
    torch.cuda.empty_cache()  
    
    # Print the average loss for the epoch
    avg_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch}, Average Loss: {avg_loss:.4f}')


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 208MB/s]


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch 0, Average Loss: 4.6428
Epoch 1, Average Loss: 4.3181
Epoch 2, Average Loss: 3.9292
Epoch 3, Average Loss: 3.4435
Epoch 4, Average Loss: 2.9588
Epoch 5, Average Loss: 2.6406
Epoch 6, Average Loss: 2.2862
Epoch 7, Average Loss: 2.0696
Epoch 8, Average Loss: 1.8450
Epoch 9, Average Loss: 1.7172
Epoch 10, Average Loss: 1.6026
Epoch 11, Average Loss: 1.5782
Epoch 12, Average Loss: 1.4626
Epoch 13, Average Loss: 1.3972
Epoch 14, Average Loss: 1.3827


In [15]:
import numpy as np

In [16]:
# Import required libraries (if not already done)
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score

# Validation phase
model.eval()  # Set model to evaluation mode
total_val_loss = 0

# Initialize lists to store true and predicted labels for each task
all_labels_bully = []
all_preds_bully = []

all_labels_sentiment = []
all_preds_sentiment = []

all_labels_emotion = []
all_preds_emotion = []

all_labels_sarcasm = []
all_preds_sarcasm = []

all_labels_harmful_score = []
all_preds_harmful_score = []

all_labels_target = []
all_preds_target = []

with torch.no_grad():  # Disable gradient calculation
    for images, text_input_ids, text_attention_mask, sentiment_labels, emotion_labels, sarcasm_labels, bully_labels, harmful_score_labels, target_labels in val_dataloader:
        # Move data to the GPU
        images = images.to(device)
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        sentiment_labels = sentiment_labels.to(device)
        emotion_labels = emotion_labels.to(device)
        sarcasm_labels = sarcasm_labels.to(device)
        bully_labels = bully_labels.to(device)
        harmful_score_labels = harmful_score_labels.to(device)
        target_labels = target_labels.to(device)

        # Forward pass
        sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_score_out, target_out = model(images, text_input_ids, text_attention_mask)

        # Compute loss for each task
        loss_sentiment = loss_fn_sentiment(sentiment_out, sentiment_labels)
        loss_emotion = loss_fn_emotion(emotion_out, emotion_labels)
        loss_sarcasm = loss_fn_sarcasm(sarcasm_out.squeeze(), sarcasm_labels.float())
        loss_bully = loss_fn_bully(bully_out, bully_labels)
        loss_harmful_score = loss_fn_harmful_score(harmful_score_out, harmful_score_labels)
        loss_target = loss_fn_target(target_out, target_labels)

        # Total loss
        total_val_loss += (loss_sentiment + loss_emotion + loss_sarcasm + loss_bully + loss_harmful_score + loss_target).item()

        # Get predictions for each task
        _, predicted_sentiment = torch.max(sentiment_out, 1)
        _, predicted_emotion = torch.max(emotion_out, 1)
        _, predicted_sarcasm = torch.max(sarcasm_out, 1)
        _, predicted_bully = torch.max(bully_out, 1)
        _, predicted_harmful_score = torch.max(harmful_score_out, 1)  # Assuming multi-class
        _, predicted_target = torch.max(target_out, 1)  # Assuming multi-class

        # Collect true and predicted labels for each task
        all_labels_sentiment.append(sentiment_labels.cpu().numpy())
        all_preds_sentiment.append(predicted_sentiment.cpu().numpy())

        all_labels_emotion.append(emotion_labels.cpu().numpy())
        all_preds_emotion.append(predicted_emotion.cpu().numpy())

        all_labels_sarcasm.append(sarcasm_labels.cpu().numpy())
        all_preds_sarcasm.append(predicted_sarcasm.cpu().numpy())

        all_labels_bully.append(bully_labels.cpu().numpy())
        all_preds_bully.append(predicted_bully.cpu().numpy())

        all_labels_harmful_score.append(harmful_score_labels.cpu().numpy())
        all_preds_harmful_score.append(predicted_harmful_score.cpu().numpy())

        all_labels_target.append(target_labels.cpu().numpy())
        all_preds_target.append(predicted_target.cpu().numpy())

avg_val_loss = total_val_loss / len(val_dataloader)

# Flatten lists for each task
all_labels_bully = np.concatenate(all_labels_bully)
all_preds_bully = np.concatenate(all_preds_bully)

all_labels_sentiment = np.concatenate(all_labels_sentiment)
all_preds_sentiment = np.concatenate(all_preds_sentiment)

all_labels_emotion = np.concatenate(all_labels_emotion)
all_preds_emotion = np.concatenate(all_preds_emotion)

all_labels_sarcasm = np.concatenate(all_labels_sarcasm)
all_preds_sarcasm = np.concatenate(all_preds_sarcasm)

all_labels_harmful_score = np.concatenate(all_labels_harmful_score)
all_preds_harmful_score = np.concatenate(all_preds_harmful_score)

all_labels_target = np.concatenate(all_labels_target)
all_preds_target = np.concatenate(all_preds_target)

# Calculate accuracy and F1 score for each task
accuracy_bully = accuracy_score(all_labels_bully, all_preds_bully)
f1_bully = f1_score(all_labels_bully, all_preds_bully, average='weighted')

accuracy_sentiment = accuracy_score(all_labels_sentiment, all_preds_sentiment)
f1_sentiment = f1_score(all_labels_sentiment, all_preds_sentiment, average='weighted')

accuracy_emotion = accuracy_score(all_labels_emotion, all_preds_emotion)
f1_emotion = f1_score(all_labels_emotion, all_preds_emotion, average='weighted')

accuracy_sarcasm = accuracy_score(all_labels_sarcasm, all_preds_sarcasm)
f1_sarcasm = f1_score(all_labels_sarcasm, all_preds_sarcasm, average='weighted')

accuracy_harmful_score = accuracy_score(all_labels_harmful_score, all_preds_harmful_score)
f1_harmful_score = f1_score(all_labels_harmful_score, all_preds_harmful_score, average='weighted')

accuracy_target = accuracy_score(all_labels_target, all_preds_target)
f1_target = f1_score(all_labels_target, all_preds_target, average='weighted')

print(f'Epoch {epoch}, Validation Loss: {avg_val_loss:.4f},\n'
      f'Bully Accuracy: {accuracy_bully:.4f}, F1 Score: {f1_bully:.4f},\n'
      f'Sentiment Accuracy: {accuracy_sentiment:.4f}, F1 Score: {f1_sentiment:.4f},\n'
      f'Emotion Accuracy: {accuracy_emotion:.4f}, F1 Score: {f1_emotion:.4f},\n'
      f'Sarcasm Accuracy: {accuracy_sarcasm:.4f}, F1 Score: {f1_sarcasm:.4f},\n'
      f'Harmful Score Accuracy: {accuracy_harmful_score:.4f}, F1 Score: {f1_harmful_score:.4f},\n'
      f'Target Accuracy: {accuracy_target:.4f}, F1 Score: {f1_target:.4f}')




Epoch 14, Validation Loss: 8.5395,
Bully Accuracy: 0.7744, F1 Score: 0.7669,
Sentiment Accuracy: 0.6299, F1 Score: 0.6479,
Emotion Accuracy: 0.2175, F1 Score: 0.2278,
Sarcasm Accuracy: 0.5114, F1 Score: 0.3460,
Harmful Score Accuracy: 0.9789, F1 Score: 0.9685,
Target Accuracy: 0.7062, F1 Score: 0.6555


# Image_text_emotion

In [17]:
# Initialize the model and move it to the GPU
model= Image_text_emotion()
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)  # Use DataParallel if multiple GPUs are available
model.to(device) 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# Loss functions
loss_fn_sentiment = nn.CrossEntropyLoss().to(device)
loss_fn_emotion = nn.CrossEntropyLoss().to(device)
loss_fn_sarcasm = nn.BCEWithLogitsLoss().to(device)
loss_fn_bully = nn.CrossEntropyLoss().to(device)
loss_fn_harmful_score = nn.CrossEntropyLoss().to(device)
loss_fn_target = nn.CrossEntropyLoss().to(device)

# Training loop
for epoch in range(15):  # Set epochs accordingly
    model.train()
    
    total_loss = 0  # Initialize total loss for the epoch
    
    for images, text_input_ids, text_attention_mask, sentiment_labels, emotion_labels, sarcasm_labels, bully_labels, harmful_score_labels, target_labels in train_dataloader:
        # Move data to the GPU
    # for images, text_input_ids, text_attention_mask, bully_labels in train_dataloader:
  
        images = images.to(device)
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        sentiment_labels = sentiment_labels.to(device)
        emotion_labels = emotion_labels.to(device)
        sarcasm_labels = sarcasm_labels.to(device)
        bully_labels = bully_labels.to(device)
        harmful_score_labels = harmful_score_labels.to(device)
        target_labels = target_labels.to(device)
        
        optimizer.zero_grad()  # Clear gradients at the start of each batch
        
        # Forward pass
        sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_score_out, target_out = model(images, text_input_ids, text_attention_mask)
        
        # Compute loss for each task
        loss_sentiment = loss_fn_sentiment(sentiment_out, sentiment_labels)
        loss_emotion = loss_fn_emotion(emotion_out, emotion_labels)
        loss_sarcasm = loss_fn_sarcasm(sarcasm_out.squeeze(), sarcasm_labels.float())  # Squeeze if necessary
        loss_bully = loss_fn_bully(bully_out, bully_labels)
        loss_harmful_score = loss_fn_harmful_score(harmful_score_out, harmful_score_labels)
        loss_target = loss_fn_target(target_out, target_labels)
        
        # Total loss (sum or weigh the losses as needed)
        total_loss_batch = loss_sentiment + loss_emotion + loss_sarcasm + loss_bully + loss_harmful_score + loss_target
        
        # Backward pass and optimization
        total_loss_batch.backward()
        optimizer.step()  # Update model parameters
        
        total_loss += total_loss_batch.item()  # Accumulate loss for the epoch

    # Optionally clear cache at the end of each epoch
    torch.cuda.empty_cache()  
    
    # Print the average loss for the epoch
    avg_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch}, Average Loss: {avg_loss:.4f}')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch 0, Average Loss: 4.7081
Epoch 1, Average Loss: 4.3522
Epoch 2, Average Loss: 3.9955
Epoch 3, Average Loss: 3.4775
Epoch 4, Average Loss: 3.0103
Epoch 5, Average Loss: 2.6499
Epoch 6, Average Loss: 2.3585
Epoch 7, Average Loss: 2.1452
Epoch 8, Average Loss: 2.0362
Epoch 9, Average Loss: 1.8962
Epoch 10, Average Loss: 1.7373
Epoch 11, Average Loss: 1.7050
Epoch 12, Average Loss: 1.6027
Epoch 13, Average Loss: 1.5829
Epoch 14, Average Loss: 1.5226


In [18]:
# Import required libraries (if not already done)
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score

# Validation phase
model.eval()  # Set model to evaluation mode
total_val_loss = 0

# Initialize lists to store true and predicted labels for each task
all_labels_bully = []
all_preds_bully = []

all_labels_sentiment = []
all_preds_sentiment = []

all_labels_emotion = []
all_preds_emotion = []

all_labels_sarcasm = []
all_preds_sarcasm = []

all_labels_harmful_score = []
all_preds_harmful_score = []

all_labels_target = []
all_preds_target = []

with torch.no_grad():  # Disable gradient calculation
    for images, text_input_ids, text_attention_mask, sentiment_labels, emotion_labels, sarcasm_labels, bully_labels, harmful_score_labels, target_labels in val_dataloader:
        # Move data to the GPU
        images = images.to(device)
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        sentiment_labels = sentiment_labels.to(device)
        emotion_labels = emotion_labels.to(device)
        sarcasm_labels = sarcasm_labels.to(device)
        bully_labels = bully_labels.to(device)
        harmful_score_labels = harmful_score_labels.to(device)
        target_labels = target_labels.to(device)

        # Forward pass
        sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_score_out, target_out = model(images, text_input_ids, text_attention_mask)

        # Compute loss for each task
        loss_sentiment = loss_fn_sentiment(sentiment_out, sentiment_labels)
        loss_emotion = loss_fn_emotion(emotion_out, emotion_labels)
        loss_sarcasm = loss_fn_sarcasm(sarcasm_out.squeeze(), sarcasm_labels.float())
        loss_bully = loss_fn_bully(bully_out, bully_labels)
        loss_harmful_score = loss_fn_harmful_score(harmful_score_out, harmful_score_labels)
        loss_target = loss_fn_target(target_out, target_labels)

        # Total loss
        total_val_loss += (loss_sentiment + loss_emotion + loss_sarcasm + loss_bully + loss_harmful_score + loss_target).item()

        # Get predictions for each task
        _, predicted_sentiment = torch.max(sentiment_out, 1)
        _, predicted_emotion = torch.max(emotion_out, 1)
        _, predicted_sarcasm = torch.max(sarcasm_out, 1)
        _, predicted_bully = torch.max(bully_out, 1)
        _, predicted_harmful_score = torch.max(harmful_score_out, 1)  # Assuming multi-class
        _, predicted_target = torch.max(target_out, 1)  # Assuming multi-class

        # Collect true and predicted labels for each task
        all_labels_sentiment.append(sentiment_labels.cpu().numpy())
        all_preds_sentiment.append(predicted_sentiment.cpu().numpy())

        all_labels_emotion.append(emotion_labels.cpu().numpy())
        all_preds_emotion.append(predicted_emotion.cpu().numpy())

        all_labels_sarcasm.append(sarcasm_labels.cpu().numpy())
        all_preds_sarcasm.append(predicted_sarcasm.cpu().numpy())

        all_labels_bully.append(bully_labels.cpu().numpy())
        all_preds_bully.append(predicted_bully.cpu().numpy())

        all_labels_harmful_score.append(harmful_score_labels.cpu().numpy())
        all_preds_harmful_score.append(predicted_harmful_score.cpu().numpy())

        all_labels_target.append(target_labels.cpu().numpy())
        all_preds_target.append(predicted_target.cpu().numpy())

avg_val_loss = total_val_loss / len(val_dataloader)

# Flatten lists for each task
all_labels_bully = np.concatenate(all_labels_bully)
all_preds_bully = np.concatenate(all_preds_bully)

all_labels_sentiment = np.concatenate(all_labels_sentiment)
all_preds_sentiment = np.concatenate(all_preds_sentiment)

all_labels_emotion = np.concatenate(all_labels_emotion)
all_preds_emotion = np.concatenate(all_preds_emotion)

all_labels_sarcasm = np.concatenate(all_labels_sarcasm)
all_preds_sarcasm = np.concatenate(all_preds_sarcasm)

all_labels_harmful_score = np.concatenate(all_labels_harmful_score)
all_preds_harmful_score = np.concatenate(all_preds_harmful_score)

all_labels_target = np.concatenate(all_labels_target)
all_preds_target = np.concatenate(all_preds_target)

# Calculate accuracy and F1 score for each task
accuracy_bully_em = accuracy_score(all_labels_bully, all_preds_bully)
f1_bully_em = f1_score(all_labels_bully, all_preds_bully, average='weighted')

accuracy_sentiment_em = accuracy_score(all_labels_sentiment, all_preds_sentiment)
f1_sentiment_em = f1_score(all_labels_sentiment, all_preds_sentiment, average='weighted')

accuracy_emotion_em = accuracy_score(all_labels_emotion, all_preds_emotion)
f1_emotion_em = f1_score(all_labels_emotion, all_preds_emotion, average='weighted')

accuracy_sarcasm_em = accuracy_score(all_labels_sarcasm, all_preds_sarcasm)
f1_sarcasm_em = f1_score(all_labels_sarcasm, all_preds_sarcasm, average='weighted')

accuracy_harmful_score_em = accuracy_score(all_labels_harmful_score, all_preds_harmful_score)
f1_harmful_score_em = f1_score(all_labels_harmful_score, all_preds_harmful_score, average='weighted')

accuracy_target_em = accuracy_score(all_labels_target, all_preds_target)
f1_target_em = f1_score(all_labels_target, all_preds_target, average='weighted')

print(f'Epoch {epoch}, Validation Loss: {avg_val_loss:.4f},\n'
      f'Bully Accuracy: {accuracy_bully:.4f}, F1 Score: {f1_bully:.4f},\n'
      f'Sentiment Accuracy: {accuracy_sentiment:.4f}, F1 Score: {f1_sentiment:.4f},\n'
      f'Emotion Accuracy: {accuracy_emotion:.4f}, F1 Score: {f1_emotion:.4f},\n'
      f'Sarcasm Accuracy: {accuracy_sarcasm:.4f}, F1 Score: {f1_sarcasm:.4f},\n'
      f'Harmful Score Accuracy: {accuracy_harmful_score:.4f}, F1 Score: {f1_harmful_score:.4f},\n'
      f'Target Accuracy: {accuracy_target:.4f}, F1 Score: {f1_target:.4f}')




Epoch 14, Validation Loss: 8.0432,
Bully Accuracy: 0.7744, F1 Score: 0.7669,
Sentiment Accuracy: 0.6299, F1 Score: 0.6479,
Emotion Accuracy: 0.2175, F1 Score: 0.2278,
Sarcasm Accuracy: 0.5114, F1 Score: 0.3460,
Harmful Score Accuracy: 0.9789, F1 Score: 0.9685,
Target Accuracy: 0.7062, F1 Score: 0.6555


# Image_text_sentiment

In [19]:
# Initialize the model and move it to the GPU
model= Image_text_sentiment()
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)  # Use DataParallel if multiple GPUs are available
model.to(device) 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# Loss functions
loss_fn_sentiment = nn.CrossEntropyLoss().to(device)
loss_fn_emotion = nn.CrossEntropyLoss().to(device)
loss_fn_sarcasm = nn.BCEWithLogitsLoss().to(device)
loss_fn_bully = nn.CrossEntropyLoss().to(device)
loss_fn_harmful_score = nn.CrossEntropyLoss().to(device)
loss_fn_target = nn.CrossEntropyLoss().to(device)

# Training loop
for epoch in range(15):  # Set epochs accordingly
    model.train()
    
    total_loss = 0  # Initialize total loss for the epoch
    
    for images, text_input_ids, text_attention_mask, sentiment_labels, emotion_labels, sarcasm_labels, bully_labels, harmful_score_labels, target_labels in train_dataloader:
        # Move data to the GPU
    # for images, text_input_ids, text_attention_mask, bully_labels in train_dataloader:
  
        images = images.to(device)
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        sentiment_labels = sentiment_labels.to(device)
        emotion_labels = emotion_labels.to(device)
        sarcasm_labels = sarcasm_labels.to(device)
        bully_labels = bully_labels.to(device)
        harmful_score_labels = harmful_score_labels.to(device)
        target_labels = target_labels.to(device)
        
        optimizer.zero_grad()  # Clear gradients at the start of each batch
        
        # Forward pass
        sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_score_out, target_out = model(images, text_input_ids, text_attention_mask)
        
        # Compute loss for each task
        loss_sentiment = loss_fn_sentiment(sentiment_out, sentiment_labels)
        loss_emotion = loss_fn_emotion(emotion_out, emotion_labels)
        loss_sarcasm = loss_fn_sarcasm(sarcasm_out.squeeze(), sarcasm_labels.float())  # Squeeze if necessary
        loss_bully = loss_fn_bully(bully_out, bully_labels)
        loss_harmful_score = loss_fn_harmful_score(harmful_score_out, harmful_score_labels)
        loss_target = loss_fn_target(target_out, target_labels)
        
        # Total loss (sum or weigh the losses as needed)
        total_loss_batch = loss_sentiment + loss_emotion + loss_sarcasm + loss_bully + loss_harmful_score + loss_target
        
        # Backward pass and optimization
        total_loss_batch.backward()
        optimizer.step()  # Update model parameters
        
        total_loss += total_loss_batch.item()  # Accumulate loss for the epoch

    # Optionally clear cache at the end of each epoch
    torch.cuda.empty_cache()  
    
    # Print the average loss for the epoch
    avg_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch}, Average Loss: {avg_loss:.4f}')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch 0, Average Loss: 4.7218
Epoch 1, Average Loss: 4.3478
Epoch 2, Average Loss: 4.0018
Epoch 3, Average Loss: 3.4995
Epoch 4, Average Loss: 3.0654
Epoch 5, Average Loss: 2.6763
Epoch 6, Average Loss: 2.4189
Epoch 7, Average Loss: 2.2007
Epoch 8, Average Loss: 2.0683
Epoch 9, Average Loss: 1.8804
Epoch 10, Average Loss: 1.8753
Epoch 11, Average Loss: 1.7336
Epoch 12, Average Loss: 1.6399
Epoch 13, Average Loss: 1.5704
Epoch 14, Average Loss: 1.5171


In [20]:
# Import required libraries (if not already done)
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score

# Validation phase
model.eval()  # Set model to evaluation mode
total_val_loss = 0

# Initialize lists to store true and predicted labels for each task
all_labels_bully = []
all_preds_bully = []

all_labels_sentiment = []
all_preds_sentiment = []

all_labels_emotion = []
all_preds_emotion = []

all_labels_sarcasm = []
all_preds_sarcasm = []

all_labels_harmful_score = []
all_preds_harmful_score = []

all_labels_target = []
all_preds_target = []

with torch.no_grad():  # Disable gradient calculation
    for images, text_input_ids, text_attention_mask, sentiment_labels, emotion_labels, sarcasm_labels, bully_labels, harmful_score_labels, target_labels in val_dataloader:
        # Move data to the GPU
        images = images.to(device)
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        sentiment_labels = sentiment_labels.to(device)
        emotion_labels = emotion_labels.to(device)
        sarcasm_labels = sarcasm_labels.to(device)
        bully_labels = bully_labels.to(device)
        harmful_score_labels = harmful_score_labels.to(device)
        target_labels = target_labels.to(device)

        # Forward pass
        sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_score_out, target_out = model(images, text_input_ids, text_attention_mask)

        # Compute loss for each task
        loss_sentiment = loss_fn_sentiment(sentiment_out, sentiment_labels)
        loss_emotion = loss_fn_emotion(emotion_out, emotion_labels)
        loss_sarcasm = loss_fn_sarcasm(sarcasm_out.squeeze(), sarcasm_labels.float())
        loss_bully = loss_fn_bully(bully_out, bully_labels)
        loss_harmful_score = loss_fn_harmful_score(harmful_score_out, harmful_score_labels)
        loss_target = loss_fn_target(target_out, target_labels)

        # Total loss
        total_val_loss += (loss_sentiment + loss_emotion + loss_sarcasm + loss_bully + loss_harmful_score + loss_target).item()

        # Get predictions for each task
        _, predicted_sentiment = torch.max(sentiment_out, 1)
        _, predicted_emotion = torch.max(emotion_out, 1)
        _, predicted_sarcasm = torch.max(sarcasm_out, 1)
        _, predicted_bully = torch.max(bully_out, 1)
        _, predicted_harmful_score = torch.max(harmful_score_out, 1)  # Assuming multi-class
        _, predicted_target = torch.max(target_out, 1)  # Assuming multi-class

        # Collect true and predicted labels for each task
        all_labels_sentiment.append(sentiment_labels.cpu().numpy())
        all_preds_sentiment.append(predicted_sentiment.cpu().numpy())

        all_labels_emotion.append(emotion_labels.cpu().numpy())
        all_preds_emotion.append(predicted_emotion.cpu().numpy())

        all_labels_sarcasm.append(sarcasm_labels.cpu().numpy())
        all_preds_sarcasm.append(predicted_sarcasm.cpu().numpy())

        all_labels_bully.append(bully_labels.cpu().numpy())
        all_preds_bully.append(predicted_bully.cpu().numpy())

        all_labels_harmful_score.append(harmful_score_labels.cpu().numpy())
        all_preds_harmful_score.append(predicted_harmful_score.cpu().numpy())

        all_labels_target.append(target_labels.cpu().numpy())
        all_preds_target.append(predicted_target.cpu().numpy())

avg_val_loss = total_val_loss / len(val_dataloader)

# Flatten lists for each task
all_labels_bully = np.concatenate(all_labels_bully)
all_preds_bully = np.concatenate(all_preds_bully)

all_labels_sentiment = np.concatenate(all_labels_sentiment)
all_preds_sentiment = np.concatenate(all_preds_sentiment)

all_labels_emotion = np.concatenate(all_labels_emotion)
all_preds_emotion = np.concatenate(all_preds_emotion)

all_labels_sarcasm = np.concatenate(all_labels_sarcasm)
all_preds_sarcasm = np.concatenate(all_preds_sarcasm)

all_labels_harmful_score = np.concatenate(all_labels_harmful_score)
all_preds_harmful_score = np.concatenate(all_preds_harmful_score)

all_labels_target = np.concatenate(all_labels_target)
all_preds_target = np.concatenate(all_preds_target)

# Calculate accuracy and F1 score for each task
accuracy_bully_SA = accuracy_score(all_labels_bully, all_preds_bully)
f1_bully_SA = f1_score(all_labels_bully, all_preds_bully, average='weighted')

accuracy_sentiment_SA = accuracy_score(all_labels_sentiment, all_preds_sentiment)
f1_sentiment_SA = f1_score(all_labels_sentiment, all_preds_sentiment, average='weighted')

accuracy_emotion_SA = accuracy_score(all_labels_emotion, all_preds_emotion)
f1_emotion_SA = f1_score(all_labels_emotion, all_preds_emotion, average='weighted')

accuracy_sarcasm_SA = accuracy_score(all_labels_sarcasm, all_preds_sarcasm)
f1_sarcasm_SA = f1_score(all_labels_sarcasm, all_preds_sarcasm, average='weighted')

accuracy_harmful_score_SA = accuracy_score(all_labels_harmful_score, all_preds_harmful_score)
f1_harmful_score_SA = f1_score(all_labels_harmful_score, all_preds_harmful_score, average='weighted')

accuracy_target_SA = accuracy_score(all_labels_target, all_preds_target)
f1_target_SA = f1_score(all_labels_target, all_preds_target, average='weighted')

print(f'Epoch {epoch}, Validation Loss: {avg_val_loss:.4f},\n'
      f'Bully Accuracy: {accuracy_bully:.4f}, F1 Score: {f1_bully:.4f},\n'
      f'Sentiment Accuracy: {accuracy_sentiment:.4f}, F1 Score: {f1_sentiment:.4f},\n'
      f'Emotion Accuracy: {accuracy_emotion:.4f}, F1 Score: {f1_emotion:.4f},\n'
      f'Sarcasm Accuracy: {accuracy_sarcasm:.4f}, F1 Score: {f1_sarcasm:.4f},\n'
      f'Harmful Score Accuracy: {accuracy_harmful_score:.4f}, F1 Score: {f1_harmful_score:.4f},\n'
      f'Target Accuracy: {accuracy_target:.4f}, F1 Score: {f1_target:.4f}')




Epoch 14, Validation Loss: 8.2057,
Bully Accuracy: 0.7744, F1 Score: 0.7669,
Sentiment Accuracy: 0.6299, F1 Score: 0.6479,
Emotion Accuracy: 0.2175, F1 Score: 0.2278,
Sarcasm Accuracy: 0.5114, F1 Score: 0.3460,
Harmful Score Accuracy: 0.9789, F1 Score: 0.9685,
Target Accuracy: 0.7062, F1 Score: 0.6555


# Image_text_emotion sentiment

In [21]:
# Initialize the model and move it to the GPU
model= Image_text_emotion_sentiment()
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)  # Use DataParallel if multiple GPUs are available
model.to(device) 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# Loss functions
loss_fn_sentiment = nn.CrossEntropyLoss().to(device)
loss_fn_emotion = nn.CrossEntropyLoss().to(device)
loss_fn_sarcasm = nn.BCEWithLogitsLoss().to(device)
loss_fn_bully = nn.CrossEntropyLoss().to(device)
loss_fn_harmful_score = nn.CrossEntropyLoss().to(device)
loss_fn_target = nn.CrossEntropyLoss().to(device)

# Training loop
for epoch in range(15):  # Set epochs accordingly
    model.train()
    
    total_loss = 0  # Initialize total loss for the epoch
    
    for images, text_input_ids, text_attention_mask, sentiment_labels, emotion_labels, sarcasm_labels, bully_labels, harmful_score_labels, target_labels in train_dataloader:
        # Move data to the GPU
    # for images, text_input_ids, text_attention_mask, bully_labels in train_dataloader:
  
        images = images.to(device)
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        sentiment_labels = sentiment_labels.to(device)
        emotion_labels = emotion_labels.to(device)
        sarcasm_labels = sarcasm_labels.to(device)
        bully_labels = bully_labels.to(device)
        harmful_score_labels = harmful_score_labels.to(device)
        target_labels = target_labels.to(device)
        
        optimizer.zero_grad()  # Clear gradients at the start of each batch
        
        # Forward pass
        sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_score_out, target_out = model(images, text_input_ids, text_attention_mask)
        
        # Compute loss for each task
        loss_sentiment = loss_fn_sentiment(sentiment_out, sentiment_labels)
        loss_emotion = loss_fn_emotion(emotion_out, emotion_labels)
        loss_sarcasm = loss_fn_sarcasm(sarcasm_out.squeeze(), sarcasm_labels.float())  # Squeeze if necessary
        loss_bully = loss_fn_bully(bully_out, bully_labels)
        loss_harmful_score = loss_fn_harmful_score(harmful_score_out, harmful_score_labels)
        loss_target = loss_fn_target(target_out, target_labels)
        
        # Total loss (sum or weigh the losses as needed)
        total_loss_batch = loss_sentiment + loss_emotion + loss_sarcasm + loss_bully + loss_harmful_score + loss_target
        
        # Backward pass and optimization
        total_loss_batch.backward()
        optimizer.step()  # Update model parameters
        
        total_loss += total_loss_batch.item()  # Accumulate loss for the epoch

    # Optionally clear cache at the end of each epoch
    torch.cuda.empty_cache()  
    
    # Print the average loss for the epoch
    avg_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch}, Average Loss: {avg_loss:.4f}')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch 0, Average Loss: 4.7302
Epoch 1, Average Loss: 4.3580
Epoch 2, Average Loss: 4.0041
Epoch 3, Average Loss: 3.5066
Epoch 4, Average Loss: 3.0784
Epoch 5, Average Loss: 2.6493
Epoch 6, Average Loss: 2.4539
Epoch 7, Average Loss: 2.2440
Epoch 8, Average Loss: 2.0973
Epoch 9, Average Loss: 1.9535
Epoch 10, Average Loss: 1.8408
Epoch 11, Average Loss: 1.8069
Epoch 12, Average Loss: 1.6760
Epoch 13, Average Loss: 1.6009
Epoch 14, Average Loss: 1.5619


In [22]:
# Import required libraries (if not already done)
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score

# Validation phase
model.eval()  # Set model to evaluation mode
total_val_loss = 0

# Initialize lists to store true and predicted labels for each task
all_labels_bully = []
all_preds_bully = []

all_labels_sentiment = []
all_preds_sentiment = []

all_labels_emotion = []
all_preds_emotion = []

all_labels_sarcasm = []
all_preds_sarcasm = []

all_labels_harmful_score = []
all_preds_harmful_score = []

all_labels_target = []
all_preds_target = []

with torch.no_grad():  # Disable gradient calculation
    for images, text_input_ids, text_attention_mask, sentiment_labels, emotion_labels, sarcasm_labels, bully_labels, harmful_score_labels, target_labels in val_dataloader:
        # Move data to the GPU
        images = images.to(device)
        text_input_ids = text_input_ids.to(device)
        text_attention_mask = text_attention_mask.to(device)
        sentiment_labels = sentiment_labels.to(device)
        emotion_labels = emotion_labels.to(device)
        sarcasm_labels = sarcasm_labels.to(device)
        bully_labels = bully_labels.to(device)
        harmful_score_labels = harmful_score_labels.to(device)
        target_labels = target_labels.to(device)

        # Forward pass
        sentiment_out, emotion_out, sarcasm_out, bully_out, harmful_score_out, target_out = model(images, text_input_ids, text_attention_mask)

        # Compute loss for each task
        loss_sentiment = loss_fn_sentiment(sentiment_out, sentiment_labels)
        loss_emotion = loss_fn_emotion(emotion_out, emotion_labels)
        loss_sarcasm = loss_fn_sarcasm(sarcasm_out.squeeze(), sarcasm_labels.float())
        loss_bully = loss_fn_bully(bully_out, bully_labels)
        loss_harmful_score = loss_fn_harmful_score(harmful_score_out, harmful_score_labels)
        loss_target = loss_fn_target(target_out, target_labels)

        # Total loss
        total_val_loss += (loss_sentiment + loss_emotion + loss_sarcasm + loss_bully + loss_harmful_score + loss_target).item()

        # Get predictions for each task
        _, predicted_sentiment = torch.max(sentiment_out, 1)
        _, predicted_emotion = torch.max(emotion_out, 1)
        _, predicted_sarcasm = torch.max(sarcasm_out, 1)
        _, predicted_bully = torch.max(bully_out, 1)
        _, predicted_harmful_score = torch.max(harmful_score_out, 1)  # Assuming multi-class
        _, predicted_target = torch.max(target_out, 1)  # Assuming multi-class

        # Collect true and predicted labels for each task
        all_labels_sentiment.append(sentiment_labels.cpu().numpy())
        all_preds_sentiment.append(predicted_sentiment.cpu().numpy())

        all_labels_emotion.append(emotion_labels.cpu().numpy())
        all_preds_emotion.append(predicted_emotion.cpu().numpy())

        all_labels_sarcasm.append(sarcasm_labels.cpu().numpy())
        all_preds_sarcasm.append(predicted_sarcasm.cpu().numpy())

        all_labels_bully.append(bully_labels.cpu().numpy())
        all_preds_bully.append(predicted_bully.cpu().numpy())

        all_labels_harmful_score.append(harmful_score_labels.cpu().numpy())
        all_preds_harmful_score.append(predicted_harmful_score.cpu().numpy())

        all_labels_target.append(target_labels.cpu().numpy())
        all_preds_target.append(predicted_target.cpu().numpy())

avg_val_loss = total_val_loss / len(val_dataloader)

# Flatten lists for each task
all_labels_bully = np.concatenate(all_labels_bully)
all_preds_bully = np.concatenate(all_preds_bully)

all_labels_sentiment = np.concatenate(all_labels_sentiment)
all_preds_sentiment = np.concatenate(all_preds_sentiment)

all_labels_emotion = np.concatenate(all_labels_emotion)
all_preds_emotion = np.concatenate(all_preds_emotion)

all_labels_sarcasm = np.concatenate(all_labels_sarcasm)
all_preds_sarcasm = np.concatenate(all_preds_sarcasm)

all_labels_harmful_score = np.concatenate(all_labels_harmful_score)
all_preds_harmful_score = np.concatenate(all_preds_harmful_score)

all_labels_target = np.concatenate(all_labels_target)
all_preds_target = np.concatenate(all_preds_target)

# Calculate accuracy and F1 score for each task
accuracy_bully_SA_EM = accuracy_score(all_labels_bully, all_preds_bully)
f1_bully_SA_EM = f1_score(all_labels_bully, all_preds_bully, average='weighted')

accuracy_sentiment_SA_EM = accuracy_score(all_labels_sentiment, all_preds_sentiment)
f1_sentiment_SA_EM = f1_score(all_labels_sentiment, all_preds_sentiment, average='weighted')

accuracy_emotion_SA_EM = accuracy_score(all_labels_emotion, all_preds_emotion)
f1_emotion_SA_EM = f1_score(all_labels_emotion, all_preds_emotion, average='weighted')

accuracy_sarcasm_SA_EM = accuracy_score(all_labels_sarcasm, all_preds_sarcasm)
f1_sarcasm_SA_EM = f1_score(all_labels_sarcasm, all_preds_sarcasm, average='weighted')

accuracy_harmful_score_SA_EM = accuracy_score(all_labels_harmful_score, all_preds_harmful_score)
f1_harmful_score_SA_EM = f1_score(all_labels_harmful_score, all_preds_harmful_score, average='weighted')

accuracy_target_SA_EM = accuracy_score(all_labels_target, all_preds_target)
f1_target_SA_EM = f1_score(all_labels_target, all_preds_target, average='weighted')

print(f'Epoch {epoch}, Validation Loss: {avg_val_loss:.4f},\n'
      f'Bully Accuracy: {accuracy_bully:.4f}, F1 Score: {f1_bully:.4f},\n'
      f'Sentiment Accuracy: {accuracy_sentiment:.4f}, F1 Score: {f1_sentiment:.4f},\n'
      f'Emotion Accuracy: {accuracy_emotion:.4f}, F1 Score: {f1_emotion:.4f},\n'
      f'Sarcasm Accuracy: {accuracy_sarcasm:.4f}, F1 Score: {f1_sarcasm:.4f},\n'
      f'Harmful Score Accuracy: {accuracy_harmful_score:.4f}, F1 Score: {f1_harmful_score:.4f},\n'
      f'Target Accuracy: {accuracy_target:.4f}, F1 Score: {f1_target:.4f}')




Epoch 14, Validation Loss: 8.8220,
Bully Accuracy: 0.7744, F1 Score: 0.7669,
Sentiment Accuracy: 0.6299, F1 Score: 0.6479,
Emotion Accuracy: 0.2175, F1 Score: 0.2278,
Sarcasm Accuracy: 0.5114, F1 Score: 0.3460,
Harmful Score Accuracy: 0.9789, F1 Score: 0.9685,
Target Accuracy: 0.7062, F1 Score: 0.6555


In [34]:
accuracy_target_EM

NameError: name 'accuracy_target_EM' is not defined

In [37]:
accuracy_target_SA

0.7159090909090909

In [26]:
import numpy as np

In [None]:
fi