In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from transformers import BertTokenizer, BertModel
from PIL import Image
import json
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from langdetect import detect_langs
from googletrans import Translator


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [3]:
# Define image transformations with data augmentation
image_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [4]:
# Define a function for language detection and translation
def detect_and_translate(text):
    try:
        detected_langs = detect_langs(text)
        detected_lang = detected_langs[0].lang
        detected_prob = detected_langs[0].prob

        if detected_lang != 'en' and detected_prob > 0.5:
            translator = Translator()
            translated_text = translator.translate(text, src=detected_lang, dest='en').text
            return translated_text
        else:
            return text
    except Exception as e:
        print(f"Error in translation: {e}")
        return text


In [None]:
class MemeDataset(Dataset):
    def __init__(self, image_dir, annotation_file, tokenizer, label_mapping, transform=None):
        self.image_dir = image_dir
        with open(annotation_file, 'r', encoding='utf-8') as f:
            self.annotations = json.load(f)
        self.tokenizer = tokenizer
        self.label_mapping = label_mapping
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        annotation = self.annotations[idx]
        image_path = os.path.join(self.image_dir, annotation['image'])
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)

        text = annotation['text']  # Use your actual text processing function here
        inputs = self.tokenizer(text, return_tensors='pt', padding='max_length', truncation=True, max_length=128)
        input_ids = inputs['input_ids'].squeeze()
        attention_mask = inputs['attention_mask'].squeeze()

        labels = annotation.get('labels', [])
        label_ids = [self.label_mapping[label] for label in labels]

        # Convert label_ids to a fixed-size tensor, e.g., a one-hot encoded tensor
        label_tensor = torch.zeros(len(self.label_mapping))
        for label_id in label_ids:
            label_tensor[label_id] = 1.0

        return image, input_ids, attention_mask, label_tensor

# Define the dataset and dataloader
train_image_dir = 'C:\\Users\\harih\\Downloads\\train_images\\train_images'
train_annotation_file = 'C:\\Users\\harih\\Downloads\\annotations_v2\\semeval2024_dev_release\\subtask2a\\train.json'
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
label_mapping = {'label1': 0, 'label2': 1}  # Example label mapping
image_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

train_dataset = MemeDataset(train_image_dir, train_annotation_file, tokenizer, label_mapping, transform=image_transforms)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCEWithLogitsLoss()  # Use binary cross-entropy for multi-label classification

num_epochs = 5
model.train()
for epoch in range(num_epochs):
    for images, input_ids, attention_mask, labels in train_loader:
        images, input_ids, attention_mask, labels = images.to(device), input_ids.to(device), attention_mask.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask, images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


In [6]:
import torch
import torch.nn as nn
from transformers import BertModel
import torchvision.models as models

# Define the model class
class MemeClassifier(nn.Module):
    def __init__(self, num_labels):
        super(MemeClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.resnet = models.resnet50(pretrained=True)
        
        # Fine-tune ResNet layers
        for param in self.resnet.parameters():
            param.requires_grad = True
            
        self.resnet.fc = nn.Identity()
        
        # Additional layers on top of ResNet50
        self.fc1 = nn.Linear(self.bert.config.hidden_size + 2048, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(0.5)
        self.classifier = nn.Linear(512, num_labels)

    def forward(self, input_ids, attention_mask, images):
        text_features = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
        image_features = self.resnet(images)
        features = torch.cat((text_features, image_features), dim=1)
        
        x = self.fc1(features)
        x = self.bn1(x)
        x = torch.relu(x)
        x = self.dropout1(x)
        
        x = self.fc2(x)
        x = self.bn2(x)
        x = torch.relu(x)
        x = self.dropout2(x)
        
        logits = self.classifier(x)
        return logits

In [7]:
# Load label mappings
label_mapping = {'Appeal to (Strong) Emotions': 0, 'Appeal to authority': 1, 'Appeal to fear/prejudice': 2, 'Bandwagon': 3, 'Black-and-white Fallacy/Dictatorship': 4, 'Causal Oversimplification': 5, 'Doubt': 6, 'Exaggeration/Minimisation': 7, 'Flag-waving': 8, 'Glittering generalities (Virtue)': 9, 'Loaded Language': 10, "Misrepresentation of Someone's Position (Straw Man)": 11, 'Name calling/Labeling': 12, 'Obfuscation, Intentional vagueness, Confusion': 13, 'Presenting Irrelevant Data (Red Herring)': 14, 'Reductio ad hitlerum': 15, 'Repetition': 16, 'Slogans': 17, 'Smears': 18, 'Thought-terminating cliché': 19, 'Transfer': 20, 'Whataboutism': 21}  # Example label mapping
reversed_label_mapping = {v: k for k, v in label_mapping.items()}
print(label_mapping)

{'Appeal to (Strong) Emotions': 0, 'Appeal to authority': 1, 'Appeal to fear/prejudice': 2, 'Bandwagon': 3, 'Black-and-white Fallacy/Dictatorship': 4, 'Causal Oversimplification': 5, 'Doubt': 6, 'Exaggeration/Minimisation': 7, 'Flag-waving': 8, 'Glittering generalities (Virtue)': 9, 'Loaded Language': 10, "Misrepresentation of Someone's Position (Straw Man)": 11, 'Name calling/Labeling': 12, 'Obfuscation, Intentional vagueness, Confusion': 13, 'Presenting Irrelevant Data (Red Herring)': 14, 'Reductio ad hitlerum': 15, 'Repetition': 16, 'Slogans': 17, 'Smears': 18, 'Thought-terminating cliché': 19, 'Transfer': 20, 'Whataboutism': 21}


In [8]:
# Load the tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = MemeClassifier(len(label_mapping))
model.to(device)




MemeClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

In [9]:
# Define training parameters
learning_rate = 1e-5
batch_size = 16
num_epochs = 10


In [10]:
# Define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)


In [11]:
# Define the dataset and dataloader
train_image_dir = 'C:\\Users\\harih\\Downloads\\train_images\\train_images'
train_annotation_file = 'C:\\Users\\harih\\Downloads\\annotations_v2\\semeval2024_dev_release\\subtask2a\\train.json'
train_dataset = MemeDataset(train_image_dir, train_annotation_file, tokenizer, label_mapping, transform=image_transforms)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [12]:
# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.BCEWithLogitsLoss()  # Use binary cross-entropy for multi-label classification

num_epochs = 100
model.train()
for epoch in range(num_epochs):
    for images, input_ids, attention_mask, labels in train_loader:
        images, input_ids, attention_mask, labels = images.to(device), input_ids.to(device), attention_mask.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask, images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


  attn_output = torch.nn.functional.scaled_dot_product_attention(


Epoch [1/100], Loss: 0.4783
Epoch [2/100], Loss: 0.4140
Epoch [3/100], Loss: 0.2856
Epoch [4/100], Loss: 0.3219
Epoch [5/100], Loss: 0.2369
Epoch [6/100], Loss: 0.2339
Epoch [7/100], Loss: 0.2412
Epoch [8/100], Loss: 0.1884
Epoch [9/100], Loss: 0.2388
Epoch [10/100], Loss: 0.1587
Epoch [11/100], Loss: 0.2061
Epoch [12/100], Loss: 0.2166
Epoch [13/100], Loss: 0.2623
Epoch [14/100], Loss: 0.1236
Epoch [15/100], Loss: 0.1463
Epoch [16/100], Loss: 0.1120
Epoch [17/100], Loss: 0.0712
Epoch [18/100], Loss: 0.1159
Epoch [19/100], Loss: 0.1341
Epoch [20/100], Loss: 0.0911
Epoch [21/100], Loss: 0.1777
Epoch [22/100], Loss: 0.0846
Epoch [23/100], Loss: 0.1760
Epoch [24/100], Loss: 0.0655
Epoch [25/100], Loss: 0.1154
Epoch [26/100], Loss: 0.0829
Epoch [27/100], Loss: 0.1262
Epoch [28/100], Loss: 0.0830
Epoch [29/100], Loss: 0.0785
Epoch [30/100], Loss: 0.0395
Epoch [31/100], Loss: 0.0572
Epoch [32/100], Loss: 0.0608
Epoch [33/100], Loss: 0.0402
Epoch [34/100], Loss: 0.0571
Epoch [35/100], Loss: 0

In [25]:
# Save the trained model
torch.save(model.state_dict(), 'bert_resnet_model100.pth')



In [12]:
# Define evaluation function
def evaluate(model, dataloader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, input_ids, attention_mask, labels in dataloader:
            images, input_ids, attention_mask, labels = images.to(device), input_ids.to(device), attention_mask.to(device), labels.to(device)
            outputs = model(input_ids, attention_mask, images)
            preds = torch.sigmoid(outputs).round().cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())
    print(classification_report(all_labels, all_preds, target_names=reversed_label_mapping.values()))


In [15]:
# Define validation dataset and dataloader
val_image_dir = 'C:\\Users\\harih\\Downloads\\dev_images\\dev_images'
val_annotation_file = 'C:\\Users\\harih\\Downloads\\annotations_v2\\semeval2024_dev_release\\subtask2a\\dev_unlabeled.json'
val_dataset = MemeDataset(val_image_dir, val_annotation_file, tokenizer, label_mapping, transform=image_transforms)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [16]:
import json
from sklearn.metrics import classification_report

# Load the validation file to get the list of IDs
with open('C:\\Users\\harih\\Downloads\\annotations_v2\\semeval2024_dev_release\\subtask2a\\dev_unlabeled.json', 'r', encoding='utf-8') as f:
    validation_data = json.load(f)
    id_mapping = {idx: item['id'] for idx, item in enumerate(validation_data)}

# Define the reversed label mapping
reversed_label_mapping = {v: k for k, v in label_mapping.items()}

# Define evaluation function
def evaluate(model, dataloader, output_file):
    model.eval()
    results = []
    
    with torch.no_grad():
        for batch_idx, (images, input_ids, attention_mask, labels) in enumerate(dataloader):
            images, input_ids, attention_mask, labels = images.to(device), input_ids.to(device), attention_mask.to(device), labels.to(device)
            outputs = model(input_ids, attention_mask, images)
            preds = torch.sigmoid(outputs).round().cpu().numpy()
            
            for item_idx, (pred, label) in enumerate(zip(preds, labels.cpu().numpy())):
                result_id = id_mapping[batch_idx * dataloader.batch_size + item_idx]
                result = {
                    "id": result_id,
                    "labels": [reversed_label_mapping[i] for i, p in enumerate(pred) if p == 1.0]
                }
                results.append(result)
    
    # Save results to a JSON file
    with open(output_file, 'w') as f:
        json.dump(results, f, indent=4)

# Define the dataset and dataloader for validation
val_image_dir = 'C:\\Users\\harih\\Downloads\\dev_images\\dev_images'
val_annotation_file = 'C:\\Users\\harih\\Downloads\\annotations_v2\\semeval2024_dev_release\\subtask2a\\dev_unlabeled.json'
val_dataset = MemeDataset(val_image_dir, val_annotation_file, tokenizer, label_mapping, transform=image_transforms)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Evaluate the model on validation data and save predictions
output_file = 'resnet50_predictions.json'
evaluate(model, val_loader, output_file)


In [17]:
!python subtask_1_2a.py -g dev_gold_labels/dev_gold_labels/dev_subtask2a_en.json -p resnet50_predictions.json

f1_h=0.64742	prec_h=0.70391	rec_h=0.59932


In [18]:
# Load the validation file to get the list of IDs
with open('C:\\Users\\harih\\Downloads\\test_data\\test_data\\north_macedonian\\mk_subtask2a_test_unlabeled.json', 'r', encoding='utf-8') as f:
    mk_validation_data = json.load(f)
    id_mapping = {idx: item['id'] for idx, item in enumerate(mk_validation_data)}
mk_val_image_dir = 'C:\\Users\\harih\\Downloads\\test_images\\test_images\\subtask1_2a\\north_macedonian'
mk_val_annotation_file = 'C:\\Users\\harih\\Downloads\\test_data\\test_data\\north_macedonian\\mk_subtask2a_test_unlabeled.json'
mk_val_dataset = MemeDataset(mk_val_image_dir, mk_val_annotation_file, tokenizer, label_mapping, transform=image_transforms)
mk_val_loader = DataLoader(mk_val_dataset, batch_size=batch_size, shuffle=False)

# Evaluate the model on validation data and save predictions
output_file = 'mk_resnet50_predictions.json'
evaluate(model, mk_val_loader, output_file)

In [19]:
!python subtask_1_2a.py -g gold_labels_ar_bg_md_version2/test_subtask2a_md.json -p mk_resnet50_predictions.json

f1_h=0.60028	prec_h=0.76690	rec_h=0.49314


In [20]:
# Load the validation file to get the list of IDs
with open('C:\\Users\\harih\\Downloads\\test_data\\test_data\\bulgarian\\bg_subtask2a_test_unlabeled.json', 'r', encoding='utf-8') as f:
    bg_validation_data = json.load(f)
    id_mapping = {idx: item['id'] for idx, item in enumerate(bg_validation_data)}
bg_val_image_dir = 'C:\\Users\\harih\\Downloads\\test_images\\test_images\\subtask1_2a\\bulgarian'
bg_val_annotation_file = 'C:\\Users\\harih\\Downloads\\test_data\\test_data\\bulgarian\\bg_subtask2a_test_unlabeled.json'
bg_val_dataset = MemeDataset(bg_val_image_dir, bg_val_annotation_file, tokenizer, label_mapping, transform=image_transforms)
bg_val_loader = DataLoader(bg_val_dataset, batch_size=batch_size, shuffle=False)

# Evaluate the model on validation data and save predictions
output_file = 'bg_resnet50_predictions.json'
evaluate(model, bg_val_loader, output_file)

In [21]:
!python subtask_1_2a.py -g gold_labels_ar_bg_md_version2/test_subtask2a_bg.json -p bg_resnet50_predictions.json

f1_h=0.56332	prec_h=0.71315	rec_h=0.46552


In [22]:
# Load the validation file to get the list of IDs
with open('C:\\Users\\harih\\Downloads\\test_data_arabic\\test_data_arabic\\ar_subtask2a_test_unlabeled.json', 'r', encoding='utf-8') as f:
    ar_validation_data = json.load(f)
    id_mapping = {idx: item['id'] for idx, item in enumerate(ar_validation_data)}
ar_val_image_dir = 'C:\\Users\\harih\\Downloads\\test_images_arabic\\test_images_arabic\\subtask2a'
ar_val_annotation_file = 'C:\\Users\\harih\\Downloads\\test_data_arabic\\test_data_arabic\\ar_subtask2a_test_unlabeled.json'
ar_val_dataset = MemeDataset(ar_val_image_dir, ar_val_annotation_file, tokenizer, label_mapping, transform=image_transforms)
ar_val_loader = DataLoader(ar_val_dataset, batch_size=batch_size, shuffle=False)

# Evaluate the model on validation data and save predictions
output_file = 'ar_resnet50_predictions.json'
evaluate(model, ar_val_loader, output_file)

In [23]:
!python subtask_1_2a.py -g gold_labels_ar_bg_md_version2/test_subtask2a_ar.json -p ar_resnet50_predictions.json

f1_h=0.39416	prec_h=0.52941	rec_h=0.31395


In [15]:
# Load the validation file to get the list of IDs
def evaluate(model, dataloader, output_file):
    model.eval()
    results = []
    
    with torch.no_grad():
        for batch_idx, (images, input_ids, attention_mask, labels) in enumerate(dataloader):
            images, input_ids, attention_mask, labels = images.to(device), input_ids.to(device), attention_mask.to(device), labels.to(device)
            outputs = model(input_ids, attention_mask, images)
            preds = torch.sigmoid(outputs).round().cpu().numpy()
            
            for item_idx, (pred, label) in enumerate(zip(preds, labels.cpu().numpy())):
                result_id = id_mapping[batch_idx * dataloader.batch_size + item_idx]
                result = {
                    "id": result_id,
                    "labels": [reversed_label_mapping[i] for i, p in enumerate(pred) if p == 1.0]
                }
                results.append(result)
    
    # Save results to a JSON file
    with open(output_file, 'w') as f:
        json.dump(results, f, indent=4)

with open('C:\\Users\\harih\\Downloads\\test_data\\test_data\\english\\en_subtask2a_test_unlabeled.json', 'r', encoding='utf-8') as f:
    en_validation_data = json.load(f)
    id_mapping = {idx: item['id'] for idx, item in enumerate(en_validation_data)}
en_val_image_dir = 'C:\\Users\\harih\\Downloads\\test_images\\test_images\\subtask1_2a\\english'
en_val_annotation_file = 'C:\\Users\\harih\\Downloads\\test_data\\test_data\\english\\en_subtask2a_test_unlabeled.json'
en_val_dataset = MemeDataset(en_val_image_dir, en_val_annotation_file, tokenizer, label_mapping, transform=image_transforms)
en_val_loader = DataLoader(en_val_dataset, batch_size=batch_size, shuffle=False)

# Evaluate the model on validation data and save predictions
output_file = 'en_resnet50_predictions1.json'
evaluate(model, en_val_loader, output_file)

  attn_output = torch.nn.functional.scaled_dot_product_attention(
