In [1]:
import torch
torch.manual_seed(0)
from PIL import Image
import json
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

In [2]:
import re

def clean_text(text):
    text = re.sub(r'(\\n)+', '. ', text)
    text = text.replace("\\", " ")

    text = re.sub(r'\s+', ' ', text).strip()

    return text

In [3]:
# Mapping of labels to indices
label_dict = {
    'Appeal to (Strong) Emotions': 0,
    'Appeal to authority': 1,
    'Appeal to fear/prejudice': 2,
    'Bandwagon': 3,
    'Black-and-white Fallacy/Dictatorship': 4,
    'Causal Oversimplification': 5,
    'Doubt': 6,
    'Exaggeration/Minimisation': 7,
    'Flag-waving': 8,
    'Glittering generalities (Virtue)': 9,
    'Loaded Language': 10,
    "Misrepresentation of Someone's Position (Straw Man)": 11,
    'Name calling/Labeling': 12,
    'Obfuscation, Intentional vagueness, Confusion': 13,
    'Presenting Irrelevant Data (Red Herring)': 14,
    'Reductio ad hitlerum': 15,
    'Repetition': 16,
    'Slogans': 17,
    'Smears': 18,
    'Thought-terminating cliche': 19,
    'Transfer': 20,
    'Whataboutism': 21
}

In [4]:
persuasion_techniques = [key for key, value in label_dict.items()]

In [5]:
class MemesDataset(Dataset):
    def __init__(self, json_file, img_dir, label_dict, transform=None):
        with open(json_file, 'r', encoding='utf-8') as f:
            self.data = json.load(f)
        self.img_dir = img_dir
        self.label_dict = label_dict
        self.num_labels = len(label_dict)
        self.transform = transform or transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        img_path = f"{self.img_dir}/{item['image']}"
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        text = clean_text(item['text'])
        # Create a zero vector of length num_labels
        label_vector = torch.zeros(self.num_labels, dtype=torch.float32)
        # Set positions of labels to 1
        # labels = [label.replace('é', 'e') for label in item['labels']]
        for label in item['labels']:
            if label in self.label_dict:
                label_idx = self.label_dict[label]
                label_vector[label_idx] = 1
        
        # print(label_vector)
        return image, text, label_vector


In [6]:
!pip install transformers


Defaulting to user installation because normal site-packages is not writeable


In [7]:
from transformers import CLIPModel
import torch.nn as nn

class CLIPClassifier(nn.Module):
    def __init__(self, num_labels):
        super(CLIPClassifier, self).__init__()
        self.clip = CLIPModel.from_pretrained('openai/clip-vit-base-patch32')
        # Get the output dimension of the text and image embeddings from the model's configuration
        embedding_dim = self.clip.config.projection_dim
        self.classifier = nn.Linear(embedding_dim * 2, num_labels)  # Assuming concatenation of embeddings

    def forward(self, input_ids, pixel_values, attention_mask):
        outputs = self.clip(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask)
        # Concatenate text and image embeddings along the last dimension
        combined_features = torch.cat((outputs.text_embeds, outputs.image_embeds), dim=-1)
        logits = self.classifier(combined_features)
        return logits

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from transformers import CLIPProcessor

# Initialize the dataset and dataloader
img_dir = 'train_images/train_images'
json_file = 'annotations_v2/semeval2024_dev_release/subtask2a/train.json'
dataset = MemesDataset(json_file, img_dir, label_dict)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Initialize the processor and model
processor = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32')
# model = CLIPModel.from_pretrained('openai/clip-vit-base-patch32')
model = CLIPClassifier(num_labels=22)
model.train()

CLIPClassifier(
  (clip): CLIPModel(
    (text_model): CLIPTextTransformer(
      (embeddings): CLIPTextEmbeddings(
        (token_embedding): Embedding(49408, 512)
        (position_embedding): Embedding(77, 512)
      )
      (encoder): CLIPEncoder(
        (layers): ModuleList(
          (0-11): 12 x CLIPEncoderLayer(
            (self_attn): CLIPAttention(
              (k_proj): Linear(in_features=512, out_features=512, bias=True)
              (v_proj): Linear(in_features=512, out_features=512, bias=True)
              (q_proj): Linear(in_features=512, out_features=512, bias=True)
              (out_proj): Linear(in_features=512, out_features=512, bias=True)
            )
            (layer_norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
            (mlp): CLIPMLP(
              (activation_fn): QuickGELUActivation()
              (fc1): Linear(in_features=512, out_features=2048, bias=True)
              (fc2): Linear(in_features=2048, out_features=512, bias=True)
 

In [9]:
import torch.optim as optim
from tqdm import tqdm


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-4)
epochs = 100
loss_fn = torch.nn.BCEWithLogitsLoss()

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for images, texts, labels in tqdm(dataloader):
        # print(labels)
        inputs = processor(text=texts, images=images, return_tensors="pt", padding=True, truncation=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Ensure the model forward method aligns with inputs correctly
        logits = model(input_ids=inputs['input_ids'], pixel_values=inputs['pixel_values'], attention_mask=inputs['attention_mask'])

        loss = loss_fn(logits, labels.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Average Loss: {total_loss / len(dataloader)}")


  0%|          | 0/110 [00:00<?, ?it/s]It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.
100%|██████████| 110/110 [19:00<00:00, 10.37s/it]


Epoch 1, Average Loss: 0.5552657029845498


100%|██████████| 110/110 [18:14<00:00,  9.95s/it]


Epoch 2, Average Loss: 0.43526397130706096


100%|██████████| 110/110 [18:18<00:00,  9.98s/it]


Epoch 3, Average Loss: 0.36576067209243773


100%|██████████| 110/110 [18:00<00:00,  9.82s/it]


Epoch 4, Average Loss: 0.3257249507037076


100%|██████████| 110/110 [18:08<00:00,  9.90s/it]


Epoch 5, Average Loss: 0.30012378882278096


100%|██████████| 110/110 [18:22<00:00, 10.02s/it]


Epoch 6, Average Loss: 0.28164829327301544


100%|██████████| 110/110 [17:38<00:00,  9.62s/it]


Epoch 7, Average Loss: 0.2662284577434713


100%|██████████| 110/110 [17:53<00:00,  9.76s/it]


Epoch 8, Average Loss: 0.25357123152776195


100%|██████████| 110/110 [18:30<00:00, 10.09s/it]


Epoch 9, Average Loss: 0.2426310573111881


100%|██████████| 110/110 [18:08<00:00,  9.90s/it]


Epoch 10, Average Loss: 0.23187359788201073


100%|██████████| 110/110 [18:08<00:00,  9.89s/it]


Epoch 11, Average Loss: 0.22227740477431904


100%|██████████| 110/110 [17:45<00:00,  9.69s/it]


Epoch 12, Average Loss: 0.21392529810016805


100%|██████████| 110/110 [18:06<00:00,  9.88s/it]


Epoch 13, Average Loss: 0.20570026595484125


100%|██████████| 110/110 [18:40<00:00, 10.18s/it]


Epoch 14, Average Loss: 0.19838103083047


100%|██████████| 110/110 [17:37<00:00,  9.62s/it]


Epoch 15, Average Loss: 0.19131986268542028


100%|██████████| 110/110 [17:39<00:00,  9.63s/it]


Epoch 16, Average Loss: 0.18493329814889214


100%|██████████| 110/110 [17:56<00:00,  9.79s/it]


Epoch 17, Average Loss: 0.17911764139478856


100%|██████████| 110/110 [17:37<00:00,  9.62s/it]


Epoch 18, Average Loss: 0.17297728630629453


100%|██████████| 110/110 [17:48<00:00,  9.71s/it]


Epoch 19, Average Loss: 0.16692242419177836


100%|██████████| 110/110 [18:21<00:00, 10.01s/it]


Epoch 20, Average Loss: 0.1615450950508768


100%|██████████| 110/110 [17:38<00:00,  9.62s/it]


Epoch 21, Average Loss: 0.15609032288193703


100%|██████████| 110/110 [17:48<00:00,  9.71s/it]


Epoch 22, Average Loss: 0.1510449773885987


100%|██████████| 110/110 [18:05<00:00,  9.87s/it]


Epoch 23, Average Loss: 0.1468775848773393


100%|██████████| 110/110 [18:00<00:00,  9.82s/it]


Epoch 24, Average Loss: 0.14290643578225917


100%|██████████| 110/110 [17:43<00:00,  9.67s/it]


Epoch 25, Average Loss: 0.1387164902958003


100%|██████████| 110/110 [17:54<00:00,  9.76s/it]


Epoch 26, Average Loss: 0.1345663586123423


100%|██████████| 110/110 [18:00<00:00,  9.82s/it]


Epoch 27, Average Loss: 0.12995725016702306


100%|██████████| 110/110 [18:21<00:00, 10.02s/it]


Epoch 28, Average Loss: 0.1257609067315405


100%|██████████| 110/110 [18:10<00:00,  9.92s/it]


Epoch 29, Average Loss: 0.1218655376271768


100%|██████████| 110/110 [17:57<00:00,  9.80s/it]


Epoch 30, Average Loss: 0.11812115237116813


100%|██████████| 110/110 [18:00<00:00,  9.83s/it]


Epoch 31, Average Loss: 0.11480468078093095


100%|██████████| 110/110 [18:35<00:00, 10.14s/it]


Epoch 32, Average Loss: 0.11059601469473405


100%|██████████| 110/110 [18:13<00:00,  9.94s/it]


Epoch 33, Average Loss: 0.10768330503593791


100%|██████████| 110/110 [17:52<00:00,  9.75s/it]


Epoch 34, Average Loss: 0.1057523713870482


100%|██████████| 110/110 [17:54<00:00,  9.77s/it]


Epoch 35, Average Loss: 0.10407690609043295


100%|██████████| 110/110 [17:49<00:00,  9.73s/it]


Epoch 36, Average Loss: 0.10076335221529006


100%|██████████| 110/110 [17:48<00:00,  9.72s/it]


Epoch 37, Average Loss: 0.09663950401273641


100%|██████████| 110/110 [17:56<00:00,  9.79s/it]


Epoch 38, Average Loss: 0.09303833652626384


100%|██████████| 110/110 [17:58<00:00,  9.81s/it]


Epoch 39, Average Loss: 0.08961951766501773


100%|██████████| 110/110 [18:01<00:00,  9.83s/it]


Epoch 40, Average Loss: 0.08706690100106326


100%|██████████| 110/110 [17:40<00:00,  9.64s/it]


Epoch 41, Average Loss: 0.08445824723352086


100%|██████████| 110/110 [17:58<00:00,  9.81s/it]


Epoch 42, Average Loss: 0.08346777592192997


100%|██████████| 110/110 [17:59<00:00,  9.81s/it]


Epoch 43, Average Loss: 0.08171757259829478


100%|██████████| 110/110 [17:29<00:00,  9.54s/it]


Epoch 44, Average Loss: 0.08013396053151652


100%|██████████| 110/110 [17:58<00:00,  9.81s/it]


Epoch 45, Average Loss: 0.07926593368703669


100%|██████████| 110/110 [18:12<00:00,  9.93s/it]


Epoch 46, Average Loss: 0.07585795575922186


100%|██████████| 110/110 [17:59<00:00,  9.81s/it]


Epoch 47, Average Loss: 0.07261303561654958


100%|██████████| 110/110 [17:59<00:00,  9.82s/it]


Epoch 48, Average Loss: 0.07005236148834229


100%|██████████| 110/110 [18:11<00:00,  9.92s/it]


Epoch 49, Average Loss: 0.06745050210844386


100%|██████████| 110/110 [17:41<00:00,  9.65s/it]


Epoch 50, Average Loss: 0.06479567302898927


100%|██████████| 110/110 [18:01<00:00,  9.83s/it]


Epoch 51, Average Loss: 0.062444556165825234


100%|██████████| 110/110 [17:53<00:00,  9.76s/it]


Epoch 52, Average Loss: 0.060172099383039905


100%|██████████| 110/110 [17:47<00:00,  9.71s/it]


Epoch 53, Average Loss: 0.05790683701634407


100%|██████████| 110/110 [17:27<00:00,  9.52s/it]


Epoch 54, Average Loss: 0.05625682303851301


100%|██████████| 110/110 [18:31<00:00, 10.10s/it]


Epoch 55, Average Loss: 0.05463418533856219


100%|██████████| 110/110 [18:05<00:00,  9.87s/it]


Epoch 56, Average Loss: 0.05316747525198893


100%|██████████| 110/110 [17:43<00:00,  9.67s/it]


Epoch 57, Average Loss: 0.05269422568380833


100%|██████████| 110/110 [17:46<00:00,  9.70s/it]


Epoch 58, Average Loss: 0.053190366246483545


100%|██████████| 110/110 [18:33<00:00, 10.12s/it]


Epoch 59, Average Loss: 0.05333545922555707


100%|██████████| 110/110 [18:00<00:00,  9.83s/it]


Epoch 60, Average Loss: 0.05054063698784871


100%|██████████| 110/110 [17:44<00:00,  9.68s/it]


Epoch 61, Average Loss: 0.04752902104095979


100%|██████████| 110/110 [17:55<00:00,  9.78s/it]


Epoch 62, Average Loss: 0.045696403069252316


100%|██████████| 110/110 [17:59<00:00,  9.81s/it]


Epoch 63, Average Loss: 0.04402409378777851


100%|██████████| 110/110 [18:06<00:00,  9.88s/it]


Epoch 64, Average Loss: 0.042315797338431535


100%|██████████| 110/110 [17:55<00:00,  9.78s/it]


Epoch 65, Average Loss: 0.0408795541999015


100%|██████████| 110/110 [17:43<00:00,  9.67s/it]


Epoch 66, Average Loss: 0.039318857138807124


100%|██████████| 110/110 [18:10<00:00,  9.92s/it]


Epoch 67, Average Loss: 0.03877137007022446


100%|██████████| 110/110 [18:28<00:00, 10.07s/it]


Epoch 68, Average Loss: 0.03760659670965238


100%|██████████| 110/110 [18:31<00:00, 10.11s/it]


Epoch 69, Average Loss: 0.035787325843491334


100%|██████████| 110/110 [18:15<00:00,  9.96s/it]


Epoch 70, Average Loss: 0.03453416013243524


100%|██████████| 110/110 [18:21<00:00, 10.02s/it]


Epoch 71, Average Loss: 0.03333230248906396


100%|██████████| 110/110 [18:19<00:00, 10.00s/it]


Epoch 72, Average Loss: 0.031758158484643154


100%|██████████| 110/110 [18:43<00:00, 10.21s/it]


Epoch 73, Average Loss: 0.030821976421231575


100%|██████████| 110/110 [18:03<00:00,  9.85s/it]


Epoch 74, Average Loss: 0.029703955724835397


100%|██████████| 110/110 [18:09<00:00,  9.90s/it]


Epoch 75, Average Loss: 0.02858529780060053


100%|██████████| 110/110 [18:36<00:00, 10.15s/it]


Epoch 76, Average Loss: 0.027708394181999295


100%|██████████| 110/110 [17:44<00:00,  9.67s/it]


Epoch 77, Average Loss: 0.026911997185512022


100%|██████████| 110/110 [17:52<00:00,  9.75s/it]


Epoch 78, Average Loss: 0.02607986218380657


100%|██████████| 110/110 [17:38<00:00,  9.62s/it]


Epoch 79, Average Loss: 0.02571393373337659


100%|██████████| 110/110 [17:54<00:00,  9.77s/it]


Epoch 80, Average Loss: 0.027499104485931722


100%|██████████| 110/110 [18:12<00:00,  9.93s/it]


Epoch 81, Average Loss: 0.033699553493749014


100%|██████████| 110/110 [18:06<00:00,  9.88s/it]


Epoch 82, Average Loss: 0.03377358503639698


100%|██████████| 110/110 [18:07<00:00,  9.89s/it]


Epoch 83, Average Loss: 0.028543468581681903


100%|██████████| 110/110 [17:58<00:00,  9.80s/it]


Epoch 84, Average Loss: 0.02510570887137543


100%|██████████| 110/110 [17:39<00:00,  9.63s/it]


Epoch 85, Average Loss: 0.02303664129735394


100%|██████████| 110/110 [18:30<00:00, 10.09s/it]


Epoch 86, Average Loss: 0.021428138940510426


100%|██████████| 110/110 [17:26<00:00,  9.52s/it]


Epoch 87, Average Loss: 0.02046654594384811


100%|██████████| 110/110 [18:25<00:00, 10.05s/it]


Epoch 88, Average Loss: 0.019663128536194563


100%|██████████| 110/110 [18:17<00:00,  9.98s/it]


Epoch 89, Average Loss: 0.018988504641774026


100%|██████████| 110/110 [18:05<00:00,  9.87s/it]


Epoch 90, Average Loss: 0.018302188026295466


100%|██████████| 110/110 [18:14<00:00,  9.95s/it]


Epoch 91, Average Loss: 0.017736201953481544


100%|██████████| 110/110 [17:46<00:00,  9.69s/it]


Epoch 92, Average Loss: 0.01721789129925045


100%|██████████| 110/110 [18:28<00:00, 10.08s/it]


Epoch 93, Average Loss: 0.016753530519252473


100%|██████████| 110/110 [18:20<00:00, 10.01s/it]


Epoch 94, Average Loss: 0.01618943095037883


100%|██████████| 110/110 [18:06<00:00,  9.88s/it]


Epoch 95, Average Loss: 0.015766179434616456


100%|██████████| 110/110 [18:19<00:00, 10.00s/it]


Epoch 96, Average Loss: 0.015315093701197342


100%|██████████| 110/110 [18:11<00:00,  9.92s/it]


Epoch 97, Average Loss: 0.014874098399146036


100%|██████████| 110/110 [18:10<00:00,  9.92s/it]


Epoch 98, Average Loss: 0.014454013752666387


100%|██████████| 110/110 [18:20<00:00, 10.00s/it]


Epoch 99, Average Loss: 0.014049320244653659


100%|██████████| 110/110 [18:40<00:00, 10.18s/it]

Epoch 100, Average Loss: 0.013693082417276773





In [10]:
model.eval()

# processor = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32')
import os

def load_and_preprocess_data(entry, img_dir, processor, max_length=77):
    # Load the image
    image_path = os.path.join(img_dir, entry['image'])
    image = Image.open(image_path).convert("RGB")
    
    # Truncate text to the maximum length allowed by CLIP
    text = entry['text']
    inputs = processor(text=[text[:max_length]], images=[image], return_tensors="pt", padding=True, truncation=True)
    return inputs


def perform_inference(model, processor, data, img_dir, threshold=0.4):
    results = []
    for entry in tqdm(data):
        try:
            inputs = load_and_preprocess_data(entry, img_dir, processor)
            inputs = {key: val.to(device) for key, val in inputs.items()}  # Move to GPU if available

            with torch.no_grad():
                outputs = model(**inputs)
                logits = outputs  # Assuming logits are being correctly handled as per your model
                probs = torch.sigmoid(logits).squeeze()
                # print(probs)
                # print(probs)
                predicted_labels = probs > threshold
                # print(predicted_labels)
                # print(predicted_labels_indices)
                predicted_labels_indices = predicted_labels.nonzero()
                # print(predicted_labels_indices)
                
                predicted_labels_indices = predicted_labels_indices.cpu().squeeze().numpy()
                
                
                # print(predicted_labels_indices, type(predicted_labels_indices))
                
                if predicted_labels_indices.ndim == 0:
                    predicted_labels_indices = [predicted_labels_indices.item()]
                
                # print(predicted_labels_indices)
                predicted_labels = [persuasion_techniques[idx] for idx in predicted_labels_indices]
                

            results.append({
                "id": entry['id'],
                "labels": predicted_labels
            })
        except Exception as e:
            print(f"Error processing entry {entry['id']}: {str(e)}")
            results.append({
                "id": entry['id'],
                "labels": []
            })
        
    return results



In [11]:
def load_data(json_file):
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

data = load_data('annotations_v2/semeval2024_dev_release/subtask2a/dev_unlabeled.json')
img_dir = 'dev_images/dev_images'


results = perform_inference(model, processor, data, img_dir)

# Saving the results to a JSON file
with open('predicted_labels_clip_devUn.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=4)

100%|██████████| 1000/1000 [00:59<00:00, 16.67it/s]


In [12]:
with open('predicted_labels_clip_devUn.json', 'r', encoding='utf-8') as f:
    pdata = f.read()
    print(pdata)


[
    {
        "id": "63866",
        "labels": [
            "Glittering generalities (Virtue)"
        ]
    },
    {
        "id": "78505",
        "labels": [
            "Flag-waving",
            "Name calling/Labeling",
            "Slogans",
            "Smears"
        ]
    },
    {
        "id": "65750",
        "labels": [
            "Smears"
        ]
    },
    {
        "id": "78716",
        "labels": [
            "Smears"
        ]
    },
    {
        "id": "67988",
        "labels": [
            "Black-and-white Fallacy/Dictatorship",
            "Loaded Language"
        ]
    },
    {
        "id": "65792",
        "labels": [
            "Smears"
        ]
    },
    {
        "id": "66205",
        "labels": [
            "Smears"
        ]
    },
    {
        "id": "65097",
        "labels": [
            "Smears"
        ]
    },
    {
        "id": "77523",
        "labels": [
            "Repetition",
            "Smears"
        ]
    },
    {
        "

In [13]:
!pip install numpy scikit-learn networkx sklearn-hierarchical-classification


Defaulting to user installation because normal site-packages is not writeable


In [14]:
! python subtask_1_2a.py -g dev_gold_labels\dev_gold_labels\dev_subtask2a_en.json -p predicted_labels_clip_devUn.json

f1_h=0.52785	prec_h=0.60858	rec_h=0.46603


In [15]:
data = load_data('test_data/test_data/north_macedonian/mk_subtask2a_test_unlabeled.json')
img_dir = 'test_images/test_images/subtask1_2a/north_macedonian'

results = perform_inference(model, processor, data, img_dir)

# Saving the results to a JSON file
with open('clip100_mk_predictions.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=4)

100%|██████████| 259/259 [00:14<00:00, 17.60it/s]


In [16]:
!python subtask_1_2a.py -g gold_labels_ar_bg_md_version2/test_subtask2a_md.json -p clip100_mk_predictions.json

f1_h=0.62421	prec_h=0.69522	rec_h=0.56636


In [17]:
data = load_data('test_data/test_data/bulgarian/bg_subtask2a_test_unlabeled.json')
img_dir = 'test_images/test_images/subtask1_2a/bulgarian'

results = perform_inference(model, processor, data, img_dir)

# Saving the results to a JSON file
with open('clip100_bg_predictions.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=4)

100%|██████████| 436/436 [00:24<00:00, 17.46it/s]


In [18]:
!python subtask_1_2a.py -g gold_labels_ar_bg_md_version2/test_subtask2a_bg.json -p clip100_bg_predictions.json

f1_h=0.55658	prec_h=0.59778	rec_h=0.52069


In [19]:
data = load_data('test_data_arabic/test_data_arabic/ar_subtask2a_test_unlabeled.json')
img_dir = 'test_images_arabic/test_images_arabic/subtask2a'

results = perform_inference(model, processor, data, img_dir)

# Saving the results to a JSON file
with open('clip100_ar_predictions.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=4)

100%|██████████| 120/120 [00:06<00:00, 17.72it/s]


In [20]:
!python subtask_1_2a.py -g gold_labels_ar_bg_md_version2/test_subtask2a_ar.json -p clip100_ar_predictions.json

f1_h=0.49914	prec_h=0.51408	rec_h=0.48505


In [21]:
data = load_data('test_data/test_data/english/en_subtask2a_test_unlabeled.json')
img_dir = 'test_images/test_images/subtask1_2a/english'

results = perform_inference(model, processor, data, img_dir, 0.3)

# Saving the results to a JSON file
with open('clip100_en_predictions.json.txt', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=4)

100%|██████████| 1500/1500 [01:26<00:00, 17.40it/s]


In [22]:
# English test scores
#Hierarchical F1	Hierarchical Precision	Hierarchical Recall
# 0.52467	               0.67117          	0.43067

In [23]:
torch.save(model.state_dict(), 'CLIP100_subtask2a.pth')