In [1]:
! pip install transformers
! pip install datasets 
! pip install --upgrade tqdm
! pip install torcheval

Collecting tqdm
  Downloading tqdm-4.66.2-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tqdm-4.66.2-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.3/78.3 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tqdm
  Attempting uninstall: tqdm
    Found existing installation: tqdm 4.66.1
    Uninstalling tqdm-4.66.1:
      Successfully uninstalled tqdm-4.66.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
keras-cv 0.8.2 requires keras-core, which is not installed.
keras-nlp 0.9.3 requires keras-core, which is not installed.
beatrix-jupyterlab 2023.128.151533 requires jupyterlab~=3.6.0, but you have jupyterlab 4.1.6 which is incompatible.
momepy 0.7.0 requires sha

In [2]:
model_path ="/kaggle/working/models/"

In [3]:
!mkdir -p models

In [4]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torcheval.metrics.functional import binary_auroc
from torch.optim.lr_scheduler import ReduceLROnPlateau

from transformers import BertModel, BertTokenizerFast

import os
from PIL import Image
import requests
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2

In [5]:
print(torch.__version__)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device=='cuda')

2.1.2
True


In [6]:
path = "/kaggle/input/facebook-hateful-memes/hateful_memes/"

In [7]:
import pandas as pd
val_seen = pd.read_json(path+'dev_seen.jsonl',lines=True)
val_unseen= pd.read_json(path+'dev_unseen.jsonl',lines=True)
test_seen=pd.read_json(path+'test_seen.jsonl',lines=True)
test_unseen=pd.read_json(path+'test_unseen.jsonl',lines=True)
df_train =pd.read_json(path+'train.jsonl',lines=True)

In [8]:
class HatefulMemesDataset(Dataset):
    def __init__(self, jsonl_file, root_dir, transform=None):
        self.annotations = pd.read_json(jsonl_file, lines=True)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        id = self.annotations.loc[idx, 'id']
        img_path = os.path.join(self.root_dir, self.annotations.loc[idx, 'img'])
        label = self.annotations.loc[idx, 'label']
        text = self.annotations.loc[idx, 'text']

        try:
            # Load image
            image = cv2.imread(img_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Apply transformations
            if self.transform:
                image = self.transform(image)

            sample = {"id": id, "image": image, "label": label, "text": text}

            return sample

        except Exception as e:
            print(f"Error loading image at index {idx}: {e}")
            return None

In [9]:
hm_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

In [10]:
train_jsonl = os.path.join(path, "train.jsonl")
val_seen_jsonl = os.path.join(path, "dev_seen.jsonl")
val_unseen_jsonl = os.path.join(path, "dev_unseen.jsonl")
test_seen_jsonl = os.path.join(path, "test_seen.jsonl")
test_unseen_jsonl = os.path.join(path, "test_unseen.jsonl")

In [11]:
# Create datasets
train_dataset = HatefulMemesDataset(jsonl_file=train_jsonl, root_dir=path, transform=hm_transform)
val_seen_dataset = HatefulMemesDataset(jsonl_file=val_seen_jsonl, root_dir=path, transform=hm_transform)
val_unseen_dataset = HatefulMemesDataset(jsonl_file=val_unseen_jsonl, root_dir=path, transform=hm_transform)
test_seen_dataset = HatefulMemesDataset(jsonl_file=test_seen_jsonl, root_dir=path, transform=hm_transform)
test_unseen_dataset = HatefulMemesDataset(jsonl_file=test_unseen_jsonl, root_dir=path, transform=hm_transform)

In [12]:
# Hyperparameters
batch_size = 64

In [13]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [14]:
val_dataset = ConcatDataset([val_seen_dataset, val_unseen_dataset])
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [15]:
test_dataset = ConcatDataset([test_seen_dataset, test_unseen_dataset])
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## Metrics

In [16]:
def get_metrics_and_losses(losses, predictions, labels):
  average_loss = losses.mean().item()
  accuracy = (predictions == labels).sum().item() / labels.numel()
  auroc = binary_auroc(predictions, labels)

  return average_loss, accuracy, auroc

# Resnet152

In [17]:
resnet152 = models.resnet152(pretrained=True)
resnet152_fe = nn.Sequential(*list(resnet152.children())[:-1])
for p in resnet152_fe.parameters():
    p.requires_grad = False
resnet152_fe.to(device)

Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:01<00:00, 166MB/s]  


Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


# BERT

In [18]:
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased")
for param in bert_model.parameters():
    param.requires_grad = False
bert_model.to(device)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
  

# Head

In [19]:
class SimpleHead(nn.Module):
    def __init__(self):
        super().__init__()

        self.linear_stack = nn.Sequential(
            nn.BatchNorm1d(2816),
            nn.Linear(2816, 1024),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(1024, 2),
        )
    
    def forward(self, x):
        logits = self.linear_stack(x)
        return logits

In [20]:
head = SimpleHead()
head.to(device)

SimpleHead(
  (linear_stack): Sequential(
    (0): BatchNorm1d(2816, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Linear(in_features=2816, out_features=1024, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=1024, out_features=2, bias=True)
  )
)

In [21]:
lr = 1e-5
epochs = 40
weight_decay = 1e-3
accumulate_steps = 4

# Define criterion
criterion = nn.CrossEntropyLoss()

# Define optimizer
optimizer = optim.Adam(head.parameters(), lr=lr, weight_decay=weight_decay)

# Initialize learning rate scheduler
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=5, verbose=True)

# Initialize early stopping parameters
early_stopping_counter = 0
early_stopping_patience = 5
best_validation_auroc = float('-inf')

In [22]:
# Train model head
for e in range(epochs):
    # Training
    train_losses = torch.zeros(len(train_dataloader)).to(device)
    train_predictions = torch.Tensor().to(device)
    train_labels = torch.Tensor().to(device)

    resnet152_fe.train()
    bert_model.train()
    head.train()

    for i, data in tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc="Epoch " + str(e+1) + " Training"):
        text = data['text']
        image = data['image'].to(device).float()
        labels = data['label'].to(device).to(torch.int64)

        optimizer.zero_grad()

        image_features = resnet152_fe(image).squeeze()

        tokenized_input = tokenizer(text, padding=True, return_tensors="pt").to(device)
        text_features = bert_model(**tokenized_input)

        concat_features = torch.cat((image_features, text_features["pooler_output"]), dim=1)

        scores = torch.squeeze(head(concat_features))
        predictions = scores.argmax(dim=-1)

        loss = criterion(scores, labels)
        loss.backward()
        optimizer.step()

        train_losses[i] = loss
        train_predictions = torch.cat((train_predictions, predictions))
        train_labels = torch.cat((train_labels, labels))

        train_average_loss, train_accuracy, train_auroc = get_metrics_and_losses(train_losses, train_predictions, train_labels)

    # Validation
    validate_losses = torch.zeros(len(val_dataloader)).to(device)
    validate_predictions = torch.Tensor().to(device)
    validate_labels = torch.Tensor().to(device)

    resnet152_fe.eval()
    bert_model.eval()
    head.eval()

    with torch.no_grad():
        for i, data in tqdm(enumerate(val_dataloader), total=len(val_dataloader), desc="Epoch " + str(e+1) + " Validation"):
            text = data['text']
            image = data['image'].to(device).float()
            labels = data['label'].to(device).to(torch.int64)

            image_features = resnet152_fe(image).squeeze()

            tokenized_input = tokenizer(text, padding=True, return_tensors="pt").to(device)
            text_features = bert_model(**tokenized_input)

            concat_features = torch.cat((image_features, text_features["pooler_output"]), dim=1)

            scores = torch.squeeze(head(concat_features))
            predictions = scores.argmax(dim=-1)

            loss = criterion(scores, labels)

            validate_losses[i] = loss
            validate_predictions = torch.cat((validate_predictions, predictions), dim=0)
            validate_labels = torch.cat((validate_labels, labels), dim=0)

    validate_average_loss, validate_accuracy, validate_auroc = get_metrics_and_losses(validate_losses, validate_predictions, validate_labels)

    # Update learning rate scheduler
    scheduler.step(validate_auroc)

    # Early stopping
    if validate_auroc > best_validation_auroc:
        best_validation_auroc = validate_auroc
        early_stopping_counter = 0
        # Save the best model
        torch.save(head, model_path + "/concat_bert_best.pt")
        print("New best model saved at epoch", e+1)
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= early_stopping_patience:
            print("Early stopping triggered at epoch", e+1)
            break

    # Additional logging or tasks can be performed here
    print("Epoch %d" % (e+1))
    print("Training Loss: %.4f. Validation Loss: %.4f. " % (train_average_loss, validate_average_loss))
    print("Training Accuracy: %.4f. Validation Accuracy: %.4f. " % (train_accuracy, validate_accuracy))
    print("Training AUROC: %.4f. Validation AUROC: %.4f. " % (train_auroc, validate_auroc))
    print("-----------------------------------")

print("Training complete.")

Epoch 1 Training: 100%|██████████| 133/133 [03:50<00:00,  1.73s/it]
Epoch 1 Validation: 100%|██████████| 17/17 [00:24<00:00,  1.45s/it]


New best model saved at epoch 1
Epoch 1
Training Loss: 0.6474. Validation Loss: 0.6957. 
Training Accuracy: 0.6228. Validation Accuracy: 0.5740. 
Training AUROC: 0.5683. Validation AUROC: 0.5210. 
-----------------------------------


Epoch 2 Training: 100%|██████████| 133/133 [03:08<00:00,  1.42s/it]
Epoch 2 Validation: 100%|██████████| 17/17 [00:22<00:00,  1.29s/it]


New best model saved at epoch 2
Epoch 2
Training Loss: 0.6048. Validation Loss: 0.7022. 
Training Accuracy: 0.6755. Validation Accuracy: 0.5904. 
Training AUROC: 0.5847. Validation AUROC: 0.5348. 
-----------------------------------


Epoch 3 Training: 100%|██████████| 133/133 [03:11<00:00,  1.44s/it]
Epoch 3 Validation: 100%|██████████| 17/17 [00:21<00:00,  1.29s/it]


Epoch 3
Training Loss: 0.5899. Validation Loss: 0.7068. 
Training Accuracy: 0.6878. Validation Accuracy: 0.5875. 
Training AUROC: 0.5994. Validation AUROC: 0.5312. 
-----------------------------------


Epoch 4 Training: 100%|██████████| 133/133 [03:08<00:00,  1.42s/it]
Epoch 4 Validation: 100%|██████████| 17/17 [00:22<00:00,  1.29s/it]


Epoch 4
Training Loss: 0.5777. Validation Loss: 0.7067. 
Training Accuracy: 0.6982. Validation Accuracy: 0.5817. 
Training AUROC: 0.6137. Validation AUROC: 0.5275. 
-----------------------------------


Epoch 5 Training: 100%|██████████| 133/133 [03:11<00:00,  1.44s/it]
Epoch 5 Validation: 100%|██████████| 17/17 [00:21<00:00,  1.29s/it]


Epoch 5
Training Loss: 0.5669. Validation Loss: 0.7129. 
Training Accuracy: 0.7104. Validation Accuracy: 0.5798. 
Training AUROC: 0.6312. Validation AUROC: 0.5208. 
-----------------------------------


Epoch 6 Training: 100%|██████████| 133/133 [03:16<00:00,  1.48s/it]
Epoch 6 Validation: 100%|██████████| 17/17 [00:27<00:00,  1.62s/it]


Epoch 6
Training Loss: 0.5560. Validation Loss: 0.7105. 
Training Accuracy: 0.7171. Validation Accuracy: 0.5846. 
Training AUROC: 0.6373. Validation AUROC: 0.5281. 
-----------------------------------


Epoch 7 Training: 100%|██████████| 133/133 [03:12<00:00,  1.45s/it]
Epoch 7 Validation: 100%|██████████| 17/17 [00:22<00:00,  1.30s/it]

Early stopping triggered at epoch 7
Training complete.





In [23]:
# Rebuild the model architecture
best_head = torch.load(model_path + "/concat_bert_best.pt")
best_head.to(device)

# Evaluate on test set
test_losses = torch.zeros(len(test_dataloader)).to(device)
test_predictions = torch.Tensor().to(device)
test_labels = torch.Tensor().to(device)

best_head.eval()

with torch.no_grad():
    for i, data in tqdm(enumerate(test_dataloader), total=len(test_dataloader), desc="Testing"):
        text = data['text']
        image = data['image'].to(device).float()
        labels = data['label'].to(device).to(torch.int64)

        image_features = resnet152_fe(image).squeeze()

        tokenized_input = tokenizer(text, padding=True, return_tensors="pt").to(device)
        text_features = bert_model(**tokenized_input)

        concat_features = torch.cat((image_features, text_features["pooler_output"]), dim=1)

        scores = torch.squeeze(best_head(concat_features))
        predictions = scores.argmax(dim=-1)

        loss = criterion(scores, labels)

        test_losses[i] = loss
        test_predictions = torch.cat((test_predictions, predictions), dim=0)
        test_labels = torch.cat((test_labels, labels), dim=0)

test_average_loss, test_accuracy, test_auroc = get_metrics_and_losses(test_losses, test_predictions, test_labels)

print("Test Loss: %.4f" % test_average_loss)
print("Test Accuracy: %.4f" % test_accuracy)
print("Test AUROC: %.4f" % test_auroc)

Testing: 100%|██████████| 47/47 [01:17<00:00,  1.64s/it]

Test Loss: 0.6819
Test Accuracy: 0.6080
Test AUROC: 0.5406



