In [1]:
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os

os.chdir("/content/drive/My Drive/Colab Notebooks/cs7643")
print("Current working directory:", os.getcwd())

Current working directory: /content/drive/My Drive/Colab Notebooks/cs7643


In [3]:
import os
import urllib.request
import tarfile

# URL of the Hateful Memes features archive
URL = "https://dl.fbaipublicfiles.com/mmf/data/datasets/hateful_memes/defaults/features/features_2020_10_01.tar.gz"
TAR_PATH = "features_2020_10_01.tar.gz"
EXTRACT_DIR = "detectron.lmdb"

# 1. Download the tar.gz file (if not already downloaded)
if not os.path.exists(TAR_PATH):
    print(f"Downloading from {URL} ...")
    urllib.request.urlretrieve(URL, TAR_PATH)
    print(f"Downloaded to {TAR_PATH}")
else:
    print(f"{TAR_PATH} already exists, skipping download.")

# 2. Extract the tar.gz file (if not already extracted)
if not os.path.exists(EXTRACT_DIR):
    print(f"Extracting {TAR_PATH} ...")
    with tarfile.open(TAR_PATH, "r:gz") as tar:
        tar.extractall()
    print(f"Extraction complete. Files are in: {EXTRACT_DIR}/")
else:
    print(f"{EXTRACT_DIR}/ already exists, skipping extraction.")

features_2020_10_01.tar.gz already exists, skipping download.
detectron.lmdb/ already exists, skipping extraction.


In [4]:
!pip install lmdb



In [5]:
import torch
import numpy as np
import random
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)               # for single GPU
    torch.cuda.manual_seed_all(seed)           # if using multi-GPU

    # Ensure deterministic behavior
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # For some CUDA operations (e.g., DataLoader workers)
    os.environ["PYTHONHASHSEED"] = str(seed)

set_seed(42)


In [6]:
import lmdb
import torch
import pickle
from torch.utils.data import Dataset


class HatefulMemesDataset(Dataset):
    def __init__(self, hf_split, lmdb_path, tokenizer):
        """
        hf_split: one split from the HF DatasetDict (e.g. hf_ds['train'])
        lmdb_env: opened lmdb.Environment
        tokenizer: HuggingFace tokenizer (optional)
        """
        self.data = hf_split
        self.lmdb_path = lmdb_path
        self.env = None
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.data)

    def _get_image_id(self, img_path):
        # "img/40259.png" -> "40259"
        return img_path.split("/")[-1].split(".")[0]

    def _load_visual_feats(self, img_id):
        if self.env is None:  # opened separately in each worker
            self.env = lmdb.open(
                self.lmdb_path,
                readonly=True,
                lock=False,
                readahead=False,
                meminit=False,
            )
        with self.env.begin(write=False) as txn:
            buf = txn.get(img_id.encode("utf-8"))
            sample = pickle.loads(buf)
        feats = torch.tensor(
            sample["features"], dtype=torch.float32
        )  # (num_boxes, 2048)
        bbox = torch.tensor(sample["bbox"], dtype=torch.float32)  # (num_boxes, 4)
        return feats, bbox

    def __getitem__(self, idx):
        row = self.data[idx]
        text = row["text"]
        img_path = row["img"]
        label = row["label"]

        img_id = self._get_image_id(img_path)
        visual_embeds, _ = self._load_visual_feats(img_id)

        encoded = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=48,
            return_tensors="pt",
        )

        item = {
            "input_ids": encoded["input_ids"].squeeze(0),
            "attention_mask": encoded["attention_mask"].squeeze(0),
            "token_type_ids": encoded["token_type_ids"].squeeze(0),
            "visual_embeds": visual_embeds,
            "visual_attention_mask": torch.ones(
                visual_embeds.size(0), dtype=torch.long
            ),
            "visual_token_type_ids": torch.zeros(
                visual_embeds.size(0), dtype=torch.long
            ),
            "label": torch.tensor(label, dtype=torch.long),
        }

        return item

In [7]:
import pandas as pd
from datasets import Dataset, load_dataset
from torch.utils.data import DataLoader
from transformers import VisualBertModel, VisualBertConfig, BertTokenizer

dataset = load_dataset("neuralcatcher/hateful_memes")
# Remove duplicates
for i_split, i_data in dataset.items():
    dataset[i_split] = Dataset.from_pandas(
        pd.DataFrame(i_data).drop_duplicates(), preserve_index=False
    )

lmdb_path = "detectron.lmdb"
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

loader_train = DataLoader(
    HatefulMemesDataset(dataset["train"], lmdb_path, tokenizer),
    batch_size=64,
    shuffle=True,
    num_workers=8,
)

loader_validation = DataLoader(
    HatefulMemesDataset(dataset["validation"], lmdb_path, tokenizer),
    batch_size=265,
    shuffle=False,
    num_workers=8,
)

loader_test = DataLoader(
    HatefulMemesDataset(dataset["test"], lmdb_path, tokenizer),
    batch_size=265,
    shuffle=False,
    num_workers=8,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Repo card metadata block was not found. Setting CardData to empty.


In [8]:
import torch.nn as nn
from transformers import VisualBertModel

def freeze_all_but_last_n_layers(model, n):
    # freeze everything
    for p in model.parameters():
        p.requires_grad = False

    # unfreeze last n encoder layers
    total = len(model.encoder.layer)
    for i in range(total - n, total):
        for p in model.encoder.layer[i].parameters():
            p.requires_grad = True

    # unfreeze pooler (optional)
    for p in model.pooler.parameters():
        p.requires_grad = True

class VisualBertForClassification(nn.Module):
    def __init__(self, num_labels=2):
        super().__init__()
        self.visualbert = VisualBertModel.from_pretrained(
            "uclanlp/visualbert-vqa-coco-pre"
        )
        freeze_all_but_last_n_layers(self.visualbert, n=4)
        hidden_size = self.visualbert.config.hidden_size
        self.dropout = nn.Dropout(p=0.5)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(
        self,
        input_ids,
        attention_mask,
        token_type_ids,
        visual_embeds,
        visual_attention_mask,
        visual_token_type_ids,
    ):
        outputs = self.visualbert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            visual_embeds=visual_embeds,
            visual_attention_mask=visual_attention_mask,
            visual_token_type_ids=visual_token_type_ids,
        )

        pooled = outputs.pooler_output  # (B, 768)
        logits = self.classifier(self.dropout(pooled))
        return logits

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device}")

model = VisualBertForClassification().to(device)

optimizer = torch.optim.AdamW([
    {"params": model.visualbert.parameters(), "lr": 5e-6, "weight_decay": 0},
    {"params": model.classifier.parameters(),  "lr": 1e-4, "weight_decay": 1e-4},
])
class_weights = torch.tensor([0.7754, 1.4077], dtype=torch.float).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights, label_smoothing=0.00)

Using cuda


In [10]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

keys2pass = [
    "input_ids",
    "attention_mask",
    "token_type_ids",
    "visual_embeds",
    "visual_attention_mask",
    "visual_token_type_ids",
]
model.eval()
all_preds_validation = []
all_probs_validation = []
all_labels_validation = []
val_loss = 0
with torch.inference_mode():
    for batch in loader_validation:
        batch = {k: v.to(device) for k, v in batch.items()}
        logits = model(**{k: batch[k] for k in keys2pass})
        loss = criterion(logits, batch["label"])
        val_loss += loss.item()

        probs = logits.softmax(dim=1)[:, 1].cpu().numpy()
        preds = logits.argmax(dim=1).cpu().numpy()
        labels = batch["label"].cpu().numpy()

        all_probs_validation.extend(probs)
        all_preds_validation.extend(preds)
        all_labels_validation.extend(labels)

acc = accuracy_score(all_labels_validation, all_preds_validation)
f1 = f1_score(all_labels_validation, all_preds_validation)
auc = roc_auc_score(all_labels_validation, all_probs_validation)
print("Epoch 0")
print(f"Val Loss: {val_loss:.4f} | ACC: {acc:.4f} | F1: {f1:.4f} | AUC: {auc:.4f}")
for epoch in range(15):
    model.train()
    all_preds_train = []
    all_probs_train = []
    all_labels_train = []
    total_loss = 0
    print(f"Epoch {epoch+1}")
    for batch in loader_train:
        batch = {k: v.to(device) for k, v in batch.items()}
        optimizer.zero_grad()
        logits = model(**{k: batch[k] for k in keys2pass})
        loss = criterion(logits, batch["label"])
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_loss += loss.item()

        probs = logits.softmax(dim=1)[:, 1].detach().cpu().numpy()
        preds = logits.argmax(dim=1).detach().cpu().numpy()
        labels = batch["label"].cpu().numpy()

        all_probs_train.extend(probs)
        all_preds_train.extend(preds)
        all_labels_train.extend(labels)
    acc = accuracy_score(all_labels_train, all_preds_train)
    f1 = f1_score(all_labels_train, all_preds_train)
    auc = roc_auc_score(all_labels_train, all_probs_train)
    print(f"Train Loss: {total_loss:.4f} | ACC: {acc:.4f} | F1: {f1:.4f} | AUC: {auc:.4f}")
    # ---- VALIDATION ----
    model.eval()
    all_preds_validation = []
    all_probs_validation = []
    all_labels_validation = []
    val_loss = 0
    with torch.inference_mode():
        for batch in loader_validation:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: batch[k] for k in keys2pass})
            loss = criterion(logits, batch["label"])
            val_loss += loss.item()

            probs = logits.softmax(dim=1)[:, 1].cpu().numpy()
            preds = logits.argmax(dim=1).cpu().numpy()
            labels = batch["label"].cpu().numpy()

            all_probs_validation.extend(probs)
            all_preds_validation.extend(preds)
            all_labels_validation.extend(labels)

    acc = accuracy_score(all_labels_validation, all_preds_validation)
    f1 = f1_score(all_labels_validation, all_preds_validation)
    auc = roc_auc_score(all_labels_validation, all_probs_validation)
    print(f"Val Loss: {val_loss:.4f} | ACC: {acc:.4f} | F1: {f1:.4f} | AUC: {auc:.4f}")

Epoch 0
Val Loss: 2.0465 | ACC: 0.6141 | F1: 0.0000 | AUC: 0.5029
Epoch 1
Train Loss: 95.3630 | ACC: 0.5080 | F1: 0.4098 | AUC: 0.4989
Val Loss: 2.0228 | ACC: 0.6266 | F1: 0.1898 | AUC: 0.5599
Epoch 2
Train Loss: 88.2912 | ACC: 0.6108 | F1: 0.5003 | AUC: 0.6360
Val Loss: 1.9747 | ACC: 0.6062 | F1: 0.3298 | AUC: 0.6005
Epoch 3
Train Loss: 81.9117 | ACC: 0.6736 | F1: 0.5767 | AUC: 0.7200
Val Loss: 2.0203 | ACC: 0.6078 | F1: 0.4532 | AUC: 0.6132
Epoch 4
Train Loss: 78.8574 | ACC: 0.7033 | F1: 0.6022 | AUC: 0.7500
Val Loss: 2.0248 | ACC: 0.6109 | F1: 0.4404 | AUC: 0.6222
Epoch 5
Train Loss: 76.1757 | ACC: 0.7152 | F1: 0.6269 | AUC: 0.7706
Val Loss: 2.0263 | ACC: 0.6250 | F1: 0.4393 | AUC: 0.6304
Epoch 6
Train Loss: 74.0007 | ACC: 0.7301 | F1: 0.6406 | AUC: 0.7871
Val Loss: 2.0741 | ACC: 0.6297 | F1: 0.4344 | AUC: 0.6330
Epoch 7
Train Loss: 72.1780 | ACC: 0.7388 | F1: 0.6553 | AUC: 0.7990
Val Loss: 2.0159 | ACC: 0.6375 | F1: 0.4502 | AUC: 0.6425
Epoch 8
Train Loss: 70.5681 | ACC: 0.7471 | F

In [11]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

model.eval()
all_preds = []
all_probs = []
all_labels = []
test_loss = 0

with torch.inference_mode():
    for batch in loader_test:
        batch = {k: v.to(device) for k, v in batch.items()}

        logits = model(**{k: batch[k] for k in keys2pass})
        loss = criterion(logits, batch["label"])
        test_loss += loss.item()

        probs = logits.softmax(dim=1)[:, 1].cpu().numpy()
        preds = logits.argmax(dim=1).cpu().numpy()
        labels = batch["label"].cpu().numpy()

        all_probs.extend(probs)
        all_preds.extend(preds)
        all_labels.extend(labels)

acc = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)
auc = roc_auc_score(all_labels, all_probs)

print("Test Loss:", test_loss)
print("Test Acc:", acc)
print("Test F1:", f1)
print("Test AUC:", auc)

Test Loss: 9.210083335638046
Test Acc: 0.6433333333333333
Test F1: 0.5201793721973094
Test AUC: 0.6763407258064517
