In [2]:
import json
import os
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision.models import resnet50
from torchvision import models

import torch.nn as nn
import torch.optim as optim


In [4]:
class HatefulMemeDataset(Dataset):
    def __init__(self, json_file, img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform

        with open(json_file, "r") as f:
            self.data = [json.loads(line) for line in f]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        img_path = os.path.join(self.img_dir, self.data[index]["img"])
        image = Image.open(img_path).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        label = self.data[index]["label"]

        return image, label


# Define the image transformations for data augmentation
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the datasets
train_dataset = HatefulMemeDataset(
    json_file="data/train.jsonl", img_dir="data/", transform=transform_train)
val_dataset = HatefulMemeDataset(
    json_file="data/dev.jsonl", img_dir="data/", transform=transform_test)
test_dataset = HatefulMemeDataset(
    json_file="data/test.jsonl", img_dir="data/", transform=transform_test)

# Define the dataloaders
train_loader = DataLoader(train_dataset, batch_size=64,
                          shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=64,
                        shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=64,
                         shuffle=False, num_workers=4)


In [5]:



# Define the ResNet-50 model
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 1)

# Define the loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Move the model and loss function to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion.to(device)

# Train the model
num_epochs = 10
best_val_loss = float('inf')
for epoch in range(num_epochs):
    # Train loop
    model.train()
    train_loss = 0
    train_correct = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images).squeeze()

        loss = criterion(outputs, labels.float())
        loss.backward()

        optimizer.step()

        train_loss += loss.item() * images.size(0)
        train_correct += ((outputs >= 0.5).int() == labels).sum().item()

train_loss = train_loss / len(train_loader.dataset)
train_acc = train_correct / len(train_loader.dataset)

# Validation loop
model.eval()
val_loss = 0
val_correct = 0
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images).squeeze()

        loss = criterion(outputs, labels.float())

        val_loss += loss.item() * images.size(0)
        val_correct += ((outputs >= 0.5).int() == labels).sum().item()

    val_loss = val_loss / len(val_loader.dataset)
    val_acc = val_correct / len(val_loader.dataset)

    # Print the epoch results
    print(f"Epoch {epoch+1}/{num_epochs} -- Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

    # Check if this is the best model so far based on validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "resnet50_hateful_memes.pth")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/aayush/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:01<00:00, 34.4MB/s]


Epoch 10/10 -- Train Loss: 0.2966, Val Loss: 1.4925, Train Acc: 0.8602, Val Acc: 0.5140


In [None]:
model.eval()
test_loss = 0
test_correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images).squeeze()

        loss = criterion(outputs, labels.float())

        test_loss += loss.item() * images.size(0)
        test_correct += ((outputs >= 0.5).int() == labels).sum().item()
    
    test_loss = test_loss / len(test_loader.dataset)
    test_acc = test_correct / len(test_loader.dataset)

    print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")
