<a href="https://colab.research.google.com/github/cadenlpicard/hateful_meme_classification/blob/main/Lab_3_Representation_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch transformers pillow pytesseract


### Extract Text Features


In [None]:
import json
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModel
from tqdm import tqdm
import numpy as np

print("Importing done")

# Locations of the Hateful Memes annotation files on the mounted Drive.
data_dir = '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes'
train_file, dev_file = (
    '/'.join((data_dir, split_name))
    for split_name in ('train.jsonl', 'dev_seen.jsonl')
)

# Load JSONL data with a progress bar
def load_jsonl(file_path):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path (as a ``str``) to a file with one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    # Decode explicitly as UTF-8 rather than relying on the platform default.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f, desc=f"Loading {file_path.split('/')[-1]}", unit="line"):
            # Tolerate blank lines (for example an extra newline at the end).
            if line.strip():
                records.append(json.loads(line))
    return records

# Parse both annotation splits up front: train first, then dev.
train_data, dev_data = [
    load_jsonl(split_path) for split_path in (train_file, dev_file)
]

print("Data loaded")

# Custom Dataset class to handle on-the-fly encoding
class MemeDataset(Dataset):
    """Dataset that tokenizes meme captions lazily, one record per access.

    Args:
        data: Sequence of records, each providing ``'text'`` and ``'label'``.
        tokenizer: Callable invoked as ``tokenizer(text, padding='max_length',
            truncation=True, max_length=..., return_tensors='pt')`` returning a
            mapping with ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Length every caption is padded/truncated to.
    """

    def __init__(self, data, tokenizer, max_length=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(input_ids, attention_mask, label)`` for record ``idx``.

        The label is returned as a 0-d ``torch.long`` tensor.
        """
        record = self.data[idx]
        text = record['text']
        label = record['label']
        encoding = self.tokenizer(
            text, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='pt'
        )
        # Remove only dim 0 (assumed to be the size-1 batch axis the tokenizer
        # adds). A bare squeeze() would also drop the sequence axis whenever
        # max_length == 1, yielding 0-d tensors that cannot be batched.
        return (
            encoding['input_ids'].squeeze(0),
            encoding['attention_mask'].squeeze(0),
            torch.tensor(label, dtype=torch.long),
        )


# Select the GPU when one is available; the encoder is moved there below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
bert_model = AutoModel.from_pretrained(model_name).to(device)
bert_model.eval()

print("Tokenizer and model loaded")

# Dataset and DataLoader creation
batch_size = 500  # Adjust based on available memory
train_dataset = MemeDataset(train_data, tokenizer)
dev_dataset = MemeDataset(dev_data, tokenizer)

# Keep file order for both splits: the extracted feature rows are saved to
# disk and later matched to labels that are read sequentially from the JSONL
# files, so shuffling here would pair features with the wrong labels.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)

print("Dataloaders created")

# Feature extraction with batch processing and progress tracking
def extract_features(data_loader, model, save_path=None):
    """Encode every batch with ``model`` and collect first-token embeddings.

    Args:
        data_loader: Iterable yielding ``(input_ids, attention_mask, label)``
            batches; the label entry is ignored.
        model: Module called as ``model(input_ids=..., attention_mask=...)``
            whose output exposes ``last_hidden_state``.
        save_path: Optional destination; when given, the features are written
            there with ``np.save``.

    Returns:
        A CPU tensor holding position 0 of ``last_hidden_state`` for every
        sample, concatenated along dim 0 in the loader's iteration order.
        Rows only line up with the source file if the loader does not shuffle.
    """
    # Follow the model's own device instead of depending on a global `device`.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    all_text_features = []
    with torch.no_grad():
        for input_ids, attention_mask, _ in tqdm(data_loader, desc="Extracting text features", unit="batch"):
            outputs = model(
                input_ids=input_ids.to(device),
                attention_mask=attention_mask.to(device),
            )
            # Keep only the first token's hidden state; park it on the CPU so
            # accelerator memory is released between batches.
            all_text_features.append(outputs.last_hidden_state[:, 0, :].cpu())

    all_text_features = torch.cat(all_text_features, dim=0)
    if save_path:
        np.save(save_path, all_text_features.numpy())
        print(f"Text features saved to {save_path}")
    return all_text_features

print("Starting feature extraction")
# Encode each split in turn (train, then dev) and persist its features.
for split_loader, features_path in (
    (train_loader, '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/train_text_features.npy'),
    (dev_loader, '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/dev_text_features.npy'),
):
    extract_features(split_loader, bert_model, save_path=features_path)


### Create text classifier, model, and evaluate

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from tqdm import tqdm
import pandas as pd

# Read the cached text features and wrap them directly in a float32 tensor.
train_text_features = torch.tensor(
    np.load('/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/train_text_features.npy'),
    dtype=torch.float32,
)

# Labels come from the annotation file, in file order, stored as float32.
train_labels = torch.tensor(
    [record['label'] for record in load_jsonl(train_file)],
    dtype=torch.float32,
)

# Mini-batch loader over (feature, label) pairs, reshuffled each epoch.
batch_size = 32
train_dataset = TensorDataset(train_text_features, train_labels)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Define the Classifier Model
class TextClassifier(nn.Module):
    """Two-layer perceptron (Linear -> ReLU -> Linear) that returns raw outputs.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Apply fc1, ReLU and fc2 in sequence; no activation on the output."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Network sizes: 768-d BERT-base embedding in, 128 hidden units (tunable),
# and a single output for binary classification.
input_dim, hidden_dim, output_dim = 768, 128, 1
classifier = TextClassifier(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)

# Binary cross-entropy on raw logits, optimised with Adam.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(classifier.parameters(), lr=1e-3)

# One dict of evaluation metrics is appended here per epoch.
epoch_results = []

# The dev split never changes between epochs, so read it once up front
# instead of re-loading the files from Drive inside every epoch.
dev_text_features = torch.tensor(
    np.load('/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/dev_text_features.npy'),
    dtype=torch.float32,
)
dev_labels = torch.tensor(
    [entry['label'] for entry in load_jsonl(dev_file)],
    dtype=torch.float32,
)
dev_dataset = TensorDataset(dev_text_features, dev_labels)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)

# Training and evaluation
num_epochs = 50
for epoch in range(num_epochs):
    classifier.train()
    epoch_loss = 0

    # Training phase
    for embeddings, labels in tqdm(train_loader, desc=f"Epoch {epoch + 1} Training"):
        optimizer.zero_grad()
        # squeeze(-1) removes only the trailing output axis; a bare squeeze()
        # would also drop the batch axis when the last batch has one sample.
        outputs = classifier(embeddings).squeeze(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    print(f"Epoch {epoch + 1}, Training Loss: {epoch_loss / len(train_loader):.4f}")

    # Evaluation phase
    classifier.eval()
    all_preds, all_probs, all_labels = [], [], []

    with torch.no_grad():
        for embeddings, labels in tqdm(dev_loader, desc=f"Epoch {epoch + 1} Evaluation"):
            probs = torch.sigmoid(classifier(embeddings).squeeze(-1))
            preds = (probs > 0.5).float()

            all_probs.extend(probs.numpy())
            all_preds.extend(preds.numpy())
            all_labels.extend(labels.numpy())

    # Calculate metrics for the current epoch
    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='binary', zero_division=0)
    # AUC is a ranking metric: score it on the probabilities rather than on
    # the thresholded 0/1 predictions, which collapse the ROC curve.
    auc_roc = roc_auc_score(all_labels, all_probs)

    # Append results to epoch_results list
    epoch_results.append({
        "Epoch": epoch + 1,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "AUC-ROC": auc_roc
    })

    print(f"Epoch {epoch + 1} - Dev Set Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"AUC-ROC: {auc_roc:.4f}")

# Convert epoch_results to a DataFrame for tabular display
results_df = pd.DataFrame(epoch_results)
print("\nSummary Table of Evaluation Outcomes")
print(results_df)

# Define the file path where you want to save the model
model_save_path = '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/text_classifier_model.pth'

# Save the model's state dictionary
torch.save(classifier.state_dict(), model_save_path)

print(f"Model saved to '{model_save_path}'")


### Get Average size of images and delete large images

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

# Directory holding the meme images
image_dir = '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes/img'

# On-disk size in bytes of every .jpg/.png directly inside image_dir
image_sizes = np.array([
    os.path.getsize(os.path.join(image_dir, filename))
    for filename in os.listdir(image_dir)
    if filename.endswith(('.jpg', '.png'))
])

# Summary statistics, all still in bytes
average_size = image_sizes.mean()
min_size = image_sizes.min()
max_size = image_sizes.max()
median_size = np.median(image_sizes)

# Report in KB
print(f"Average file size: {average_size / 1024:.2f} KB")
print(f"Minimum file size: {min_size / 1024:.2f} KB")
print(f"Maximum file size: {max_size / 1024:.2f} KB")
print(f"Median file size: {median_size / 1024:.2f} KB")


# Plot the distribution of image sizes
plt.figure(figsize=(10, 6))
# image_sizes holds bytes (os.path.getsize); convert so the plotted values
# actually match the "File Size (KB)" axis label.
plt.hist(np.asarray(image_sizes) / 1024, bins=30, edgecolor='black')
plt.title("Distribution of Image File Sizes")
plt.xlabel("File Size (KB)")
plt.ylabel("Number of Images")
plt.show()


# Threshold in KB: the mean file size computed above.
size_threshold_kb = average_size / 1024

# WARNING: this permanently removes every image strictly larger than the
# threshold from image_dir. There is no dry run and no backup.
for filename in os.listdir(image_dir):
    if not filename.endswith(('.jpg', '.png')):
        continue  # not an image file

    file_path = os.path.join(image_dir, filename)
    file_size_kb = os.path.getsize(file_path) / 1024  # bytes -> KB

    if not file_size_kb > size_threshold_kb:
        continue  # at or below the threshold: keep the file

    os.remove(file_path)
    print(f"Deleted {filename} ({file_size_kb:.2f} KB)")

print("Deletion complete.")



### Pre-Process and Extract Features from Image Files

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from PIL import Image
import os
import numpy as np
from torch.utils.data import Dataset
from tqdm import tqdm
import time

# Prefer CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Directory holding the meme images
image_dir = '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes/img'

# Preprocessing pipeline: resize to 256, centre-crop to 224, convert to a
# tensor, then normalise each channel with the given mean/std.
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Custom Dataset class to handle image loading and preprocessing
class ImageDataset(Dataset):
    """Dataset over the ``.jpg``/``.png`` files directly inside a directory.

    Args:
        image_dir: Directory to scan (not recursive).
        transform: Optional callable applied to each RGB image.
    """

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        # Sorted so the index -> file mapping is reproducible; os.listdir
        # returns entries in arbitrary order.
        self.image_files = sorted(
            f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))
        )
        self.transform = transform

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, filename, file_size_kb)`` for the file at ``idx``.

        ``image`` is the RGB image, passed through ``transform`` when one was
        given; ``file_size_kb`` is the on-disk size divided by 1024.
        """
        filename = self.image_files[idx]
        img_path = os.path.join(self.image_dir, filename)
        # Close the file handle as soon as the pixels are decoded instead of
        # leaving one open descriptor per image until garbage collection.
        with Image.open(img_path) as source:
            img = source.convert('RGB')
        if self.transform:
            img = self.transform(img)
        file_size_kb = os.path.getsize(img_path) / 1024  # File size in KB
        return img, filename, file_size_kb

# Pretrained ResNet-50, placed on the selected device and switched to eval mode.
resnet_model = torchvision.models.resnet50(pretrained=True)
resnet_model = resnet_model.to(device)
resnet_model.eval()

# Cap on the summed on-disk size of one batch: 150 MB, expressed in KB.
max_batch_size_kb = 1024 * 150

# Dataset over every image in image_dir, preprocessed on access.
dataset = ImageDataset(image_dir, transform=preprocess)

def _store_batch_features(images, filenames, store):
    """Run one batch through ``resnet_model`` and record each output row.

    Args:
        images: Non-empty list of equally-shaped image tensors.
        filenames: Names parallel to ``images``.
        store: Dict updated in place with ``filename -> numpy vector``.
    """
    batch_tensor = torch.stack(images).to(device)
    features = resnet_model(batch_tensor).cpu()
    for i, fname in enumerate(filenames):
        store[fname] = features[i].numpy()


# Extract features in dynamic batches and store them
image_features = {}
with torch.no_grad():
    batch_images, batch_filenames = [], []
    cumulative_size_kb = 0  # Track cumulative batch size in KB

    for img, filename, file_size_kb in tqdm(dataset, desc="Extracting image features"):
        # Flush first when adding this image would exceed the size limit.
        # The non-empty check keeps a single image larger than the limit from
        # triggering a flush of an empty batch (torch.stack([]) raises).
        if batch_images and cumulative_size_kb + file_size_kb > max_batch_size_kb:
            start_time = time.time()
            _store_batch_features(batch_images, batch_filenames, image_features)
            batch_time = time.time() - start_time
            print(f"Processed batch of size {cumulative_size_kb / 1024:.2f} MB in {batch_time:.2f}s")

            # Reset batch accumulators
            batch_images, batch_filenames = [], []
            cumulative_size_kb = 0

        batch_images.append(img)
        batch_filenames.append(filename)
        cumulative_size_kb += file_size_kb

    # Process any remaining images in the last batch
    if batch_images:
        _store_batch_features(batch_images, batch_filenames, image_features)
        print(f"Processed final batch of size {cumulative_size_kb / 1024:.2f} MB")

# Save features to a .npy file (a dict, so NumPy stores it as a pickled object)
np.save('/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/image_features.npy', image_features)
print("Image features saved to 'image_features.npy'")


### Load Feature Vectors and Build Classifier

In [None]:
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from tqdm import tqdm  # Import tqdm for progress bars

# Prefer CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# The features file stores a pickled dict, hence allow_pickle=True and .item().
# NOTE(review): unpickling can execute code - only load files you produced.
image_features = np.load(
    '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/image_features.npy',
    allow_pickle=True,
).item()

# Annotation files for the train and dev splits
data_dir = '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes'
train_file, dev_file = (
    '/'.join((data_dir, split_name))
    for split_name in ('train.jsonl', 'dev_seen.jsonl')
)

def load_jsonl(file_path):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path to a file with one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform default,
    # and skip blank lines instead of failing on them.
    with open(file_path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]

# Load JSONL data and labels
def load_data_and_labels(file_path):
    """Return parallel lists ``(image_filenames, labels)`` from a JSONL file.

    Each record's ``'img'`` and ``'label'`` values are collected in file order.
    """
    image_filenames, labels = [], []
    for entry in load_jsonl(file_path):
        image_filenames.append(entry['img'])
        labels.append(entry['label'])
    return image_filenames, labels

# Image paths and labels for each split (train first, then dev).
(train_filenames, train_labels), (dev_filenames, dev_labels) = [
    load_data_and_labels(split_path) for split_path in (train_file, dev_file)
]

def create_image_dataset(filenames, labels, image_features):
    """Build a ``TensorDataset`` of image vectors and labels.

    Entries whose image has no extracted features are skipped silently.

    Args:
        filenames: Image paths; only the part after the last ``'/'`` is used
            as the lookup key.
        labels: Labels parallel to ``filenames``.
        image_features: Dict mapping a file's base name to its feature vector.

    Returns:
        ``TensorDataset(features, labels)`` with ``float32`` features and
        ``long`` labels, in the order of ``filenames``.
    """
    image_vectors, filtered_labels = [], []
    for filename, label in zip(filenames, labels):
        key = filename.split('/')[-1]
        if key in image_features:
            image_vectors.append(image_features[key])
            filtered_labels.append(label)

    if image_vectors:
        # Stack into one ndarray first: building a tensor straight from a
        # list of ndarrays converts element by element and is very slow.
        image_tensor = torch.tensor(np.stack(image_vectors), dtype=torch.float32)
    else:
        # Nothing matched: keep the empty-tensor result instead of failing.
        image_tensor = torch.tensor([], dtype=torch.float32)
    labels_tensor = torch.tensor(filtered_labels, dtype=torch.long)
    return TensorDataset(image_tensor, labels_tensor)

# One dataset per split (train first, then dev), both drawing on the same
# image-feature dict.
train_dataset, dev_dataset = [
    create_image_dataset(split_filenames, split_labels, image_features)
    for split_filenames, split_labels in (
        (train_filenames, train_labels),
        (dev_filenames, dev_labels),
    )
]

# Model definition
class ImageClassifier(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear, returning raw outputs.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Pass ``x`` through fc1, ReLU and fc2, in that order."""
        return self.fc2(self.relu(self.fc1(x)))

# Layer sizes; input_dim has to equal the length of each image feature vector.
input_dim, hidden_dim, output_dim = 1000, 128, 1
classifier = ImageClassifier(input_dim, hidden_dim, output_dim)
classifier = classifier.to(device)

# Binary cross-entropy on raw logits, optimised with Adam.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(classifier.parameters(), lr=1e-3)

# Per-batch memory budget: 150 MB, expressed in KB.
max_batch_size_kb = 1024 * 150

# Helper function to process batches by cumulative memory limit
def process_batches(dataset, max_batch_kb=None):
    """Group samples into batches whose feature memory stays within a limit.

    Args:
        dataset: Iterable of ``(features, label)`` pairs, where ``features``
            is a tensor and all feature tensors share one shape.
        max_batch_kb: Size limit per batch in KB. When omitted, the
            module-level ``max_batch_size_kb`` is used.

    Returns:
        A list of ``(stacked_features, labels_tensor)`` tuples in dataset
        order. A sample larger than the limit gets a batch of its own.
    """
    limit_kb = max_batch_size_kb if max_batch_kb is None else max_batch_kb

    batches = []
    batch_data, batch_labels = [], []
    cumulative_size_kb = 0

    for features, label in dataset:
        # Use the tensor's real element size rather than assuming 4 bytes.
        feature_size_kb = features.numel() * features.element_size() / 1024

        # Close the current batch before it would overflow. The non-empty
        # check avoids stacking an empty list when one sample alone exceeds
        # the limit.
        if batch_data and cumulative_size_kb + feature_size_kb > limit_kb:
            batches.append((torch.stack(batch_data), torch.tensor(batch_labels)))
            batch_data, batch_labels = [], []
            cumulative_size_kb = 0

        batch_data.append(features)
        batch_labels.append(label)
        cumulative_size_kb += feature_size_kb

    if batch_data:
        batches.append((torch.stack(batch_data), torch.tensor(batch_labels)))

    return batches

# The batching is deterministic and the datasets never change, so build the
# batches once instead of re-stacking every sample on each epoch.
train_batches = process_batches(train_dataset)
dev_batches = process_batches(dev_dataset)

# Training and evaluation
num_epochs = 25
for epoch in range(num_epochs):
    classifier.train()
    epoch_loss = 0

    print(f"\nEpoch {epoch + 1}/{num_epochs} - Training:")
    for batch_idx, (features, labels) in enumerate(tqdm(train_batches, desc="Training Batch Progress")):
        features, labels = features.to(device), labels.to(device)

        optimizer.zero_grad()
        # squeeze(-1) removes only the trailing output axis; a bare squeeze()
        # would also drop the batch axis for a batch holding one sample.
        outputs = classifier(features).squeeze(-1)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        # Display batch-level loss
        print(f"Batch {batch_idx + 1}/{len(train_batches)} - Loss: {loss.item():.4f}")

    avg_epoch_loss = epoch_loss / len(train_batches)
    print(f"Epoch {epoch + 1} - Average Training Loss: {avg_epoch_loss:.4f}")

    # Evaluation on the development set
    classifier.eval()
    all_preds, all_probs, all_labels = [], [], []

    print(f"\nEpoch {epoch + 1}/{num_epochs} - Evaluation:")
    with torch.no_grad():
        for batch_idx, (features, labels) in enumerate(tqdm(dev_batches, desc="Evaluation Batch Progress")):
            features, labels = features.to(device), labels.to(device)
            probs = torch.sigmoid(classifier(features).squeeze(-1))
            preds = probs > 0.5

            all_probs.extend(probs.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='binary')
    # AUC is a ranking metric: score it on the probabilities rather than on
    # the thresholded predictions, which collapse the ROC curve.
    auc_roc = roc_auc_score(all_labels, all_probs)

    print(f"Epoch {epoch + 1} - Dev Set Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"AUC-ROC: {auc_roc:.4f}")

# Save the model
torch.save(classifier.state_dict(), '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/image_classifier_model.pth')


### Multimodal Classification - Early Fusion

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from torch.utils.data import TensorDataset, DataLoader
import numpy as np

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Load image features and text features.
# The extraction cells write these files directly under the dataset folder,
# not inside hateful_memes/, so read them from that same location.
image_features = np.load('/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/image_features.npy', allow_pickle=True).item()
train_text_features = np.load('/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/train_text_features.npy')
dev_text_features = np.load('/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/dev_text_features.npy')

# Path to JSONL files with labels (modify paths as necessary)
train_file = '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes/train.jsonl'
dev_file = '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes/dev_seen.jsonl'

# Function to load JSONL data and labels
def load_labels(file_path):
    """Return the ``'label'`` value of every record in a JSON Lines file.

    Args:
        file_path: Path to a file with one JSON document per line.

    Returns:
        A list of labels in file order; blank lines are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``'label'`` key.
    """
    # Imported here because this cell's import list does not include json.
    import json

    with open(file_path, 'r', encoding='utf-8') as f:
        return [json.loads(line)['label'] for line in f if line.strip()]

# Labels for each split, in annotation-file order (train first, then dev).
train_labels, dev_labels = [
    load_labels(split_path) for split_path in (train_file, dev_file)
]

# Prepare training and dev datasets with feature-level fusion
def create_fusion_dataset(image_features, text_features, labels, filenames=None):
    """Concatenate each sample's image and text vectors into one dataset.

    Args:
        image_features: Dict mapping an image file's base name to its vector.
        text_features: Array whose row ``i`` belongs to sample ``i``.
        labels: Labels, with entry ``i`` belonging to sample ``i``.
        filenames: Optional image paths parallel to ``labels``. When given,
            each image vector is looked up by the part of the path after the
            last ``'/'`` and samples without image features are skipped.
            When omitted, the i-th dict key is paired with the i-th label,
            which is only correct if the dict was built in sample order.

    Returns:
        ``TensorDataset(fused, labels)`` with ``float32`` fused vectors
        (image part first, then text) and ``long`` labels.
    """
    if filenames is None:
        keys = list(image_features)
    else:
        keys = [name.split('/')[-1] for name in filenames]

    fused_rows, kept_labels = [], []
    for idx, (key, label) in enumerate(zip(keys, labels)):
        if key not in image_features:
            continue  # no image vector for this sample
        # Feature-level fusion by concatenation; idx is the sample's own
        # position, so the text row stays aligned even when entries are skipped.
        fused_rows.append(np.concatenate((image_features[key], text_features[idx])))
        kept_labels.append(label)

    # Convert to tensors. Stack into one ndarray first: building a tensor
    # straight from a list of ndarrays is very slow.
    if fused_rows:
        feature_tensors = torch.tensor(np.stack(fused_rows), dtype=torch.float32)
    else:
        feature_tensors = torch.tensor([], dtype=torch.float32)
    label_tensors = torch.tensor(kept_labels, dtype=torch.long)
    return TensorDataset(feature_tensors, label_tensors)

import json


def _align_split(jsonl_path, text_features, labels):
    """Restrict one split to the memes that have image features.

    The global ``image_features`` dict holds every image in directory order,
    so it cannot be paired with labels positionally. This reads the image
    name of each annotation row and returns ``(images, text_rows, labels)``
    where ``images`` is a dict in annotation order and all three line up
    entry by entry.
    """
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        names = [json.loads(line)['img'].split('/')[-1] for line in f if line.strip()]

    keep = [i for i, name in enumerate(names) if name in image_features]
    aligned_images = {names[i]: image_features[names[i]] for i in keep}
    if len(aligned_images) != len(keep):
        # A repeated image name would collapse in the dict and shift every
        # later row out of alignment.
        raise ValueError(f"Duplicate image names in {jsonl_path}")
    return aligned_images, text_features[keep], [labels[i] for i in keep]


train_dataset = create_fusion_dataset(*_align_split(train_file, train_text_features, train_labels))
dev_dataset = create_fusion_dataset(*_align_split(dev_file, dev_text_features, dev_labels))

# Data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)

# Define the multimodal classifier model
class MultimodalClassifier(nn.Module):
    """Classifier over fused vectors: Linear -> ReLU -> Linear, raw outputs.

    Args:
        input_dim: Length of each fused input vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the three layers in order and return the final layer's output."""
        activated = self.relu(self.fc1(x))
        return self.fc2(activated)

# Input width is read off the first fused training vector (image + text).
first_features, _ = train_dataset[0]
input_dim = first_features.shape[0]
hidden_dim, output_dim = 128, 1  # one output for binary classification
model = MultimodalClassifier(input_dim, hidden_dim, output_dim)
model = model.to(device)

# Binary cross-entropy on raw logits, optimised with Adam.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training and evaluation loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device, dtype=torch.float32)

        optimizer.zero_grad()
        # squeeze(-1) removes only the trailing output axis; a bare squeeze()
        # would also drop the batch axis when the last batch has one sample.
        outputs = model(features).squeeze(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)

    # Evaluation
    model.eval()
    all_preds, all_probs, all_labels = [], [], []
    with torch.no_grad():
        for features, labels in dev_loader:
            features, labels = features.to(device), labels.to(device)
            probs = torch.sigmoid(model(features).squeeze(-1))
            preds = probs > 0.5
            all_probs.extend(probs.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='binary')
    # AUC is a ranking metric: score it on the probabilities rather than on
    # the thresholded predictions, which collapse the ROC curve.
    auc_roc = roc_auc_score(all_labels, all_probs)

    print(f"Epoch {epoch + 1}/{num_epochs}")
    print(f"Training Loss: {avg_train_loss:.4f}")
    print(f"Dev Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, AUC-ROC: {auc_roc:.4f}")

# Save the model
torch.save(model.state_dict(), '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes/multimodal_classifier_model.pth')
print("Model saved to 'multimodal_classifier_model.pth'")


### Multimodal Classification - Late Fusion

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from torch.utils.data import TensorDataset, DataLoader
import numpy as np

# Prefer CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Image features are stored as a pickled dict (hence allow_pickle / .item());
# the text features are plain arrays.
# NOTE(review): unpickling can execute code - only load files you produced.
image_features = np.load(
    '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/image_features.npy',
    allow_pickle=True,
).item()
train_text_features = np.load(
    '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/train_text_features.npy'
)
dev_text_features = np.load(
    '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/dev_text_features.npy'
)

# Annotation files that carry the labels
train_file = '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes/train.jsonl'
dev_file = '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes/dev_seen.jsonl'

# Function to load labels
def load_labels(file_path):
    """Return the ``'label'`` value of every record in a JSON Lines file.

    Args:
        file_path: Path to a file with one JSON document per line.

    Returns:
        A list of labels in file order; blank lines are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``'label'`` key.
    """
    # Imported here because this cell's import list does not include json.
    import json

    with open(file_path, 'r', encoding='utf-8') as f:
        return [json.loads(line)['label'] for line in f if line.strip()]

# Labels for each split, in annotation-file order (train first, then dev).
train_labels, dev_labels = [
    load_labels(split_path) for split_path in (train_file, dev_file)
]

# Prepare individual datasets for image and text features
def create_dataset(features, labels):
    """Wrap features and labels in a ``TensorDataset``.

    If the two inputs differ in length, both are truncated to the shorter
    one. Note that this hides a mismatch rather than reporting it.

    Args:
        features: Array, or list of equally-shaped per-sample vectors.
        labels: Sequence of integer labels.

    Returns:
        ``TensorDataset(features, labels)`` with ``float32`` features and
        ``long`` labels.
    """
    # Ensure features and labels have the same length
    min_len = min(len(features), len(labels))

    # Convert to one ndarray before building the tensor: creating a tensor
    # straight from a list of ndarrays is very slow. torch.tensor still
    # copies, so the caller's data is never aliased.
    feature_block = np.asarray(features[:min_len], dtype=np.float32)
    feature_tensors = torch.tensor(feature_block, dtype=torch.float32)
    label_tensors = torch.tensor(labels[:min_len], dtype=torch.long)
    return TensorDataset(feature_tensors, label_tensors)

import json


def _images_for_split(jsonl_path):
    """Return ``(row_indices, image_vectors)`` for one annotation file.

    The global ``image_features`` dict holds every image in directory order,
    so it cannot be paired with labels positionally. This looks each
    annotation row's image up by file name and keeps only rows that have
    extracted features, preserving annotation order.
    """
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        names = [json.loads(line)['img'].split('/')[-1] for line in f if line.strip()]
    keep = [i for i, name in enumerate(names) if name in image_features]
    return keep, [image_features[names[i]] for i in keep]


train_keep, train_image_features = _images_for_split(train_file)
dev_keep, dev_image_features = _images_for_split(dev_file)

# Labels restricted to the rows that actually have an image vector.
train_image_labels = [train_labels[i] for i in train_keep]
dev_image_labels = [dev_labels[i] for i in dev_keep]

train_image_dataset = create_dataset(train_image_features, train_image_labels)
dev_image_dataset = create_dataset(dev_image_features, dev_image_labels)

# The text classifier trains on every caption. For dev, the text rows are
# cut down to the same samples as the image rows, because evaluation pairs
# the two dev loaders batch by batch.
train_text_dataset = create_dataset(train_text_features, train_labels)
dev_text_dataset = create_dataset(dev_text_features[dev_keep], dev_image_labels)

# Data loaders
batch_size = 32
train_image_loader = DataLoader(train_image_dataset, batch_size=batch_size, shuffle=True)
dev_image_loader = DataLoader(dev_image_dataset, batch_size=batch_size, shuffle=False)

train_text_loader = DataLoader(train_text_dataset, batch_size=batch_size, shuffle=True)
dev_text_loader = DataLoader(dev_text_dataset, batch_size=batch_size, shuffle=False)

# Define individual classifiers
class SingleModalClassifier(nn.Module):
    """Single-modality classifier: Linear -> ReLU -> Linear, raw outputs.

    Args:
        input_dim: Length of each input feature vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Feed ``x`` through fc1, ReLU and fc2 and return the result."""
        for layer in (self.fc1, self.relu, self.fc2):
            x = layer(x)
        return x

# Input widths are read off the data: the length of the first image vector
# and the second dimension of the text feature array (assumed 2-D).
input_dim_image = train_image_features[0].shape[0]
input_dim_text = train_text_features.shape[1]
hidden_dim, output_dim = 128, 1  # one output for binary classification

# One independent classifier per modality, both on the selected device.
image_classifier = SingleModalClassifier(input_dim_image, hidden_dim, output_dim)
image_classifier = image_classifier.to(device)
text_classifier = SingleModalClassifier(input_dim_text, hidden_dim, output_dim)
text_classifier = text_classifier.to(device)

# Shared loss; each classifier gets its own Adam optimiser.
criterion = nn.BCEWithLogitsLoss()
image_optimizer = optim.Adam(image_classifier.parameters(), lr=1e-3)
text_optimizer = optim.Adam(text_classifier.parameters(), lr=1e-3)

# Training and evaluation loop
num_epochs = 25
for epoch in range(num_epochs):
    image_classifier.train()
    text_classifier.train()

    total_image_loss, total_text_loss = 0, 0

    # Train image classifier
    for features, labels in train_image_loader:
        features, labels = features.to(device), labels.to(device, dtype=torch.float32)

        image_optimizer.zero_grad()
        # squeeze(-1) removes only the trailing output axis; a bare squeeze()
        # would also drop the batch axis when the last batch has one sample.
        outputs = image_classifier(features).squeeze(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        image_optimizer.step()
        total_image_loss += loss.item()

    avg_image_loss = total_image_loss / len(train_image_loader)

    # Train text classifier
    for features, labels in train_text_loader:
        features, labels = features.to(device), labels.to(device, dtype=torch.float32)

        text_optimizer.zero_grad()
        outputs = text_classifier(features).squeeze(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        text_optimizer.step()
        total_text_loss += loss.item()

    avg_text_loss = total_text_loss / len(train_text_loader)

    # Evaluation
    image_classifier.eval()
    text_classifier.eval()
    all_preds, all_probs, all_labels = [], [], []

    with torch.no_grad():
        # Loop variables are named *_batch so they do not overwrite the
        # module-level image_features dict.
        for (image_batch, labels), (text_batch, _) in zip(dev_image_loader, dev_text_loader):
            image_batch, text_batch, labels = image_batch.to(device), text_batch.to(device), labels.to(device)

            # Get predictions from each classifier
            image_outputs = torch.sigmoid(image_classifier(image_batch).squeeze(-1))
            text_outputs = torch.sigmoid(text_classifier(text_batch).squeeze(-1))

            # Decision fusion (average the predictions)
            combined_outputs = (image_outputs + text_outputs) / 2
            preds = combined_outputs > 0.5

            all_probs.extend(combined_outputs.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='binary')
    # AUC is a ranking metric: score it on the fused probabilities rather
    # than on the thresholded predictions, which collapse the ROC curve.
    auc_roc = roc_auc_score(all_labels, all_probs)

    print(f"Epoch {epoch + 1}/{num_epochs}")
    print(f"Image Training Loss: {avg_image_loss:.4f}, Text Training Loss: {avg_text_loss:.4f}")
    print(f"Dev Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, AUC-ROC: {auc_roc:.4f}")

# Save the models
torch.save(image_classifier.state_dict(), '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes/multi_modal_late_fusion_image_classifier_model.pth')
torch.save(text_classifier.state_dict(), '/content/drive/MyDrive/Machine Learning/Hateful Meme Dataset/hateful_memes/multi_modal_late_fusion_text_classifier_model.pth')
print("Models saved.")
