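This notebook cell evaluates how well GPT-4o detects hateful memes using image processing, text extraction, and AI models. For each labelled meme it extracts the overlaid text with Tesseract OCR, captions the image with BLIP, builds a prompt using one of seven prompting strategies, asks the model for a yes/no verdict, and finally compares the verdicts with the ground-truth labels (accuracy and AUROC).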

In [2]:
import os
import json
import numpy as np
from PIL import Image
import cv2
import pytesseract
import easyocr
from transformers import BlipProcessor, BlipForConditionalGeneration
import openai
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score, roc_auc_score
import base64
from tqdm import tqdm

# Tell pytesseract where the Tesseract binary lives (machine-specific Homebrew path).
pytesseract.pytesseract.tesseract_cmd = r'/opt/homebrew/Cellar/tesseract/5.3.4_1/bin/tesseract'

# English EasyOCR reader; none of the functions defined in this cell reference it.
reader = easyocr.Reader(['en'])

# The BLIP processor and captioning model are loaded from the same checkpoint;
# the model is then moved to the "mps" device.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
model = model.to("mps")

def convert_image_to_base64(image_path):
    """Read the file at *image_path* and return its bytes as a base64 string.

    The result is plain ASCII text (decoded as UTF-8) suitable for embedding
    in a ``data:`` URL sent to the model.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def preprocess_final(im):
    """Prepare a colour image for OCR and return the binarised result.

    *im* is treated as a 3-channel BGR array (it is converted with
    ``COLOR_BGR2GRAY``). Three steps are applied in order:

    1. bilateral filter (diameter 5, sigmaColor 55, sigmaSpace 60),
    2. conversion to grayscale,
    3. binary threshold at 245: pixels above it become 255, the rest 0.
    """
    smoothed = cv2.bilateralFilter(im, 5, 55, 60)
    gray = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    binarised = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY)[1]
    return binarised

def process_image(image_path):
    """Extract the meme's text with Tesseract OCR and caption it with BLIP.

    Args:
        image_path: Path of the image file to process.

    Returns:
        A ``(text, caption_result)`` tuple: the OCR text collapsed onto a
        single line with single spaces between words, and the caption decoded
        from the BLIP model's output.
    """
    # The whitelist value must follow the `=` directly. pytesseract splits the
    # config string shell-style, so a space after `=` hands Tesseract an empty
    # whitelist plus a stray positional argument.
    custom_config = r"--oem 3 --psm 11 -c tessedit_char_whitelist='ABCDEFGHIJKLMNOPQRSTUVWXYZ '"

    # Open the file once and normalise to RGB so RGBA, palette and grayscale
    # images all reach OpenCV and BLIP as 3-channel data.
    with Image.open(image_path) as source:
        raw_image = source.convert('RGB')

    # PIL yields RGB, while preprocess_final() converts with COLOR_BGR2GRAY,
    # so swap the channel order before handing the array over.
    bgr = cv2.cvtColor(np.array(raw_image), cv2.COLOR_RGB2BGR)
    binarised = preprocess_final(bgr)
    ocr_output = pytesseract.image_to_string(binarised, lang='eng', config=custom_config)
    # Join OCR lines with spaces; deleting the newlines outright would glue
    # the last word of one line to the first word of the next.
    text = ' '.join(ocr_output.split())

    # Put the inputs on whatever device the model lives on rather than
    # hard-coding a device name here.
    inputs = processor(raw_image, return_tensors="pt").to(model.device)
    out = model.generate(**inputs)
    caption_result = processor.decode(out[0], skip_special_tokens=True)

    return text, caption_result

def generate_prompt(filtered_text, caption_result, strategy):
    """Return the prompt text for one of the seven prompting strategies.

    * 1 -- the bare question only (no text or caption is interpolated).
    * 2, 5 -- zero-shot: the meme text and caption followed by the question.
    * 3, 6 -- few-shot: four worked examples with verdict-only answers.
    * 4, 7 -- few-shot: the same examples with a short rationale before each
      verdict.

    Strategies 5-7 use exactly the same wording as 2-4.

    Args:
        filtered_text: Meme text to interpolate into the prompt.
        caption_result: Image caption to interpolate into the prompt.
        strategy: Integer key 1-7 selecting the prompt.

    Raises:
        KeyError: If *strategy* is not one of 1-7.
    """
    stem = "We have a meme image with the text '{}' and caption for the image '{}'. Is this meme hateful?"
    target = stem.format(filtered_text, caption_result)

    # Each example: (meme text, image caption, verdict-only answer, rationale + verdict answer)
    examples = (
        (
            "meanwhile at the isis strip club",
            "there are two men that are standing in front of a herd of sheep",
            "The answer is No.",
            "The meme is an attempt at dark humor or satire, aiming to elicit laughter through shock value or absurdity rather than to spread hate, so the answer is No.",
        ),
        (
            "last time i checked.. i played army as a kid, not air force",
            "there is a little boy in a camouflage uniform holding a gun",
            "Answer is No.",
            "The humor appears to be based on the idea that the child is seriously contemplating their role-playing preferences from childhood, so the answer is No.",
        ),
        (
            'schnazi the dachshhund "i have eliminated the chews"',
            "there are two dogs that are playing with each other",
            "Answer is Yes.",
            "It insults Jews by making fun of the systematic killing of Jews by Nazi Germany in World War II, so the answer is Yes.",
        ),
        (
            "will you hurry up and die you chocolatey little fuck i'm fucking starving",
            "there are two pictures of a baby and a vulture",
            "Answer is Yes.",
            "It mocks Africans for being in a famine, so the answer is Yes.",
        ),
    )

    # Few-shot lines are separated by a newline followed by 16 spaces. That
    # whitespace is part of the prompt text sent to the model; keep it as is.
    line_break = "\n" + " " * 16

    def few_shot(answer_index):
        shots = [f"{stem.format(ex[0], ex[1])} {ex[answer_index]}" for ex in examples]
        shots.append(f"{target} Give your answer, yes or no")
        return line_break.join(shots)

    zero_shot = f"{target} Answer yes or no"
    verdict_only = few_shot(2)
    with_rationale = few_shot(3)

    prompts = {
        1: "Is this meme hateful? Answer yes or no.",
        2: zero_shot,
        3: verdict_only,
        4: with_rationale,
        5: zero_shot,
        6: verdict_only,
        7: with_rationale,
    }
    return prompts[strategy]

def check_meme_hatefulness(meme_prompt, strategy, image_base64):
    """Ask the "gpt-4o" chat model whether a meme is hateful.

    Args:
        meme_prompt: Text prompt sent as the user message for every strategy
            except 1.
        strategy: Prompting strategy. Strategy 1 ignores *meme_prompt* and
            sends a fixed question together with the image itself.
        image_base64: Base64-encoded image bytes; required for strategy 1,
            unused otherwise.

    Returns:
        The model's reply, stripped and lower-cased. An empty string is
        returned when the reply carries no text content.

    Raises:
        ValueError: If strategy 1 is requested without *image_base64*.

    Relies on a module-level ``client`` that exposes
    ``chat.completions.create``; it is not defined in this cell.
    """
    system_message = {
        "role": "system",
        "content": "Help me to detect if the meme is a hate-speech meme. I want to see only one word as an answer, 'yes' or 'no'. If you're not sure, give the best guess.",
    }

    if strategy == 1:
        if not image_base64:
            # Without this guard the literal text "None" would be sent as image data.
            raise ValueError("strategy 1 sends the image itself, so image_base64 is required")
        user_content = [
            {"type": "text", "text": "Is this meme hateful? Answer yes or no."},
            {
                "type": "image_url",
                # NOTE(review): the MIME type is fixed to JPEG regardless of the
                # actual file format -- confirm this is acceptable for PNG inputs.
                "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
            },
        ]
    else:
        user_content = meme_prompt

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, {"role": "user", "content": user_content}],
        temperature=0.2,
    )
    # The message content may be None; treat that as an empty answer instead
    # of crashing on .strip().
    answer = completion.choices[0].message.content
    return (answer or "").strip().lower()

def _prediction_to_label(prediction):
    """Map the model's free-text answer to 1 (hateful) or 0 (not hateful)."""
    # Compare only the first word, minus surrounding punctuation, so replies
    # such as "Yes." or "yes, it is" are not silently counted as "no".
    words = (prediction or "").lower().split()
    first_word = words[0].strip(".,!?:;'\"*") if words else ""
    return 1 if first_word == "yes" else 0


def evaluate_predictions(jsonl_file, image_folder, strategy=1):
    """Run the hatefulness check over a labelled image folder and score it.

    Every file in *image_folder* whose name matches the basename of a record's
    ``img`` field is classified. One line per image is written to
    ``results_for_strategy{strategy}.txt`` in the current directory, accuracy
    and AUROC are printed, and a bar chart of both is shown.

    Args:
        jsonl_file: Path to a JSON-lines file; each record must provide the
            keys ``img``, ``id``, ``label`` and ``text``.
        image_folder: Directory containing the meme images.
        strategy: Prompting strategy, 1-7 (default 1). Strategy 1 sends the
            image itself; 2-4 use the annotated ``text`` from the record plus
            a BLIP caption; 5-7 use OCR-extracted text plus a BLIP caption.

    Returns:
        ``{'Accuracy': float, 'AUROC': float}``. AUROC is ``nan`` when the
        evaluated labels contain a single class.

    Raises:
        ValueError: If *strategy* is outside 1-7, or no image in the folder
            matches a record in *jsonl_file*.
    """
    if strategy not in range(1, 8):
        raise ValueError(f"strategy must be between 1 and 7, got {strategy!r}")

    output_file = f'results_for_strategy{strategy}.txt'

    # Index the records by image file name, skipping blank lines.
    data = {}
    with open(jsonl_file, 'r', encoding='utf-8') as records:
        for line in records:
            if not line.strip():
                continue
            item = json.loads(line)
            data[os.path.basename(item['img'])] = item

    y_true = []
    y_pred = []
    # Sorted so the results file has a reproducible order across runs.
    image_list = sorted(os.listdir(image_folder))
    with open(output_file, 'w', encoding='utf-8') as results:
        for image_name in tqdm(image_list, desc="Processing images"):
            item = data.get(image_name)
            if item is None:
                continue

            image_path = os.path.join(image_folder, image_name)
            true_label = item['label']
            image_base64 = None

            if strategy == 1:
                # The strategy-1 prompt uses neither text nor caption, so the
                # OCR and captioning pass is skipped and the image is sent.
                prompt = generate_prompt(item['text'], "", strategy)
                image_base64 = convert_image_to_base64(image_path)
            elif strategy <= 4:
                _, caption_result = process_image(image_path)
                prompt = generate_prompt(item['text'], caption_result, strategy)
            else:
                extracted_text, caption_result = process_image(image_path)
                prompt = generate_prompt(extracted_text, caption_result, strategy)

            prediction = check_meme_hatefulness(prompt, strategy, image_base64)
            prediction_label = _prediction_to_label(prediction)

            y_true.append(true_label)
            y_pred.append(prediction_label)

            results.write(f"Image ID: {item['id']}, Prediction: {prediction_label}, True Label: {true_label}\n")

    if not y_true:
        raise ValueError(f"no image in {image_folder!r} matches a record in {jsonl_file!r}")

    accuracy = accuracy_score(y_true, y_pred)
    # AUROC is computed from hard 0/1 predictions, not from scores. It is
    # undefined for single-class labels, so report nan there rather than
    # raising after every prediction has already been made.
    if len(set(y_true)) < 2:
        print("AUROC is undefined: only one class present in the evaluated labels.")
        auroc = float('nan')
    else:
        auroc = roc_auc_score(y_true, y_pred)

    print(f"Accuracy: {accuracy:.2f}")
    print(f"AUROC: {auroc:.2f}")

    metrics = {
        'Accuracy': accuracy,
        'AUROC': auroc
    }

    # NOTE(review): `plt` is not imported in this cell; presumably
    # matplotlib.pyplot is imported elsewhere in the notebook -- confirm.
    plt.figure(figsize=(10, 6))
    plt.bar(metrics.keys(), metrics.values(), color='skyblue')
    plt.xlabel('Metrics')
    plt.ylabel('Scores')
    plt.title('Evaluation Metrics for Meme Hatefulness Detection')
    plt.ylim(0, 1)
    plt.show()

    return metrics

# Example usage: score the labelled records in data/train.jsonl against the
# images stored under data/img.
jsonl_file = 'data/train.jsonl'
image_folder = 'data/img'
evaluate_predictions(jsonl_file=jsonl_file, image_folder=image_folder)

  from pandas.core import (


KeyboardInterrupt: 