In [1]:
import zipfile as zf
files = zf.ZipFile("/content/50memes.zip", 'r')
files.extractall('newmeme')
files.close()

In [3]:
!pip install easyocr

Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)
Downloading easyocr-1.7.2-py3-none-any.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m41.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m422.9/422.9 kB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (

In [4]:
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import os
import pandas as pd
import easyocr

In [9]:

reader = easyocr.Reader(['en',], gpu=True)


model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


meme_directory = "/content/newmeme/50memes"

In [11]:





def classify_toxicity(image_path, text):
    # Open and preprocess the image
    image = Image.open(image_path)

    inputs = processor(
        text=[text],
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True
    )


    outputs = model(**inputs)


    image_features = outputs.image_embeds
    text_features = outputs.text_embeds


    similarity = torch.cosine_similarity(image_features, text_features)


    toxicity_threshold = 0.25


    label = "Toxic" if similarity > toxicity_threshold else "Non-toxic"

    return label, similarity.item()


def extract_text_from_image(image_path):

    result = reader.readtext(image_path)

    text = " ".join([entry[1] for entry in result])
    return text.strip()


results = []
for filename in os.listdir(meme_directory):
    if filename.endswith(".jpg") or filename.endswith(".png"):
        image_path = os.path.join(meme_directory, filename)

        text = extract_text_from_image(image_path)

        if not text:
            continue

        label, score = classify_toxicity(image_path, text)


        results.append({
            "image": filename,
            "extracted_text": text,
            "toxicity_label": label,
            "toxicity_score": score
        })


df = pd.DataFrame(results)


df.to_csv("/content/meme_toxicity_results.csv", index=False)


print(df.head())

             image                                     extracted_text  \
0            1.png  when your human says "who' $ a good girl?" and...   
1            4.jpg  Aaj Mai Free Hun Kya karu Tumhare Liye G9HaRAM...   
2            7.png  Ihere iStiiference heteel violalingmislis ando...   
3  52_M_pic_10.jpg  SOME MEMES DO NOT REQUIRE CAPTIONS Dollywood o...   
4            5.png  when you"re feeling [orty asilbugou (abibi[s 0...   

  toxicity_label  toxicity_score  
0      Non-toxic        0.230311  
1          Toxic        0.302642  
2      Non-toxic        0.222661  
3      Non-toxic        0.218442  
4          Toxic        0.270221  
