In [None]:
import zipfile as zf
files = zf.ZipFile("/content/memes.zip", 'r')
files.extractall('meme')
files.close()

In [None]:
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import os
import pandas as pd
import easyocr

# Initialize EasyOCR Reader (for English text)
reader = easyocr.Reader(['en', 'hi', 'mr'], gpu=True)

# Initialize the CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Define the meme image directory
meme_directory = "/content/meme/memes"

# Example function to classify toxicity based on image and text
def classify_toxicity(image_path, text):
    # Open and preprocess the image
    image = Image.open(image_path)

    # Prepare the inputs (image and text) with padding and truncation
    inputs = processor(
        text=[text],
        images=image,
        return_tensors="pt",
        padding=True,  # Padding the sequence to the maximum length
        truncation=True  # Truncating if the sequence exceeds the max length
    )

    # Get the outputs from the model
    outputs = model(**inputs)

    # Extract the image and text features
    image_features = outputs.image_embeds
    text_features = outputs.text_embeds

    # Calculate the similarity score (cosine similarity)
    similarity = torch.cosine_similarity(image_features, text_features)

    # Define a threshold for toxicity (example threshold)
    toxicity_threshold = 0.4

    # If the similarity score is below the threshold, classify as toxic
    label = "Toxic" if similarity < toxicity_threshold else "Non-toxic"

    return label, similarity.item()

# Function to extract text from image using EasyOCR
def extract_text_from_image(image_path):
    # Perform OCR using EasyOCR
    result = reader.readtext(image_path)
    # Combine all the text found in the image
    text = " ".join([entry[1] for entry in result])
    return text.strip()

# Process all meme images in the directory
results = []
for filename in os.listdir(meme_directory):
    if filename.endswith(".jpg") or filename.endswith(".png"):
        image_path = os.path.join(meme_directory, filename)

        # Extract text from image
        text = extract_text_from_image(image_path)

        # If no text was extracted, skip the image
        if not text:
            continue

        # Classify toxicity based on image and extracted text
        label, score = classify_toxicity(image_path, text)

        # Store results
        results.append({
            "image": filename,
            "extracted_text": text,
            "toxicity_label": label,
            "toxicity_score": score
        })

# Convert results into a DataFrame
df = pd.DataFrame(results)

# Save results to a CSV file
df.to_csv("/content/meme/memes/meme_toxicity_results.csv", index=False)

# Print the DataFrame if needed
print(df.head())




Progress: |██████████████████████████████████████████████████| 100.0% Complete



Progress: |██████████████████████████████████████████████████| 100.0% Complete

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

              image                                     extracted_text  \
0    520_NM_pic.jpg  IGdevilmemez_81७ cooamdep0 २s८l Ic०$ Monkeys क...   
1  151_M_pic_10.jpg  philmyyy We're both dead, but if we're born ag...   
2     43_NM_pic.jpg  When my Bestfriend doesn t tie a Friendship ba...   
3    514_NM_pic.jpg  *Someone :- Tum apne doston ko kis naam se bul...   
4   506_M_pic_2.jpg  १४ Y Ola Nibbi After Sex eleosturk Aaj Usne Mu...   

  toxicity_label  toxicity_score  
0          Toxic        0.372312  
1          Toxic        0.294317  
2          Toxic        0.311084  
3          Toxic        0.308909  
4          Toxic        0.297873  


In [None]:
pip install easyocr

Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)
Downloading easyocr-1.7.2-py3-none-any.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m78.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m422.9/422.9 kB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [6]:
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import os
import pandas as pd
import easyocr
import cv2

# Initialize EasyOCR Reader for English, Hindi, and Marathi
reader = easyocr.Reader(['en', 'hi'], gpu=True)

# Initialize the CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Define the meme image directory
meme_directory = "/content/meme/memes"

# Function to classify toxicity based on image and text
def classify_toxicity(image_path, text):
    image = Image.open(image_path)
    inputs = processor(
        text=[text],
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True
    )
    outputs = model(**inputs)
    image_features = outputs.image_embeds
    text_features = outputs.text_embeds
    similarity = torch.cosine_similarity(image_features, text_features)
    toxicity_threshold = 0.4
    label = "Toxic" if similarity < toxicity_threshold else "Non-toxic"
    return label, similarity.item()

# Function to extract text from image using EasyOCR
def extract_text_from_image(image_path):
    result = reader.readtext(image_path, detail=1)
    extracted_text = " ".join([entry[1] for entry in result])
    return extracted_text.strip()

# Process all meme images in the directory
results = []
for filename in os.listdir(meme_directory):
    if filename.endswith(".jpg") or filename.endswith(".png"):
        image_path = os.path.join(meme_directory, filename)
        text = extract_text_from_image(image_path)
        if not text:
            continue
        label, score = classify_toxicity(image_path, text)
        results.append({
            "image": filename,
            "extracted_text": text,
            "toxicity_label": label,
            "toxicity_score": score
        })

# Convert results into a DataFrame and save to CSV
df = pd.DataFrame(results)
df.to_csv("/content/meme/memes/meme_toxicity_results.csv", index=False)
print(df.head())


              image                                     extracted_text  \
0    520_NM_pic.jpg  IGdevilmemez_81७ cooamdep0 २s८l Ic०$ Monkeys क...   
1  151_M_pic_10.jpg  philmyyy We're both dead, but if we're born ag...   
2     43_NM_pic.jpg  When my Bestfriend doesn t tie a Friendship ba...   
3    514_NM_pic.jpg  *Someone :- Tum apne doston ko kis naam se bul...   
4   506_M_pic_2.jpg  १४ Y Ola Nibbi After Sex eleosturk Aaj Usne Mu...   

  toxicity_label  toxicity_score  
0          Toxic        0.372312  
1          Toxic        0.294317  
2          Toxic        0.311084  
3          Toxic        0.308909  
4          Toxic        0.297873  


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import pandas as pd
import os

# ✅ Load CLIP model and processor
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# ✅ Define dataset path
CSV_FILE = "/content/meme/memes/meme_toxicity_results.csv"
IMAGE_DIR = "/content/meme/memes"

# 🔹 Custom Dataset Class
class MemeDataset(Dataset):
    def __init__(self, csv_file, image_dir, processor):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.processor = processor
        self.label_map = {"Non-toxic": 0, "Toxic": 1}  # Convert labels to numbers

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        image_path = os.path.join(self.image_dir, row["image"])
        text = row["extracted_text"]
        label = self.label_map[row["toxicity_label"]]

        # Load image
        image = Image.open(image_path).convert("RGB")

        return image, text, label

# ✅ Custom Collate Function to Handle Variable Length Inputs
def collate_fn(batch):
    images, texts, labels = zip(*batch)

    # Tokenize text and process images
    inputs = processor(text=list(texts), images=list(images), return_tensors="pt", padding=True, truncation=True)

    labels = torch.tensor(labels, dtype=torch.long)  # Convert labels to tensor
    return inputs, labels

# ✅ Define DataLoader with collate_fn
dataset = MemeDataset(CSV_FILE, IMAGE_DIR, processor)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)

# 🔹 Define the classification model
class ToxicityClassifier(nn.Module):
    def __init__(self, clip_model):
        super(ToxicityClassifier, self).__init__()
        self.clip = clip_model
        self.fc = nn.Linear(512, 2)  # 512 is CLIP embedding size

    def forward(self, inputs):
        outputs = self.clip(**inputs)
        features = outputs.image_embeds + outputs.text_embeds  # Fuse image & text features
        logits = self.fc(features)
        return logits

# ✅ Initialize model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ToxicityClassifier(clip_model).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# 🔹 Training Loop
EPOCHS = 5
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for batch_inputs, batch_labels in dataloader:
        batch_inputs = {key: val.to(device) for key, val in batch_inputs.items()}
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {total_loss / len(dataloader)}")

# ✅ Save the trained model
MODEL_PATH = "/content/meme/meme_toxicity_classifier.pth"
torch.save(model.state_dict(), MODEL_PATH)
print(f"✅ Model saved at: {MODEL_PATH}")


Epoch 1/5, Loss: 0.14073807795842488
Epoch 2/5, Loss: 0.07566362790763378
Epoch 3/5, Loss: 0.05720887634903193
Epoch 4/5, Loss: 0.0510748348881801
Epoch 5/5, Loss: 0.040846751412997644
✅ Model saved at: /content/meme/meme_toxicity_classifier.pth


In [16]:
# ✅ Load trained model
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model = model.to(device)
model.eval()

# ✅ Define prediction function
def predict_toxicity(image_path, text, model, processor):
    image = Image.open(image_path).convert("RGB")
    inputs = processor(text=[text], images=image, return_tensors="pt", padding=True, truncation=True)
    inputs = {key: val.to(device) for key, val in inputs.items()}

    with torch.no_grad():
        logits = model(inputs)
        prediction = torch.argmax(logits, dim=1).item()

    return "Toxic" if prediction == 1 else "Non-toxic"

# # ✅ Test on a new image
test_image = "/content/meme/memes/100_NM_pic.jpg"
# test_text = "This is a test meme text"  # Replace with extracted OCR text
# result = predict_toxicity(test_image, test_text, model, processor)
# print(test_text)
# print("Prediction:", result)

# # ✅ Extract text dynamically from the image using EasyOCR
test_text = extract_text_from_image(test_image)

# ✅ Now predict toxicity using real extracted text
result = predict_toxicity(test_image, test_text, model, processor)

print("Extracted Text:", test_text)
print("Prediction:", result)



  model.load_state_dict(torch.load(MODEL_PATH, map_location=device))


Extracted Text: IG HASTE RAHO Girlfriendban jao DaiyMilkSilk gftkarunga. OHaste _raho Achao Alpenliebedungao
Prediction: Toxic
