# Toxicity Detection in Memes 

<table align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/georgia-tech-db/eva/blob/master/tutorials/10-toxicity-classifier-huggingface.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" /> Run on Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/georgia-tech-db/eva/blob/master/tutorials/10-toxicity-classifier-huggingface.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" /> View source on GitHub</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/georgia-tech-db/eva/raw/master/tutorials/10-toxicity-classifier-huggingface.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" /> Download notebook</a>
  </td>
</table><br><br>

### Connect to EvaDB

In [1]:
# Install EvaDB with the "vision", "document" and "notebook" extras into the
# environment of the running kernel (%pip, unlike !pip, targets the kernel).
%pip install --quiet "evadb[vision,document,notebook]"
import evadb
# Open a connection and keep a cursor; the cells below issue every query and
# UDF registration through this `cursor` object.
cursor = evadb.connect().cursor()

Note: you may need to restart the kernel to use updated packages.


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


### Load the Memes for analysis

In [2]:
!wget -nc "https://raw.githubusercontent.com/georgia-tech-db/toxicity-classification/main/meme1.jpg"
!wget -nc "https://raw.githubusercontent.com/georgia-tech-db/toxicity-classification/main/meme2.jpg"
response = cursor.query('DROP TABLE IF EXISTS MemeImages;').df()
cursor.query('LOAD IMAGE "meme*.jpg" INTO MemeImages;').df()

File 'meme1.jpg' already there; not retrieving.

File 'meme2.jpg' already there; not retrieving.



Unnamed: 0,0
0,Number of loaded IMAGE: 2


### Create OCR Extractor & Toxicity Classification UDFs

In [4]:
from pathlib import Path


def _resolve_udf_path(filename):
    """Return a usable path to one of the UDF implementation files.

    The repo-relative location '../evadb/udfs/<filename>' is preferred so the
    behaviour is unchanged when the notebook is run from the repository's
    tutorials directory. When that file is absent (for example when the
    notebook is run from another working directory), fall back to the
    `udfs` directory next to the imported `evadb` package.

    Args:
        filename: File name of the UDF implementation, e.g. 'ocr_extractor.py'.

    Returns:
        The path, as a string, to hand to `cursor.create_udf`.
    """
    repo_relative = '../evadb/udfs/' + filename
    if Path(repo_relative).exists():
        return repo_relative

    # NOTE(review): assumes the installed evadb package ships this file under
    # evadb/udfs/ -- confirm for the evadb version in use.
    packaged = Path(evadb.__file__).resolve().parent / 'udfs' / filename
    if packaged.exists():
        return str(packaged)

    # Nothing found: hand back the original path and let EvaDB report the error.
    return repo_relative


# Re-create both UDFs from scratch so that re-running the cell is safe.
cursor.query("DROP UDF IF EXISTS OCRExtractor;").df()
cursor.create_udf("OCRExtractor", True, _resolve_udf_path('ocr_extractor.py')).df()

cursor.query("""DROP UDF IF EXISTS ToxicityClassifier;""").df()
cursor.create_udf("ToxicityClassifier", True, _resolve_udf_path('toxicity_classifier.py')).df()

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
06-17-2023 00:50:35 ERROR [plan_executor:plan_executor.py:execute_plan:0167] Error creating UDF: Couldn't build proto file into descriptor pool: duplicate file name sentencepiece_model.proto
Traceback (most recent call last):
  File "/nethome/jarulraj3/eva/test_evadb/lib/python3.10/site-packages/evadb/executor/create_udf_executor.py", line 148, in _try_initializing_udf
    udf(**udf_args)
  File "/nethome/jarulraj3/eva/test_evadb/lib/python3.10/site-packages/evadb/udfs/abstract/abstract_udf.py", line 33, in __init__
    self.setup(*args, **kwargs)
  File "/nethome/jarulraj3/eva/test_evadb/lib/python3.10/site-packages/evadb/udfs/decorators/decorators.py", line 35, in wrapper
    arg_fn(*args, **kwargs)
  File "/home/jarulraj3/eva/evadb/udfs/ocr_extractor.py", line 43, in setup
    self.processor = DonutProcessor.from_

ExecutorError: Error creating UDF: Couldn't build proto file into descriptor pool: duplicate file name sentencepiece_model.proto

### Run Toxicity Classifier on OCR Extracted from Images

In [None]:
# For every meme, unnest the OCR output into one row per extracted text and
# run the toxicity classifier on that text.
toxicity_sql = """SELECT memeimages._row_id, T.label, ToxicityClassifier(label)
                  FROM MemeImages JOIN LATERAL
                  UNNEST(OCRExtractor(data)) AS T(label)
                  ;"""

toxicity_query = cursor.query(toxicity_sql)
response = toxicity_query.df()
response

### Visualize Model Output on Images

In [None]:
from pprint import pprint
from matplotlib import pyplot as plt
import cv2
import numpy as np

def annotate_image(detections, input_image_path, image_id):
    """Show an image before and after overlaying its OCR text and toxicity label.

    The image is displayed twice with matplotlib: first untouched, then with
    the toxicity label and the OCR text drawn on top of it.

    Args:
        detections: DataFrame with the query result. The selected row must
            provide the columns 'T.label' (OCR text) and
            'toxicityclassifier.label' (predicted label).
        input_image_path: Path of the image file to read with OpenCV.
        image_id: Positional index (as used by ``DataFrame.iloc``) of the row
            in ``detections`` that belongs to this image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``input_image_path``.
    """
    # Text colours. The image is converted to RGB below, so these are RGB.
    color1=(207, 248, 64)
    color2=(255, 49, 49)
    thickness=4

    row = detections.iloc[image_id]
    if not row.size:
        # Nothing to annotate for this row.
        return

    image = cv2.imread(input_image_path)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None
        # instead of raising; fail here with a clear message.
        raise FileNotFoundError(f"Could not read image: {input_image_path}")

    # OpenCV loads pixels in BGR order while matplotlib expects RGB; without
    # this conversion the picture is displayed with red and blue swapped.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    ocr = row['T.label']
    label = row['toxicityclassifier.label']

    # Original image.
    plt.imshow(image)
    plt.show()

    # Large toxicity label, with the extracted text in a smaller font below it.
    cv2.putText(image, label, (25, 200), cv2.FONT_HERSHEY_SIMPLEX, 3, color2, thickness, cv2.LINE_AA)
    cv2.putText(image, ocr, (25, 250), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color1, thickness, cv2.LINE_AA)

    # Annotated image.
    plt.imshow(image)
    plt.show()

In [None]:
from ipywidgets import Image
# Pair each meme file with the position of its row in `response`.
for meme_path, row_position in (('meme1.jpg', 1), ('meme2.jpg', 0)):
    annotate_image(response, meme_path, image_id=row_position)