In [1]:
import os, json, base64, glob
from pathlib import Path
from openai import OpenAI
from moderation_agent import ModerationAgent
from PIL import Image
from image_utils import maybe_downscale

In [2]:
MODEL_VISION_AGENT = "gpt-4o"

with open("credentials.txt", "rb") as f:
    open_ai_key = f.readline().decode('ascii')

client = OpenAI(api_key=open_ai_key)

In [3]:
# def b64_image(path):
#     with open(path, "rb") as f:
#         return "data:image/" + Path(path).suffix[1:].lower() + ";base64," + base64.b64encode(f.read()).decode("utf-8")

In [4]:
image_paths = glob.glob("C:\\Users\\benja\\Desktop\\prompt_updates\\images\\*.png")
# image_paths = [p for p in image_paths if "jesus" in p or "clown" in p or "drinking" in p or "sexy" in p]
image_paths

['C:\\Users\\benja\\Desktop\\prompt_updates\\images\\ah.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\ah_mii.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\call_me_naked_alien.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\captain_beer_mcdonalds.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\cultural_appropriation_clown.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\cultural_appropriation_clown_2.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\disney_underwear.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\drinking_godess.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\ducky_jesus.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\dumpster.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\elon.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\fat_losers.png',
 'C:\\Users\\benja\\Desktop\\prompt_updates\\images\\fight_night.png',
 'C:\\Users\\benja\\Desktop\\prompt_up

In [5]:
# image_dict = {os.path.basename(path).split(".")[0]: b64_image(path) for path in image_paths}

In [4]:
def load_image(path: Path):
    image = Image.open(path)
    image = maybe_downscale(image, 500)
    buf = io.BytesIO()
    image.save(buf, format="PNG")
    img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    return img_b64


In [5]:
image_dict = {os.path.basename(p).split('.')[0]: load_image(p) for p in image_paths}

In [6]:
def read_definitions(dir_path:str):
    with open(f"{dir_path}/few_shot_examples.txt", 'rb') as few_shot_file:
        _few_shot_examples = few_shot_file.read().decode('utf-8')

    with open(f"{dir_path}/moderation_taxonomy.json", 'rb') as taxonomy_file:
        _moderation_taxonomy = json.load(taxonomy_file)
        _moderation_taxonomy = json.dumps(_moderation_taxonomy, ensure_ascii=False)

    with open(f"{dir_path}/system_spine.txt", 'rb') as system_spine_file:
        _system_spine = system_spine_file.read().decode('utf-8')

    with open(f"{dir_path}/user_prompt.txt", "rb") as user_prompt_file:
        _user_prompt = user_prompt_file.read().decode('utf-8').strip()

    return _system_spine, _user_prompt, _moderation_taxonomy, _few_shot_examples

In [7]:
system_spine, user_prompt, moderation_taxonomy, few_shot_examples = read_definitions("definitions_v0")

In [8]:
agent_0 = ModerationAgent(
    client=client,
    vision_model=MODEL_VISION_AGENT,
    system_spine=system_spine,
    few_shot_examples=few_shot_examples,
    moderation_taxonomy=moderation_taxonomy,
    user_prompt=user_prompt
)

In [None]:
taxonomy_results_0 = agent_0.moderate_images(image_dict)

In [10]:
taxonomy_results_0

{'ah_mii': {'taxonomy_version': '3.0',
  'summary': 'Model resembles a historical figure associated with hate and extremism.',
  'flags': [{'category_id': 'HE3',
    'subcategory_id': 'HE3.1',
    'rationale': 'Model resembles a historical figure known for extremism and hate.',
    'evidence': {'type': 'geometry', 'pointer': 'view#1'},
    'confidence': 0.95}],
  'text_fragments': []},
 'hail_satan': {'taxonomy_version': '3.0',
  'summary': 'Model depicts occult/supernatural themes.',
  'flags': [{'category_id': 'T8',
    'subcategory_id': 'T8.1',
    'rationale': 'Presence of pentagram and ritualistic setting indicating occult/supernatural horror.',
    'evidence': {'type': 'geometry', 'pointer': 'view#1'},
    'confidence': 0.95}],
  'text_fragments': []},
 'hate_shield_2': {'taxonomy_version': '3.0',
  'summary': 'Shield with extremist symbol.',
  'flags': [{'category_id': 'HE3',
    'subcategory_id': 'HE3.2',
    'rationale': 'Extremist propaganda symbol embedded in shield design.'

In [11]:
agent_0_mini = ModerationAgent(
    client=client,
    vision_model="gpt-4o-mini",
    system_spine=system_spine,
    few_shot_examples=few_shot_examples,
    moderation_taxonomy=moderation_taxonomy,
    user_prompt=user_prompt
)
taxonomy_results_0_mini = agent_0_mini.moderate_images(image_dict)

completed elon
completed ah_mii
completed naked_man
completed cultural_appropriation_clown_2
completed fat_losers
completed Jefferson_high
completed hate_shield
completed hate_shield_2
completed ah
completed hail_satan
completed marlboro_bong
completed Join_the_resistence
completed call_me_naked_alien
completed sonic
completed dumpster
completed naked_woman
completed cultural_appropriation_clown
completed yang
completed one_gun_lingerie
completed sexy_buddah
completed captain_beer_mcdonalds
completed disney_underwear
completed drinking_godess
completed fight_night
completed ducky_jesus
completed smoking_mickey
completed tow_guns_lingerie


In [12]:
taxonomy_results_0_mini

{'elon': {'taxonomy_version': '3.0',
  'summary': 'No problematic content detected.',
  'flags': [],
  'text_fragments': []},
 'ah_mii': {'taxonomy_version': '3.0',
  'summary': 'The model depicts a character resembling a historical figure associated with hate and extremism.',
  'flags': [{'category_id': 'HE3',
    'subcategory_id': 'HE3.1',
    'rationale': 'Representation of a historical figure known for extremist ideologies and actions.',
    'evidence': {'type': 'geometry', 'pointer': 'view#'},
    'confidence': 0.9}],
  'text_fragments': []},
 'naked_man': {'taxonomy_version': '3.0',
  'summary': 'The model depicts nudity.',
  'flags': [{'category_id': 'S1',
    'subcategory_id': 'S1.1',
    'rationale': 'The model is fully nude, displaying bare skin.',
    'evidence': {'type': 'geometry', 'pointer': 'view#'},
    'confidence': 1.0}],
  'text_fragments': []},
 'cultural_appropriation_clown_2': {'taxonomy_version': '3.0',
  'summary': 'Cultural appropriation evident in the use of t