In [1]:
from dotenv import load_dotenv
import os
import base64
import requests

# Load environment variables from the .env file
load_dotenv()
API_KEY = os.getenv('OPENAI_API_KEY')

In [2]:
SYSTEM_PROMPT = "You are an AI Assistant that provides image labelling services. You are able to provide detailed, concise, and accurate descriptions of the sticker images that you see. Your response should be only in the format of descriptions separated by commas.\n\nFor example:\n\"A Posting, dynamic style, Animals, robots, dance music, complete picture, cyberpunk style, futuristic technology stickers, simple pattern, Highlighting the background.\""

IMAGE_DIR = os.path.join(os.getcwd(), "../dataset/")

In [21]:
from tqdm import tqdm
import time

# Function to encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

def label_image(image_path):
  # Getting the base64 string
  base64_image = encode_image(image_path)

  headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}"
  }

  payload = {
    "model": "gpt-4o",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": SYSTEM_PROMPT
          },
          {
            "type": "image_url",
            "image_url": {
              "url": f"data:image/png;base64,{base64_image}",
              "detail": "high"
            }
          }
        ]
      }
    ],
    "max_tokens": 300
  }

  response = requests.post("https://api.openai.com/v1/chat/completions",headers=headers, json=payload)
  return response.json()

labels = {}
max_retries = 5
for filename in tqdm(os.listdir(IMAGE_DIR)): # only 54 images
  file_path = os.path.join(IMAGE_DIR, filename)
  
  if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpg', '.jpeg')):

    # retry up to 5 times
    for attempt in range(1, max_retries + 1):
        try:
            response = label_image(file_path)
            label = response["choices"][0]["message"]["content"]
            labels[filename] = label
            break  # If operation succeeds, exit the loop
        except Exception as e:
            print(f"Attempt {attempt} failed for image '{filename}' : {e}")
            if attempt < max_retries:
                print("Retrying...")
                time.sleep(1)
            else:
                print("All retry attempts failed")

100%|██████████| 56/56 [02:36<00:00,  2.80s/it]


In [24]:
labels

{'002.png': '"Smiling, blonde hair, short hair, red lipstick, white dress, glamorous, classic Hollywood, caricature, hands on hip, eyes closed"',
 '016.png': '"Cartoon style, man with long blonde hair, red and black striped shirt, smiling face, arms outstretched, friendly gesture, detailed facial features"',
 '017.png': '"Profile view, female character, cartoon style, wearing blue hat, wearing blue dress, angry expression, arms crossed, jewelry, earrings, necklace"',
 '003.png': '"Person, suit and tie, bearded, thumbs up gesture, historical figure, formal attire, realistic style, sticker, approval gesture"',
 '029.png': '"A pointing pose, cartoon style, male figure, formal attire, suit and tie, expressive facial features, three-quarter view, character illustration, solid background."',
 '015.png': '"Cartoon style, elderly man, white beard, serious expression, wearing a suit, holding a cigar, detailed facial features, vintage clothing style, sticker, white outline."',
 '001.png': '"Cart

In [26]:
len(labels)

54

In [25]:
import pandas as pd

# metadata.csv file name is required to upload to hugging face as image dataset
cleaned_metadata = {k: v.replace('"', '') if isinstance(v, str) else v for k, v in labels.items()}
df = pd.DataFrame(list(cleaned_metadata.items()), columns=["file_name", "text"])
df.to_csv(os.path.join(IMAGE_DIR, "metadata.csv"), index=False)

In [3]:
from datasets import load_dataset
dataset = load_dataset("imagefolder", data_dir=IMAGE_DIR, split="train")

  from .autonotebook import tqdm as notebook_tqdm
Downloading data: 100%|██████████| 56/56 [00:00<00:00, 252288.96files/s]
Generating train split: 54 examples [00:00, 64.73 examples/s]


In [4]:
dataset.push_to_hub("STUDs/DiscordDiffusion") # upload to huggingface

Map: 100%|██████████| 54/54 [00:00<00:00, 1164.04 examples/s]?it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 28.75ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:02<00:00,  2.35s/it]


CommitInfo(commit_url='https://huggingface.co/datasets/STUDs/DiscordDiffusion/commit/24701e5049e2c3274525fb03b1507606ce7ede73', commit_message='Upload dataset', commit_description='', oid='24701e5049e2c3274525fb03b1507606ce7ede73', pr_url=None, pr_revision=None, pr_num=None)

In [5]:
# test to see if data is on huggingface
dataset = load_dataset("STUDs/DiscordDiffusion", split="train")

Downloading readme: 100%|██████████| 312/312 [00:00<00:00, 1.28MB/s]
Downloading data: 100%|██████████| 12.5M/12.5M [00:02<00:00, 5.71MB/s]
Generating train split: 100%|██████████| 54/54 [00:00<00:00, 1278.08 examples/s]


In [6]:
dataset # success!

Dataset({
    features: ['image', 'text'],
    num_rows: 54
})