# Prep Prompt Lists

## Setup

In [None]:
import os


# Run configuration. Every setting is read from a lower-case environment
# variable and falls back to the default shown here when the variable is unset.

# Fallback negative prompt, used when "negative_prompt" is not set in the
# environment. Adjacent literals are concatenated into one comma-separated
# string.
_DEFAULT_NEGATIVE_PROMPT = (
    "blurry, glasses, "
    "sunglasses, goggles, unsharp, bad dream, "
    "lowres, text, error, cropped, "
    "worst quality, low quality, jpeg artifacts, "
    "ugly, duplicate, morbid, mutilated, "
    "out of frame, extra fingers, mutated hands, "
    "poorly drawn hands, poorly drawn face, "
    "mutation, deformed, blurry, dehydrated, "
    "bad anatomy, bad proportions, extra limbs, "
    "cloned face, disfigured, gross proportions, "
    "malformed limbs, missing arms, "
    "missing legs, extra arms, extra legs, "
    "fused fingers, too many fingers, long neck, "
    "username, watermark, signature"
)

# Path of the CSV that is later loaded with pandas (one column per bucket).
BUCKET_PROMPTS = os.getenv("bucket_prompts", "bucket-prompts.csv")
NEGATIVE_PROMPT = os.getenv("negative_prompt", _DEFAULT_NEGATIVE_PROMPT)

# Width/height values copied verbatim into every generated row.
WIDTH = int(os.getenv("width", "1024"))
HEIGHT = int(os.getenv("height", "1366"))

# First image number per bucket, images per prompt, and number of output
# worker files, respectively.
NUM_START = int(os.getenv("num_start", "0"))
NUM_IMAGES = int(os.getenv("num_images", "1"))
NUM_WORKERS = int(os.getenv("num_workers", "6"))

# Leading path component of every generated "s3 key".
AWS_S3_PREFIX = os.getenv("aws_s3_prefix", "generated-images")


In [None]:
import math
import pandas as pd

# Build one row per (prompt, image) pair from the bucket CSV and split the
# rows into NUM_WORKERS contiguous chunks, written to prompts_1.csv ...
# prompts_<NUM_WORKERS>.csv.
#
# Each CSV column is treated as a bucket: the column header becomes part of
# the S3 key and every non-empty cell in the column is a prompt.

if NUM_WORKERS < 1:
    # Fail early with a clear message instead of a ZeroDivisionError or
    # IndexError further down.
    raise ValueError(f"num_workers must be >= 1, got {NUM_WORKERS}")

OUTPUT_COLUMNS = ["prompt", "negative_prompt", "s3 key", "width", "height"]

df = pd.read_csv(BUCKET_PROMPTS)

# Collect plain lists first; creating each DataFrame once from a list of rows
# avoids the cost of growing a frame one `.loc` insert at a time.
rows = []
for header in df:
    prefix = f"{AWS_S3_PREFIX}/{header}"
    # Image numbering restarts at NUM_START for every bucket.
    img_num = NUM_START
    # Columns shorter than the longest one are padded with NaN by read_csv;
    # dropna() keeps those padding cells from becoming empty prompts.
    for prompt in df[header].dropna():
        for _ in range(NUM_IMAGES):
            img_str = str(img_num).zfill(3)
            img_num += 1
            key = f"{prefix}/{img_str}.png"
            rows.append([prompt, NEGATIVE_PROMPT, key, WIDTH, HEIGHT])

total_num_images = len(rows)

# Chunk size is rounded up, so the first workers are filled completely and
# trailing workers may receive fewer rows (or none at all).
num_per_worker = math.ceil(total_num_images / NUM_WORKERS)

workers = [
    pd.DataFrame(
        rows[start * num_per_worker:(start + 1) * num_per_worker],
        columns=OUTPUT_COLUMNS,
    )
    for start in range(NUM_WORKERS)
]

# Output files are numbered from 1; a worker without rows still gets a
# header-only CSV.
for i, worker in enumerate(workers):
    print(i)
    worker.to_csv(f"prompts_{i+1}.csv", index=False)