In [2]:
import re
import sys
import bs4
import json
import httpx
import pathlib

from src.read import ImageData
from src.color import ColorPrint
from src.color import console_color as console
from src.util import parse_weighted_prompt_tags

cprint = ColorPrint()

In [15]:
imagefile = pathlib.Path("sample images", "safe.jpeg")
imagefile = pathlib.Path("sample images", "00002-208470858.png")
imagefile = pathlib.Path("sample images", "00006-1357204631.png")
imagefile = pathlib.Path("sample images", "2025-02-13T15.44.34_1.jpg")
img = ImageData(imagefile)

def parse_basic(raw_prompt_string):
    match = re.search(r"(?P<prompts>.*?)(?P<settings>steps:.*)", raw_prompt_string, re.DOTALL | re.IGNORECASE)
    prompts = match.groupdict()["prompts"]
    settings = match.groupdict()["settings"]

    # --------------------------------------- #
    # Prompt strings
    # --------------------------------------- #
    match = re.search(r"^(?P<pos>.*?)(?:\bnegative prompt:\s*(?P<neg>.*))?$", prompts, re.DOTALL | re.IGNORECASE)

    positive_prompt = match.groupdict()["pos"]
    negative_prompt = match.groupdict()["neg"]


    if isinstance(positive_prompt, str):
        positive_prompt = positive_prompt.strip().strip(",").strip()
    if isinstance(negative_prompt, str):
        negative_prompt = negative_prompt.strip().strip(",").strip()

    # --------------------------------------- #
    # Read LoRA and Embedding data
    # --------------------------------------- #
    lora_weights = re.findall(r"<lora:(?P<name>.*?):?(?P<weight>\d+|\d+\.\d+)?>", positive_prompt, re.DOTALL | re.IGNORECASE)
    embed_weights = re.findall(r"<embed:(?P<name>.*?):?(?P<weight>\d+|\d+\.\d+)?>", positive_prompt, re.DOTALL | re.IGNORECASE)
    lora_weights = [[x[0], float(x[1])] for x in lora_weights]
    embed_weights = [[x[0], float(x[1])] for x in embed_weights]

    # --------------------------------------- #
    # Get Weighted Prompt Tags
    # --------------------------------------- #
    positive_prompt_weight_tags = parse_weighted_prompt_tags(positive_prompt)
    negative_prompt_weight_tags = parse_weighted_prompt_tags(negative_prompt)

    # --------------------------------------- #
    # Separate Settings and CivitAI data
    # --------------------------------------- #
    match = re.search(r"(?P<settings>.*?)($|(?P<civitai>civitai resources:.*))", settings, re.DOTALL | re.IGNORECASE)
    settings = match.groupdict()["settings"]
    civitai = match.groupdict()["civitai"]

    # --------------------------------------- #
    # Parse Settings into dictionary
    # --------------------------------------- #
    settings_dict: dict | None = None
    
    if settings:
        # Updated regex to properly capture JSON
        pattern = re.compile(
            r"(\w[\w\s]*?):\s*(\{.*?\}|\[[^\]]*\]|\"[^\"]*\"|[^,\n]+)",
            re.DOTALL | re.IGNORECASE
        )

        pairs = pattern.findall(settings)

        # Convert to dictionary
        settings_dict = {key.strip(): value.strip() for key, value in pairs}
    
    standard_keys = {}
    for setting_key in [_ for _ in settings_dict.keys()]:
        if "steps" in setting_key.lower():
            settings_dict["steps"] = settings_dict.pop(setting_key)
            standard_keys["steps"] = setting_key
        elif "cfg scale" == setting_key.lower():
            settings_dict["cfg_scale"] = settings_dict.pop(setting_key)
            standard_keys["cfg_scale"] = setting_key
        elif "seed" in setting_key.lower():
            settings_dict["seed"] = settings_dict.pop(setting_key)
            standard_keys["seed"] = setting_key
        elif "size" in setting_key.lower():
            settings_dict["size"] = settings_dict.pop(setting_key)
            standard_keys["size"] = setting_key
        elif "clip skip" == setting_key.lower():
            settings_dict["clip_skip"] = settings_dict.pop(setting_key)
            standard_keys["clip_skip"] = setting_key
        elif "denoising strength" == setting_key.lower():
            settings_dict["denoising_strength"] = settings_dict.pop(setting_key)
            standard_keys["denoising_strength"] = setting_key
        elif "model" == setting_key.lower():
            settings_dict["model"] = settings_dict.pop(setting_key)
            standard_keys["model"] = setting_key
        elif "model hash" in setting_key.lower():
            settings_dict["model_hash"] = settings_dict.pop(setting_key)
            standard_keys["model_hash"] = setting_key
        elif "lora hashes" == setting_key.lower():
            lora_hashes: str = settings_dict.pop(setting_key)
            lora_hashes = lora_hashes.strip().strip('\"').strip()
            lora_hash_data = {}
            for lh in lora_hashes.split(","):
                lora_name, lora_hash_id = lh.split(":")
                lora_hash_data[lora_name.strip()] = lora_hash_id.strip()
            settings_dict["lora_hashes"] = lora_hash_data
            standard_keys["lora_hashes"] = setting_key
        elif "version" == setting_key.lower():
            settings_dict["version"] = settings_dict.pop(setting_key)
            standard_keys["version"] = setting_key
        elif "sampler" == setting_key.lower():
            settings_dict["sampler"] = settings_dict.pop(setting_key)
            standard_keys["sampler"] = setting_key
        elif "schedule type" == setting_key.lower():
            settings_dict["schedule_type"] = settings_dict.pop(setting_key)
            standard_keys["schedule_type"] = setting_key
        elif "vae" == setting_key.lower():
            settings_dict["vae"] = settings_dict.pop(setting_key)
            standard_keys["vae"] = setting_key
        
    for setting_key in [_ for _ in settings_dict.keys()]:
        if setting_key not in standard_keys:
            settings_dict[f"_{setting_key}"] = settings_dict.pop(setting_key)




    generation_data = {
        "positive": positive_prompt,
        "negative": negative_prompt,
        "loras": lora_weights,
        "embeds": embed_weights,
        "custom_tag_weights": {
            "positive": positive_prompt_weight_tags,
            "negative": negative_prompt_weight_tags
        }, 
        "settings": settings_dict,
        # "resources": resources,
        # "metadata": metadata
    }
    
    return generation_data

console.print(parse_basic(img.raw_prompt))
print()
print(img.raw_prompt)


Image of a star-abundant galaxy, a bright and colorful nebula, vacuum of space, masterpiece, best quality, absurdres, very aesthetic
Negative prompt:  worst quality, low quality, normal quality, jpeg artifacts, lowres, clouds, humans, aliens, beings, animals, easynegative
Steps: 28, Sampler: Euler a, CFG scale: 8, Seed: 2046943637, Size: 1216x832, Clip skip: 2, Created Date: 2025-02-13T15:44:43.0057013Z, Civitai resources: [{"type":"checkpoint","modelVersionId":403131,"modelName":"Animagine XL V3.1","modelVersionName":"v3.1"},{"type":"embed","weight":1,"modelVersionId":9208,"modelName":"EasyNegative","modelVersionName":"EasyNegative"},{"type":"lora","weight":1.05,"modelVersionId":135867,"modelName":"Detail Tweaker XL","modelVersionName":"v1.0"},{"type":"vae","weight":1,"modelVersionId":669051,"modelName":"Liquid111VAE \u0026 Liquid9745VAE","modelVersionName":"SDXL_9745VAE"},{"type":"embed","weight":1,"modelVersionId":106916,"modelName":"Civitai Safe Helper","modelVersionName":"v1.0"}]

In [7]:
with open("temp.html", "r") as html:
    soup = bs4.BeautifulSoup(html, "html.parser")
    for inp in soup.find_all("input"):
        value = inp.attrs["value"]
        key = value.upper().replace(" ", "_")
        print('{} = "{}"'.format(key, value))

DAY = "Day"
WEEK = "Week"
MONTH = "Month"
YEAR = "Year"
ALLTIME = "AllTime"
 = ""
 = ""
 = ""
CHECKPOINT = "Checkpoint"
TEXTUALINVERSION = "TextualInversion"
HYPERNETWORK = "Hypernetwork"
AESTHETICGRADIENT = "AestheticGradient"
LORA = "LORA"
LOCON = "LoCon"
DORA = "DoRA"
CONTROLNET = "Controlnet"
UPSCALER = "Upscaler"
MOTIONMODULE = "MotionModule"
VAE = "VAE"
POSES = "Poses"
WILDCARDS = "Wildcards"
WORKFLOWS = "Workflows"
DETECTION = "Detection"
OTHER = "Other"
ALL = "all"
TRAINED = "Trained"
MERGE = "Merge"
SAFETENSOR = "SafeTensor"
PICKLETENSOR = "PickleTensor"
GGUF = "GGUF"
DIFFUSERS = "Diffusers"
CORE_ML = "Core ML"
ONNX = "ONNX"
SD_1.4 = "SD 1.4"
SD_1.5 = "SD 1.5"
SD_1.5_LCM = "SD 1.5 LCM"
SD_1.5_HYPER = "SD 1.5 Hyper"
SD_2.0 = "SD 2.0"
SD_2.1 = "SD 2.1"
SDXL_1.0 = "SDXL 1.0"
SD_3 = "SD 3"
SD_3.5 = "SD 3.5"
SD_3.5_MEDIUM = "SD 3.5 Medium"
SD_3.5_LARGE = "SD 3.5 Large"
SD_3.5_LARGE_TURBO = "SD 3.5 Large Turbo"
PONY = "Pony"
FLUX.1_S = "Flux.1 S"
FLUX.1_D = "Flux.1 D"
AURAFLOW = "Au