In [1]:
import yaml
import requests
from math import inf
from time import time
from os.path import join, dirname, exists
from os import environ, makedirs
from operator import itemgetter


# Directory that holds the wildcard YAML files. When the environment variable
# is unset this is "", so the two paths below are plain relative file names.
WILDCARDS_DIR = environ.get("WILDCARDS_DIR", "")
CHARACTERS_WILDCARD_PATH, POPULAR_WILDCARD_PATH = (
    join(WILDCARDS_DIR, file_name)
    for file_name in ("characters.yaml", "popular.yaml")
)

In [2]:
def get_tags(copyright: str, page_no: int, timeout: float = 30.0) -> list[str]:
    """Fetch one page of character tag names for a copyright from Danbooru.

    Queries ``tags.json`` for tags of category 4 (character) that have a wiki
    page, are not empty, and whose name or alias matches ``*(<copyright>)``,
    requesting the ``count`` ordering.

    Args:
        copyright: Text expected inside the trailing parentheses of the tag
            name, e.g. ``"blue_archive"``.
        page_no: Result page to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``name`` field of every tag object in the response, in response
        order. Empty when the page has no results.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    base_url = "https://danbooru.donmai.us/tags.json"
    # Passed through `params` so that requests percent-encodes the values;
    # a copyright containing "&", "#" or "+" would otherwise corrupt the query.
    params = {
        "commit": "Search",
        "page": page_no,
        "search[category]": 4,  # 4: character
        "search[has_wiki_page]": "yes",  # Only tags with wiki pages
        "search[hide_empty]": "yes",
        "search[name_or_alias_matches]": f"*({copyright})",
        "search[order]": "count",
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()

    # Keep only the tag names
    return [tag_info["name"] for tag_info in response.json()]


def get_related_tags(tag: str, timeout: float = 30.0) -> list[str]:
    """Fetch the names of the general tags Danbooru reports as related to a tag.

    Args:
        tag: Tag name to look up, e.g. a character tag.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``tag.name`` of every entry under ``related_tags`` in the
        response, in response order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response has no ``related_tags`` entry.
    """
    base_url = "https://danbooru.donmai.us/related_tag.json"
    # Passed through `params` so that requests percent-encodes the tag;
    # names containing "&", "#" or "+" would otherwise corrupt the query.
    params = {
        "commit": "Search",
        "search[category]": "General",
        "search[query]": tag,
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()
    related_tag_infos = response.json()["related_tags"]

    # Keep only the tag names
    return [info["tag"]["name"] for info in related_tag_infos]


def exclusive_filtering(related_tags: list[str], filters: list[str]) -> bool:
    """Check the mutually exclusive tag groups of a filter against related tags.

    For each group of mutually exclusive tags, the first group member found in
    ``filters`` is the requested one. The check passes only when the first
    group member found in ``related_tags`` is that same tag. A group that
    ``filters`` does not mention puts no constraint on the result.

    TODO: Only sex filtering is implemented. More filterings are needed

    Example:
    related_tags = ["1boy", "1girl", ...]
    filters = ["1girl", ...]
    -> return False

    related_tags = ["1girl", "1other", ...]
    filters = ["1girl", ...]
    -> return True

    Args:
        related_tags: Tags related to the candidate; their order matters.
        filters: Tags the candidate is required to match.

    Returns:
        True when every group mentioned in ``filters`` is satisfied.
    """
    sexes = {"1girl", "1boy", "1other"}
    for option_tags in [sexes]:
        selected_tag = next((tag for tag in filters if tag in option_tags), None)
        if selected_tag is None:
            # The filter does not constrain this group; indexing [0] on an
            # empty list used to raise IndexError here.
            continue

        first_related = next(
            (tag for tag in related_tags if tag in option_tags), None
        )
        # Also fails when no tag of the group is related at all.
        if first_related != selected_tag:
            return False

    # Every group passed (returning early would skip groups added later).
    return True


def filter_tags(tags: list[str], filters: list[str]) -> list[str]:
    """Keep only the tags whose related tags satisfy every filter.

    With no filters the input list itself is returned untouched. Otherwise
    each tag costs one ``get_related_tags`` lookup; it is kept when the
    exclusive-group check passes and every filter tag is among its related
    tags.
    """
    if not filters:
        return tags

    def is_accepted(candidate: str) -> bool:
        related = get_related_tags(candidate)
        if not exclusive_filtering(related, filters):
            return False
        return all(required in related for required in filters)

    return [candidate for candidate in tags if is_accepted(candidate)]


def clean_tags(tags: list[str]) -> list[str]:
    """Rewrite tags in place into the form used by WebUI prompts.

    Underscores become spaces and parentheses get a leading backslash.
    The input list is modified and the same list object is returned.
    """
    escape_table = str.maketrans({"_": " ", "(": r"\(", ")": r"\)"})
    for position in range(len(tags)):
        tags[position] = tags[position].translate(escape_table)
    return tags


def get_character_tags(
    copyrights: list[str],
    filters: list[str] = ["1girl"],  # never mutated here; None/[] mean "no filter"
    n_characters: int = -1,
    allow_alternate_costume: bool = True,
) -> dict[str, list[str]]:
    """Collect cleaned character tags for each copyright, page by page.

    TODO: allow_alternate_costume

    Args:
        copyrights: Copyright names to crawl, e.g. ``["blue_archive"]``.
        filters: Tags every character must match (see ``filter_tags``).
        n_characters: Maximum number of characters per copyright;
            ``-1`` means no limit.
        allow_alternate_costume: Currently unused.

    Returns:
        Mapping of each copyright to its cleaned character tags, holding at
        most ``n_characters`` entries when a limit is given.
    """
    # Default value is infinite
    limit = inf if n_characters == -1 else n_characters

    result = {}
    for copyright in copyrights:
        character_tags = []

        page_no = 1
        while True:
            s_time = time()
            tags = get_tags(copyright, page_no)
            if not tags:
                # Only an empty *unfiltered* page means the listing is over.
                # A page whose characters were all filtered out must not end
                # the crawl.
                break
            cleaned_tags = clean_tags(filter_tags(tags, filters))
            # Extend with the returned list instead of relying on clean_tags
            # having modified its argument in place.
            character_tags.extend(cleaned_tags)
            page_no += 1

            print(
                f"Elapsed time: {time() - s_time:.2f}s | Extracted tags: {cleaned_tags}"
            )
            if len(character_tags) >= limit:
                break

        if n_characters >= 0:
            # The last page may overshoot the requested amount.
            character_tags = character_tags[:n_characters]
        result[copyright] = character_tags
    return result


def save_wildcard(character_tags: dict, wildcards_dir: str) -> None:
    """Print character tags as YAML and merge them into ``characters.yaml``.

    Args:
        character_tags: Mapping of copyright name to its character tags.
        wildcards_dir: Directory of the wildcard file. When empty, the tags
            are only printed and nothing is written.

    The file ``<wildcards_dir>/characters.yaml`` is created when missing.
    Tags already stored for a copyright are kept, and only tags not yet
    present are appended, so saving the same result twice does not duplicate
    entries.
    """
    # Log result
    yaml_string = yaml.dump(
        {"characters": character_tags}, allow_unicode=True, sort_keys=False
    )
    print("# Characters")
    print(f"{100*'-'}\n{yaml_string}{100*'-'}")

    # Save result
    if not wildcards_dir:
        return

    makedirs(wildcards_dir, exist_ok=True)
    # Derived from the argument so that a directory other than the
    # module-level default is honoured.
    wildcard_path = join(wildcards_dir, "characters.yaml")

    character_dict = {}
    if exists(wildcard_path):
        with open(wildcard_path, "r") as f:
            # safe_load returns None for an empty file
            character_dict = yaml.safe_load(f) or {}

    saved_characters = character_dict.get("characters") or {}
    character_dict["characters"] = saved_characters

    for copyright, tags in character_tags.items():
        updated_tags = list(saved_characters.get(copyright) or [])
        known_tags = set(updated_tags)
        for tag in tags:
            if tag not in known_tags:
                updated_tags.append(tag)
                known_tags.add(tag)
        saved_characters[copyright] = updated_tags

    with open(wildcard_path, "w") as f:
        yaml.safe_dump(character_dict, f)

In [None]:
# Crawl every "1girl" character of Blue Archive (n_characters=-1 lifts the
# limit), then merge the result into the characters wildcard file.
character_tags = get_character_tags(
    ["blue_archive"],
    filters=["1girl"],
    n_characters=-1,
)
save_wildcard(character_tags, WILDCARDS_DIR)

In [22]:
from datetime import datetime
from dateutil.relativedelta import relativedelta


def get_popular_tags(
    date: str | datetime | None = None,
    n_days: int = 1,
    n_images: int | None = None,
    n_pages: int = 1,
    wildcard_path: str | None = None,
) -> list[list[str]]:
    """Get tags of popular images in the date"""
    # Convert date to datetime object
    if date is None:
        # Yesterday is the latest data
        date = datetime.today() - relativedelta(day=1)
    elif isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")

    tags_list = []

    for day in range(n_days):
        cur_date = (date - relativedelta(day=day)).strftime("%Y-%m-%d")
        page = 1
        while True:
            base_url = "https://danbooru.donmai.us/explore/posts/popular.json"
            queries = [
                f"date={cur_date}",
                f"page={page}",
                "scale=day",
            ]
            joined_query = "&".join(queries)
            url = f"{base_url}?{joined_query}"
            response = requests.get(url)
            response.raise_for_status()
            infos = response.json()

            for info in infos:
                cleaned_tags = (
                    info["tag_string"]
                    .replace(" ", ", ")
                    .replace("_", " ")
                    .replace("(", r"\(")
                    .replace(")", r"\)")
                )
                tags_list.append(cleaned_tags)

            if (n_images and len(tags_list) >= n_images) or (page > n_pages):
                break
            page += 1

        if wildcard_path:
            makedirs(dirname(wildcard_path), exist_ok=True)
            if exists(wildcard_path):
                with open(wildcard_path, "r") as f:
                    popular_dict = yaml.safe_load(f)
            else:
                popular_dict = {"popular": {}}

            popular_dict["popular"][date] = tags_list

            with open(wildcard_path, "w") as f:
                yaml.safe_dump(popular_dict, f)

        yaml_string = yaml.dump(
            {"popular": {cur_date: tags_list}}, allow_unicode=True, sort_keys=False
        )
        print("# Poplar")
        print(f"{100*'-'}\n{yaml_string}{100*'-'}")

    return popular_dict

In [24]:
# Fetch the popular posts of the last 10 days and merge them into the
# popular wildcard file.
popular = get_popular_tags(
    wildcard_path=POPULAR_WILDCARD_PATH,
    n_days=10,
)

# Poplar
----------------------------------------------------------------------------------------------------
popular:
  '2024-11-01':
  - 1boy, 1girl, against wall, anal, animated, anus, ass, blue eyes, blue hair, clothed
    female nude male, clothed sex, clothing aside, commentary, cum, cum in ass, cum
    in pussy, cum overflow, dark-skinned male, dark skin, doggystyle, ejaculation,
    english commentary, eyeliner, eyeshadow, finger in another's mouth, hair between
    eyes, hair ornament, hairclip, hetero, makeup, nude, open mouth, original, paid
    reward available, panties, panties aside, penis, pumpkin, pussy, saki \(the atko\),
    sex, sex from behind, short hair, solo focus, sound, sound effects, squishsuccubus,
    standing, standing sex, sweat, testicles, the atko, thong, thong aside, uncensored,
    underwear, vaginal, video, voice actor
  - 1girl, absurdres, alternate costume, anus, ass, ass cutout, black footwear, blue
    eyes, blue hair, blue nails, clothing cutout,