In [None]:
import json
import os
from urllib.parse import urlsplit

import httpx
from bs4 import BeautifulSoup

class SakugaScraper:
    """Download Sakugabooru posts (media file plus tag metadata) to disk.

    Every scraped post gets its own ``post_<id>`` directory below
    ``root_dir``. The directory holds a JSON file with the scraped metadata
    and, when the post page exposes a high-resolution link, the media file.
    """

    BASE_URL = "https://www.sakugabooru.com/post/show/{}"
    # Extension used when the media URL does not carry a usable one.
    DEFAULT_EXTENSION = "jpg"
    # Timeout (seconds) passed to every httpx request.
    REQUEST_TIMEOUT = 10

    def __init__(self, root_dir: str):
        """Remember ``root_dir`` and create it if it does not exist yet."""
        self.root_dir = root_dir
        os.makedirs(root_dir, exist_ok=True)

    @staticmethod
    def _extension_from_url(url, default: str = "jpg") -> str:
        """Return the file extension (without the dot) of ``url``'s path.

        Only the path component is inspected, so query strings and fragments
        never leak into the extension. ``default`` is returned when ``url``
        is empty or its last path segment has no extension.
        """
        if not url:
            return default
        extension = os.path.splitext(urlsplit(url).path)[1].lstrip(".")
        return extension or default

    def fetch_post(self, post_id: str):
        """Fetch the page of ``post_id`` and return it parsed as BeautifulSoup.

        Errors raised by httpx (transport failures, and non-success status
        codes via ``raise_for_status``) propagate to the caller.
        """
        url = self.BASE_URL.format(post_id)
        response = httpx.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return BeautifulSoup(response.text, "html.parser")

    def extract_metadata(self, soup, post_id: str):
        """Collect the media URL and the sidebar tags of a parsed post page.

        Returns a dict with:
          * ``"image_url"`` - href of the ``<a id="highres">`` element, or
            ``None`` when the element (or its href) is missing;
          * one key per tag type - the first CSS class of each ``<li>`` in
            ``<ul id="tag-sidebar">`` (``None`` when the item has no class) -
            mapped to the list of tag names found under that type;
          * ``"post_id"`` - the ``post_id`` argument, unchanged.
        """
        metadata = {}

        # Extract high-res image link
        highres_link = soup.find("a", id="highres")
        metadata["image_url"] = (
            highres_link.get("href") if highres_link is not None else None
        )

        # Extract tags
        tag_sidebar = soup.find("ul", id="tag-sidebar")
        if tag_sidebar is not None:
            for li in tag_sidebar.find_all("li"):
                # ``class`` may be absent or an empty list; both map to None
                # instead of raising IndexError.
                classes = li.get("class") or [None]
                tag_type = classes[0]
                # NOTE(review): only the first <a> of the item is read. If the
                # markup puts a helper link before the tag link, this picks up
                # the helper's text - confirm against a live page.
                link = li.find("a")
                tag_name = link.text.strip() if link is not None else "?"
                metadata.setdefault(tag_type, []).append(tag_name)

        metadata["post_id"] = post_id
        return metadata

    def download_image(self, url: str, save_path: str):
        """Download ``url`` to ``save_path``.

        The body is streamed chunk by chunk into ``<save_path>.part`` and
        renamed to ``save_path`` only after the transfer finished, so large
        files are never held in memory as a whole and a failed download does
        not leave a truncated file at ``save_path``. Errors raised by httpx
        or by the file system propagate to the caller.
        """
        partial_path = f"{save_path}.part"
        try:
            with httpx.stream("GET", url, timeout=self.REQUEST_TIMEOUT) as response:
                response.raise_for_status()
                with open(partial_path, "wb") as f:
                    for chunk in response.iter_bytes():
                        f.write(chunk)
            os.replace(partial_path, save_path)
        finally:
            # After a successful rename the partial file is gone; anything
            # still present here belongs to a failed transfer.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    def scrape_post(self, post_id: str):
        """Scrape a single post: download its media and write its metadata.

        Files are written to ``<root_dir>/post_<post_id>/``. The media file
        is downloaded first (only when a media URL was found); the metadata
        JSON is written afterwards, so a failed download leaves no metadata
        file behind.
        """
        soup = self.fetch_post(post_id)
        metadata = self.extract_metadata(soup, post_id)

        # Prepare directories and file paths
        post_dir = os.path.join(self.root_dir, f"post_{post_id}")
        os.makedirs(post_dir, exist_ok=True)
        # NOTE(review): the "sankaku_" prefix does not match this scraper's
        # target site and may be a leftover; it is kept so that existing
        # download folders keep their layout.
        base_name = f"sankaku_{post_id}"
        metadata_path = os.path.join(post_dir, f"{base_name}.json")

        # Download image and save metadata
        image_url = metadata["image_url"]
        if image_url:
            ext = self._extension_from_url(image_url, self.DEFAULT_EXTENSION)
            image_path = os.path.join(post_dir, f"{base_name}.{ext}")
            self.download_image(image_url, image_path)
        with open(metadata_path, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=4)

    def scrape_posts(self, post_ids: list[str]):
        """Scrape every post in ``post_ids``, reporting progress on stdout.

        This is the top-level batch boundary: a failure on one post is
        printed and the loop moves on to the next post instead of aborting.
        """
        for post_id in post_ids:
            try:
                print(f"Scraping post ID: {post_id}")
                self.scrape_post(post_id)
                print(f"Successfully downloaded post {post_id}")
            except Exception as e:
                print(f"Failed to download post {post_id}: {e}")

# Example run: fetch a few sample posts into ./sakuga_downloads
if __name__ == "__main__":
    sample_post_ids = ["272528", "272541", "272539"]
    SakugaScraper(root_dir="sakuga_downloads").scrape_posts(sample_post_ids)
