In [None]:
from pathlib import Path

# Directory that holds the exported page-reference JSON files
directory_path = Path("content/exports/pageref")

# Collect every JSON file in the directory. Path.glob() yields entries in a
# filesystem-dependent order, so sort them to keep the order of loaded books
# (and of everything derived from it) reproducible across runs and machines.
pageref_files = sorted(directory_path.glob("*.json"))

# Print the list of JSON files
print(pageref_files)

In [None]:
import json
from dataclasses import dataclass


@dataclass
class Book:
    """One book record, populated from the fields of an exported JSON file."""

    name: str
    shortname: str
    year: int
    description: str
    type: str
    # Kept as a string; later cells compare it against the literal "true".
    obsolete: str
    contents: dict

    def __str__(self):
        """Return a readable label of the form ``name (type, year)``."""
        return "{} ({}, {})".format(self.name, self.type, self.year)

    def __repr__(self):
        """Return a compact debug form that shows only the name and year."""
        return "Book(name={}, year={})".format(self.name, self.year)


# Load every exported JSON file into a Book.
books = []
for file in pageref_files:
    # JSON text is conventionally UTF-8; read it explicitly as UTF-8 rather
    # than relying on the platform's default encoding (which differs on
    # Windows and would garble or reject non-ASCII characters).
    with open(file, "r", encoding="utf-8") as f:
        data = json.load(f)
    # Keys missing from the file fall back to neutral defaults.
    book = Book(
        name=data.get("name", ""),
        shortname=data.get("shortname", ""),
        year=data.get("year", 0),
        description=data.get("description", ""),
        type=data.get("type", ""),
        obsolete=data.get("obsolete", ""),
        contents=data.get("contents", {}),
    )
    books.append(book)

# Print the list of books
for book in books:
    print(book)

In [None]:
from dataclasses import field


@dataclass
class RefManager:
    """Flat registry of every ``Ref`` created through :meth:`add`."""

    # Refs in creation order; default_factory gives each manager its own list.
    refs: list = field(default_factory=list)

    def add(self, *args, **kwargs):
        """Create a ``Ref``, register it with this manager and return it.

        All positional and keyword arguments are forwarded unchanged to the
        ``Ref`` constructor.
        """
        # This is an instance method, so the receiver is named ``self``
        # (``cls`` is reserved by convention for classmethods).
        ref = Ref(*args, **kwargs)
        self.refs.append(ref)
        return ref

    def children(self, parent: "Ref"):
        """Return the registered refs whose ``parent`` equals ``parent``.

        Results keep registration order. Passing ``None`` returns the refs
        that have no parent. The comparison uses ``==``, so it follows the
        equality semantics of the stored objects.
        """
        return [ref for ref in self.refs if ref.parent == parent]


@dataclass
class Ref:
    """A titled page reference inside a book, optionally nested under a parent.

    ``parent``, ``category`` and ``description`` default to ``None``.
    """

    # Quoted so the class can be defined without ``Book`` being resolvable
    # at class-definition time (same approach as the ``"Ref"`` self-reference).
    book: "Book"
    title: str
    page: str
    parent: "Ref" = None
    category: str = None
    description: str = None

    def resolve_page(self):
        """Return this ref's page, inheriting from the nearest ancestor if empty.

        Returns ``None`` when neither this ref nor any ancestor has a
        non-empty page.
        """
        if self.page:
            return self.page

        if self.parent:
            return self.parent.resolve_page()

        return None

    def display(self):
        """Return ``title (category, book shortname, p<page>) description``.

        The description is omitted when it is empty or ``None``; surrounding
        whitespace is stripped from the result.
        """
        # Build the optional suffix outside the f-string: reusing the
        # enclosing quote character inside a replacement field is a
        # SyntaxError on Python versions before 3.12.
        description = self.description or ""
        text = (
            f"{self.title} ({self.category}, {self.book.shortname}, "
            f"p{self.resolve_page()}) {description}"
        )
        return text.strip()

In [None]:
# Smoke test: build a three-level chain (no book attached) and check both
# child lookup and page inheritance.
test_rm = RefManager()
root = test_rm.add(book=None, title="Root", page="1")
child = test_rm.add(book=None, title="Child", page="2", parent=root)
grandchild = test_rm.add(book=None, title="Grandchild", page="", parent=child)

# Only the direct child is reported for the root.
assert test_rm.children(root) == [child]

# Refs with their own page keep it; the page-less grandchild inherits "2".
assert (
    root.resolve_page(),
    child.resolve_page(),
    grandchild.resolve_page(),
) == ("1", "2", "2")

In [None]:
from pprint import pp

# Registry that collects the references built from the loaded books below.
rm = RefManager()


def add_refs_from_contents(
    rm: RefManager, book: Book, contents: dict, parent=None, category=None
):
    """Recursively register a ref for every titled entry in ``contents``.

    ``contents`` maps a section key to a list of entry dicts. Each entry with
    a truthy ``"title"`` is added to ``rm`` (entries without one are skipped,
    along with anything nested inside them). The category is ``category``
    when given, otherwise the title-cased section key, and it is passed down
    unchanged to nested entries. Nested entries receive the newly created ref
    as their parent.
    """
    # Keys whose value is itself a list of entry dicts.
    nested_list_keys = ("skills", "tactics", "fighters", "powers", "subsections")
    # Special case for data that is simply a string: the string becomes the
    # title of a single child entry.
    nested_string_keys = ("special ability",)

    for section, entries in contents.items():
        for entry in entries:
            heading = entry.get("title")
            page = entry.get("page", "")
            label = category if category is not None else section.title()
            blurb = entry.get("description", None)
            if not heading:
                continue

            ref = rm.add(book, heading, page, parent, label, blurb)

            for nested in nested_list_keys:
                if nested in entry:
                    add_refs_from_contents(
                        rm, book, {nested: entry[nested]}, ref, label
                    )

            for nested in nested_string_keys:
                if nested in entry:
                    wrapped = [{"title": entry[nested]}]
                    add_refs_from_contents(rm, book, {nested: wrapped}, ref, label)


# Register references for every book that is not flagged obsolete
# (the flag is the literal string "true").
for book in books:
    if book.obsolete != "true":
        print(f"Adding references for {book.name}")
        add_refs_from_contents(rm, book, book.contents)

# Pretty-print the complete list of collected references.
pp(rm.refs)

In [None]:
# Print the display string of every collected reference, one per line.
for ref in rm.refs:
    print(ref.display())

In [None]:
def find(rm: RefManager, title: str):
    """Return every ref in ``rm`` whose title equals ``title``, ignoring case.

    Matches are returned in the order they appear in ``rm.refs``; the list is
    empty when nothing matches.
    """
    matches = []
    for candidate in rm.refs:
        if candidate.title.lower() == title.lower():
            matches.append(candidate)
    return matches


# Spot checks against the loaded data: the first match for each title must
# render exactly as shown. The expected strings depend on the JSON exports
# that were loaded into `rm`.
assert find(rm, "Agility")[0].display() == "Agility (Skills, Core, p256)"
assert (
    find(rm, "Ironhead Squat Prospectors Charter Master")[0].display()
    == "Ironhead Squat Prospectors Charter Master (Gangs, Outlands, p28)"
)

In [None]:
from difflib import SequenceMatcher


def similar(a, b):
    """Score how alike two strings are, ignoring case, from 0.0 to 1.0.

    Returns 1.0 for a case-insensitive exact match, 0.9 when one string
    contains the other, and otherwise the ``SequenceMatcher`` ratio of the
    lowercased strings. An empty string only matches another empty string.
    """
    lower_a = a.lower()
    lower_b = b.lower()
    if lower_a == lower_b:
        return 1.0
    # "" is a substring of every string, so without this guard an empty
    # input would score 0.9 against anything.
    if not lower_a or not lower_b:
        return 0.0
    if lower_a in lower_b or lower_b in lower_a:
        return 0.9
    # Compare the lowercased forms so the fallback is case-insensitive too,
    # consistent with the two checks above.
    return SequenceMatcher(None, lower_a, lower_b).ratio()


def find_similar(
    rm: "RefManager", title: str, limit: int = 10, threshold: float = 0.8
):
    """Return the refs whose titles best match ``title``, best match first.

    Args:
        rm: Object exposing the candidate refs as ``rm.refs``.
        title: Text to look for; compared case-insensitively.
        limit: Only the ``limit`` highest-scoring refs are considered
            (``None`` considers all of them). Defaults to 10.
        threshold: A ref is returned only if its score is strictly greater
            than this value. Defaults to 0.8.

    Returns:
        A list of refs ordered by descending score; refs with equal scores
        keep their order from ``rm.refs``.
    """
    wanted = title.lower()
    # Score each ref exactly once and reuse the value for both ranking and
    # filtering, instead of recomputing it for the top candidates.
    scored = [(similar(ref.title.lower(), wanted), ref) for ref in rm.refs]
    # The sort is stable even with reverse=True, so ties keep input order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [ref for score, ref in scored[:limit] if score > threshold]


# Exercise the fuzzy lookup with a few sample queries and pretty-print the
# matches for each.
pp(find_similar(rm, "Agility"))
pp(find_similar(rm, "Charter Master"))
pp(find_similar(rm, "Settlement Raid"))
