In [3]:
# Extra region/topic subcategories for "news". Every entry matches the same
# five ("news", <subcategory>) pairs, so the table below lists only what
# differs per entry: key, display name and description. Each entry gets its
# own freshly built match list, so no list object is shared between entries.
more_news = {
    slug: {
        "name": slug,
        "displayName": label,
        "desc": blurb,
        "match_categories": [
            ["news", news_sub]
            for news_sub in (
                "Business News",
                "Daily News",
                "News",
                "News Commentary",
                "Politics",
            )
        ],
    }
    for slug, label, blurb in (
        ("world", "World", "World news and international affairs"),
        ("us", "US", "News about the United States"),
        ("canada", "Canada", "News about Canada"),
        ("uk", "UK", "News about the United Kingdom"),
        ("europe", "Europe", "News about Europe"),
        ("ukraine", "Ukraine", "News related to Ukraine and the war"),
        ("middleeast", "Middle East", "News from the Middle East region"),
    )
}

In [4]:
from collections import defaultdict
from pathlib import Path
import json
import sqlite3
import sys

# Put the parent of the current working directory on the import path
# (presumably where the `categories` module imported below lives).
parent_dir = Path.cwd().parent
# Guarded so that re-running this cell does not stack duplicate sys.path entries.
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))

from categories import BASE_INDEX


def safe(str):
    """Return a lowercase key form of a label.

    The text is lowercased, every "&" becomes "and", and every space becomes
    an underscore, e.g. "TV & Film" -> "tv_and_film".

    Note: the parameter name shadows the builtin `str` inside this function.
    """
    lowered = str.lower()
    ampersand_spelled_out = lowered.replace("&", "and")
    return ampersand_spelled_out.replace(" ", "_")


def unsafe(str):
    """Return a title-cased display form of a key.

    "_and_" becomes " & ", remaining underscores become spaces, and the result
    is passed through `.title()`. This is not an exact inverse of `safe`:
    "tv_and_film" comes back as "Tv & Film".
    """
    with_ampersand = str.replace("_and_", " & ")
    spaced = with_ampersand.replace("_", " ")
    return spaced.title()


def make_head(str):
    """Build the {"name", "displayName"} header dict shared by all entries.

    "name" is the key form produced by `safe`; "displayName" is the
    title-cased form produced by `unsafe`.
    """
    key = safe(str)
    label = unsafe(str)
    return {"name": key, "displayName": label}


# Directory that receives the generated module; created if it does not exist.
out_dir = Path("../working")
out_dir.mkdir(exist_ok=True)

# Read every (category, subcategory) group from the podcast table.
# try/finally releases the connection even when the query raises.
conn = sqlite3.connect("../data/podcasts.db")
try:
    cursor = conn.cursor()
    cursor.execute(
        """
        SELECT category, subcategory, COUNT(*) as count
        FROM podcast
        GROUP BY category, subcategory
        ORDER BY category, subcategory
        """
    )
    rows = cursor.fetchall()
finally:
    conn.close()

# category -> list of [category, subcategory] pairs, in query order.
category_data = defaultdict(list)
for category, subcategory, _count in rows:  # the per-group count is not used
    category_data[category].append([category, subcategory])

# Work on a shallow copy: only top-level keys are (re)assigned below, so the
# imported BASE_INDEX object is never mutated and re-running the cell always
# starts from the same base.
output = dict(BASE_INDEX)

for main_cat, entries in category_data.items():
    subcategories = {}
    for entry in entries:
        sub_cat = entry[1]
        # Rows whose subcategory is the literal "UNKNOWN" get no entry of their own.
        if sub_cat == "UNKNOWN":
            continue
        match_categories = [entry]
        # A subcategory whose key equals the category name also matches the
        # category's "UNKNOWN" rows.
        if main_cat == safe(sub_cat):
            match_categories.append([main_cat, "UNKNOWN"])
        subcategories[safe(sub_cat)] = {
            **make_head(sub_cat),
            "match_categories": match_categories,
        }
    output[main_cat] = {**make_head(main_cat), "subcategories": subcategories}

# Add the extra news subcategories. The existing subcategories are unpacked
# last, so they win over `more_news` on any key collision. The news entry is
# rebuilt rather than mutated, so a "news" entry that came straight from
# BASE_INDEX is left untouched.
news_entry = output["news"]
output["news"] = {
    **news_entry,
    "subcategories": {**more_news, **news_entry["subcategories"]},
}

out_file = out_dir / "category_mappings.py"

# NOTE: the mapping is serialised as JSON and pasted into Python source. That
# is only valid Python while the data holds strings, numbers, lists and dicts;
# json.dumps would render None/True/False as null/true/false.
json_str = json.dumps(output)

python_str = f"CATEGORY_MAPPINGS = {json_str}"

with open(out_file, "w", encoding="utf-8") as f:
    f.write(python_str)