In [8]:
from flathub_python_api import (
    ApiClient as FlatpakApiClient,
    Configuration,
    CollectionApi,
    AppsIndex,
)
from typing import Optional
import json
from pydantic import BaseModel, model_validator
from collections import defaultdict
from catalogd_api_client import (
    ApiClient as CatalogdApiClient,
    DefaultApi,
    UnifiedAppApi,
    SpecificAppApi,
    AppSourceApi,
    AppSource,
    SpecificApp,
    
)
from catalogd_api_client.exceptions import NotFoundException
from catalogd_api_client import UnifiedApp

from pathlib import Path
import traceback
import cdifflib
import tqdm

In [30]:
# Initialize the API client
flatpak_client = FlatpakApiClient(configuration=Configuration(host="http://ideapad.alukens.com:8000"))
collection_api = CollectionApi(flatpak_client)

catalogd_client = CatalogdApiClient(configuration=Configuration(host="https://ideapad.alukens.com"))

INDEX_FILEPATH = Path("__file__").resolve().parent / "app_index.json"
CREDENTIALS_FILE = Path("__file__").resolve().parent.parent.parent / "catalogd-scripts" / "credentials.json"


In [31]:
def login_catalogd_client(api_client: CatalogdApiClient, credentials_file: Path):
    with open(credentials_file, "r") as f:
        credentials = json.load(f)
        alice = [cred for cred in credentials if cred["username"] == "alice"][0]

    default_api = DefaultApi(api_client)
    auth_response = default_api.login_for_access_token_api_auth_token_post(
        username=alice["username"], password=alice["password"]
    )
    api_client.set_default_header("Authorization", f"Bearer {auth_response.access_token}")

In [32]:
login_catalogd_client(api_client=catalogd_client, credentials_file=CREDENTIALS_FILE)

In [33]:
collection_api.get_categories_collection_category_get_without_preload_content().data

b'["audiovideo","development","education","game","graphics","network","office","science","system","utility"]'

In [35]:
categories = json.loads(collection_api.get_categories_collection_category_get_without_preload_content().data)

In [36]:
categories

['audiovideo',
 'development',
 'education',
 'game',
 'graphics',
 'network',
 'office',
 'science',
 'system',
 'utility']

In [37]:
def get_all_apps_in_category(
    collection_api: CollectionApi, category: str, limit: Optional[int] = None
):
    """
    Fetch all apps in a specific category.

    :param category: The category to fetch apps from.
    :param limit: Optional limit on the number of apps to fetch.
    :return: List of apps in the specified category.
    """
    apps = []
    page = 1

    while True:
        print(f"Fetching page {page} for category '{category}'...")
        response = collection_api.get_category_collection_category_category_get(
            category=category, page=page, per_page=100
        )

        apps.extend(response.hits)
        if page == response.total_pages:
            break
        if limit and len(apps) >= limit:
            print(f"Reached limit of {limit} apps.")
            break

        page += 1
    return apps[:limit] if limit else apps

In [38]:
class EntireAppIndex(BaseModel):
    categories: list[str]
    index: dict[str, list[AppsIndex]]

    @model_validator(mode="after")
    def validate_index(self):
        for category in self.categories:
            if category not in self.index:
                print(f"No apps for category {category} in index")
                self.index[category] = []

        return self

    @property
    def app_count(self):
        return sum(len(l) for l in self.index.values())

In [39]:
def fetch_all_apps(
    categories: list[str],
    collection_api: CollectionApi,
    filepath: Path | None,
    force_pull: bool = False,
) -> EntireAppIndex:
            
    all_apps: dict[str, list[AppsIndex]] = defaultdict(list)
    for category in categories:
        print(f"Fetching apps for category: {category}")
        apps_in_cat = get_all_apps_in_category(collection_api, category)
        all_apps[category].extend(apps_in_cat)
    print(f"Total apps fetched: {sum(len(l) for l in all_apps.values())}")
    index = EntireAppIndex(categories=categories, index=all_apps)
    if filepath:
        with open(filepath, "w") as f:
            f.write(index.model_dump_json())

    return index

In [40]:
index = fetch_all_apps(categories=categories, collection_api=collection_api, filepath=INDEX_FILEPATH, force_pull=False)

Fetching apps for category: audiovideo
Fetching page 1 for category 'audiovideo'...
Fetching page 2 for category 'audiovideo'...
Fetching page 3 for category 'audiovideo'...
Fetching page 4 for category 'audiovideo'...
Fetching apps for category: development
Fetching page 1 for category 'development'...
Fetching page 2 for category 'development'...
Fetching page 3 for category 'development'...
Fetching apps for category: education
Fetching page 1 for category 'education'...
Fetching page 2 for category 'education'...
Fetching apps for category: game
Fetching page 1 for category 'game'...
Fetching page 2 for category 'game'...
Fetching page 3 for category 'game'...
Fetching page 4 for category 'game'...
Fetching page 5 for category 'game'...
Fetching page 6 for category 'game'...
Fetching page 7 for category 'game'...
Fetching apps for category: graphics
Fetching page 1 for category 'graphics'...
Fetching page 2 for category 'graphics'...
Fetching page 3 for category 'graphics'...
Fetch

In [69]:
unified_app_api = UnifiedAppApi(catalogd_client)

def get_all_unified_apps(unified_app_api: UnifiedAppApi) -> list[UnifiedApp]:
    apps: list[UnifiedApp] = []
    
    page = 1
    count = 100
    while True:
        results = unified_app_api.find_all_route_api_crud_unified_app_get(limit=count, skip=(page-1)*count)
        if results:
            apps.extend(results)
            page += 1
        else:
            print(f"no apps returned for page {page}, breaking")
            break
    return apps

In [70]:
unified_apps = get_all_unified_apps(unified_app_api=unified_app_api)

no apps returned for page 2, breaking


In [71]:
len(unified_apps)

1

In [72]:
def match_flatpak_app_with_unified_app(flatpak_app: AppsIndex, unified_apps: list[UnifiedApp]):
    # print(flatpak_app.name)
    flatpak_app_name = flatpak_app.name.lower()
    # replace whitespace with dashes for matching
    flatpak_app_name = flatpak_app_name.replace(" ", "-")
    for unified_app in unified_apps:
        if unified_app.name.lower() == flatpak_app_name:
            return unified_app
    return None


In [73]:
from itertools import chain
matched_apps = []
for flatpak_app in chain(*index.index.values()):
    if unified_app_match := match_flatpak_app_with_unified_app(flatpak_app=flatpak_app, unified_apps=unified_apps):
        matched_apps.append((flatpak_app, unified_app_match))

In [74]:
len(matched_apps)

0

In [75]:
matched_flatpak_apps = [flatpak for flatpak, unified_app in matched_apps]
disjoint_flatpak_apps = [flatpak for flatpak in chain(*index.index.values()) if flatpak not in matched_flatpak_apps]

In [76]:
len(disjoint_flatpak_apps)

3019

In [77]:
exclude_list = [
    "Ruffle",
    "Videos",
    "Music",
    "Builder",
    "Gitter",
    "Minion",
    "Mines",
    "GIScan",
    "gitIgnore",
    "logisim",
    "System",
    "KPatience",
    "Mahjongg",
    "Workbench",
    "Zatikon",
    "Swatch",
    "Gauge",
    "Atomic",
    "Framed",
    "Element",
    "Papers",
    "Calculator",
    "Weather",
    "Vaults",
    "Bottles",
    "Timer",
    "Shortcut",
    "JPEG2PDF",
    "Password",
    "Counters",
]

In [78]:
def attempt_match_disjoint_apps(
    disjoint_flatpak_apps: list[AppsIndex], unified_apps: list[UnifiedApp], exclude_list: Optional[list[str]] = None
):
    # Attempt to match disjoint flatpak apps with unified apps using fuzzy matching on lowercased names
    matched = []
    sm = cdifflib.CSequenceMatcher(None, "", "")
    pbar = tqdm.tqdm(unified_apps, desc="Matching disjoint apps", mininterval=0.5)
    for unified_app in pbar:
        best_score = 0.0
        best_flatpak = None
        sm.set_seq2(unified_app.name.lower())
        for flatpak_app in disjoint_flatpak_apps:
            if exclude_list and flatpak_app.name in exclude_list:
                continue
            sm.set_seq1(flatpak_app.name.lower())
            score = sm.ratio()
            if score > best_score:
                best_score = score
                best_flatpak = flatpak_app
        if best_score > 0.9 and best_flatpak is not None:
            pbar.write(f"Best match for {unified_app.name} is {best_flatpak.name} with score {best_score}")
            matched.append((best_flatpak, unified_app))
    return matched


In [79]:
attempted_matches = attempt_match_disjoint_apps(
    disjoint_flatpak_apps=disjoint_flatpak_apps, unified_apps=unified_apps, exclude_list=exclude_list
)

Matching disjoint apps: 100%|██████████| 1/1 [00:00<00:00, 39.08it/s]


In [None]:
for flatpak_app, unified_app in attempted_matches:
    if flatpak_app.name in exclude_list:
        continue
    print(f"{flatpak_app.name} {unified_app.name}")

Power Tab Editor powertabeditor
n-Track Studio n-trackstudio
Firefly Luciferin fireflyluciferin
fre:ac freac
Redis Insight redisinsight
Turtle kturtle
pgAdmin 4 pgadmin4
Logisim logiksim
RemNote remote
Tux Typing tuxtyping
WordQuiz kwordquiz
Pixel Wheels pixelwheels
Midnightmare Teddy midnightmareteddy
Dragon’s Apprentice dragonsapprentice
Frog Squash frogsquash
Missile Math missilemath
The Passage thepassage
FaFa Runner fafarunner
SaunaSim sauna-sim
StepMania step-mania
PAC-GAL pacgal
Serious Shooter seriousshooter
Temple Driver templedriver
Tux Fishing tuxfishing
Tux VS Dragon tuxvsdragon
Voxel Paint voxelpaint
Voxel Paint Pro voxelpaintpro
AI Generated Game aigeneratedgame
Mr Rescue mrrescue
Endless Sky endlesssky
Sudoku ksudoku
OpenDungeonsPlus opendungeons-plus
Bugdom 2 bugdom2
Cro-Mag Rally cromagrally
Otto Matic ottomatic
GtkAtlantic gtk-atlantic
World of PADMAN worldofpadman
Warzone 2100 warzone2100
Sky Checkers skycheckers
Dhewm 3 dhewm3
Frozen Bubble frozenbubble
Lights Off l

In [45]:
len(attempted_matches)

79

In [38]:
matched_apps[1]

(AppsIndex(name='QPrompt', keywords=['autocue', 'imaginary', 'prompter', 'qprompt', 'teleprompter'], summary='Personal teleprompter software for all video creators', description="Open source personal teleprompter software for all video creators. Built with ease of use, control accuracy, fast performance, and cross-platform support in mind. QPrompt's convergent user interface can run on Linux, Windows, macOS, and Android.", id='com_cuperino_qprompt', type='desktop-application', translations={}, project_license='GPL-3.0', is_free_license=True, app_id='com.cuperino.qprompt', icon='https://dl.flathub.org/repo/appstream/x86_64/icons/128x128/com.cuperino.qprompt.png', main_categories=MainCategories(anyof_schema_1_validator=None, anyof_schema_2_validator=None, actual_instance=<MainCategory.AUDIOVIDEO: 'audiovideo'>, any_of_schemas={'List[MainCategory]', 'MainCategory'}), sub_categories=['Video'], developer_name='Javier O. Cordero Pérez', verification_verified=True, verification_method=<Verifi

In [None]:
matched_apps.extend(attempted_matches)
# remove attempted matches from disjoint apps
disjoint_flatpak_apps = [flatpak for flatpak in disjoint_flatpak_apps if flatpak not in [flatpak for flatpak, unified_app in attempted_matches]]

In [54]:
# remove duplicates from matched apps based on flatpak app id
def remove_duplicates(matched_apps):
    seen_flatpak_ids = set()
    unique_matched_apps = []
    for flatpak, unified_app in matched_apps:
        if flatpak.app_id not in seen_flatpak_ids:
            unique_matched_apps.append((flatpak, unified_app))
            seen_flatpak_ids.add(flatpak.app_id)
    return unique_matched_apps
matched_apps = remove_duplicates(matched_apps)

In [None]:
def sync_flatpak_specific_apps(
    catalogd_client: CatalogdApiClient,
    matched_apps: list[tuple[AppsIndex, UnifiedApp]],
    disjoint_flatpak_apps: list[AppsIndex],
):
    specific_app_api = SpecificAppApi(api_client=catalogd_client)
    unified_app_api = UnifiedAppApi(api_client=catalogd_client)

    for flatpak_app, unified_app in tqdm.tqdm(matched_apps, desc="Syncing specific apps", mininterval=0.5):
        # check if specific app already exists for this flatpak app id
        check_or_add_specific_app(
            unified_app_api=unified_app_api,
            specific_app_api=specific_app_api,
            unified_app=unified_app,
            flatpak_app=flatpak_app,
        )

    for flatpak_app in disjoint_flatpak_apps:
        check_or_add_specific_app(
            unified_app_api=unified_app_api,
            specific_app_api=specific_app_api,
            unified_app=None,
            flatpak_app=flatpak_app,
        )


def get_or_create_flathub_source(catalogd_client: CatalogdApiClient):
    app_source_api = AppSourceApi(api_client=catalogd_client)
    try:
        flathub_source = app_source_api.find_one_api_crud_app_source_one_get(
            where=json.dumps({"and": [{"name": {"eq": "Flathub"}}, {"type": {"eq": "flatpak"}}]})
        )
    except NotFoundException:
        flathub_source = None
    if not flathub_source:
        print("Flathub source not found, creating...")
        flathub_source = AppSource(name="Flathub", location="https://flathub.org", type="flatpak", authenticated=False)
        flathub_source = app_source_api.create_route_api_crud_app_source_post(flathub_source.model_dump(mode="json"))

    return flathub_source


def create_mongodb_flatpak_query(flatpak_app: AppsIndex, source: AppSource) -> str:
    return json.dumps({"and": [{"external_id": {"eq": flatpak_app.app_id}}, {"source": {"eq": source.id}}]})


def check_or_add_specific_app(
    unified_app_api: UnifiedAppApi,
    specific_app_api: SpecificAppApi,
    unified_app: UnifiedApp | None,
    flatpak_app: AppsIndex,
):
    flathub_source = get_or_create_flathub_source(catalogd_client=unified_app_api.api_client)
    if not unified_app:
        print(f"No unified app found for flatpak app {flatpak_app.name}. Creating first...")
        unified_app = UnifiedApp(
            name=flatpak_app.name,
            title=flatpak_app.name,
            description=flatpak_app.description,
            author=flatpak_app.developer_name or "Unknown",
            icon=flatpak_app.icon,
        )
        unified_app = unified_app_api.create_route_api_crud_unified_app_post(unified_app.model_dump(mode="json"))
    # check if specific app already exists for this flatpak app id
    try:
        specific_app = specific_app_api.find_all_route_api_crud_specific_app_get(
            where=create_mongodb_flatpak_query(flatpak_app, flathub_source)
        )
    except NotFoundException:
        specific_app = None

    if not specific_app:
        print(f"Specific app not found for flatpak app {flatpak_app.name}. Creating...")


def delete_all_flathub_specific_apps(catalogd_client: CatalogdApiClient, flathub_source: AppSource):
    specific_app_api = SpecificAppApi(api_client=catalogd_client)
    specific_apps = specific_app_api.find_all_route_api_crud_specific_app_get(
        where=json.dumps({"source": {"eq": flathub_source.id}})
    )
    for specific_app in specific_apps:
        print(f"Deleting specific app {specific_app.id} for unified app {specific_app.unified_app_id}")
        specific_app_api.delete_route_api_crud_specific_app_id_delete(specific_app.id)

In [87]:
get_or_create_flathub_source(catalogd_client=catalogd_client)

AppSource(created_at=datetime.datetime(2025, 10, 12, 22, 42, 10, 249037, tzinfo=TzInfo(UTC)), updated_at=datetime.datetime(2025, 10, 12, 22, 42, 10, 249040, tzinfo=TzInfo(UTC)), id=3, name='Flathub', type='flatpak', description='', location='https://flathub.org', authenticated=False)

In [90]:
print(disjoint_flatpak_apps[0].model_dump_json(indent=2))

{
  "name": "Ear Tag",
  "keywords": [
    "music",
    "audio",
    "tag",
    "tagging",
    "tagger"
  ],
  "summary": "Edit audio file tags",
  "description": "Ear Tag is a simple audio file tag editor. It is primarily geared towards making quick edits or bulk-editing tracks in albums/EPs. Unlike other tagging programs, Ear Tag does not require the user to set up a music library folder. It can:\n      \n        Edit tags of MP3, WAV, M4A, FLAC, OGG and WMA files\n        Modify metadata of multiple files at once\n        Rename files using information from present tags\n        Identify files using AcoustID\n      \n      Network access is only used for the \"Identify selected files\" option.\n    ",
  "id": "app_drey_EarTag",
  "type": "desktop-application",
  "translations": null,
  "project_license": "MIT",
  "is_free_license": true,
  "app_id": "app.drey.EarTag",
  "icon": "https://dl.flathub.org/media/app/drey/EarTag/c7d8d673f91419413805731f0dabe70d/icons/128x128/app.drey.EarT