In [24]:
from flathub_python_api import (
    ApiClient as FlatpakApiClient,
    Configuration,
    CollectionApi,
    AppsIndex,
)
from typing import Optional
import json
from pydantic import BaseModel, model_validator
from collections import defaultdict
from catalogd_api_client import (
    ApiClient as CatalogdApiClient,
    DefaultApi,
    UnifiedAppApi,
)
from catalogd_api_client import UnifiedApp

from pathlib import Path
import traceback
import cdifflib as difflib
import enlighten

In [25]:
# Initialize the API client
flatpak_client = FlatpakApiClient(configuration=Configuration(host="http://192.168.1.90:8000"))
collection_api = CollectionApi(flatpak_client)

catalogd_client = CatalogdApiClient(configuration=Configuration(host="https://ideapad.alukens.com"))

INDEX_FILEPATH = Path("__file__").resolve().parent / "app_index.json"
CREDENTIALS_FILE = Path("__file__").resolve().parent.parent.parent / "catalogd-scripts" / "credentials.json"
pbar_manager = enlighten.get_manager()

In [4]:
def login_catalogd_client(api_client: CatalogdApiClient, credentials_file: Path):
    with open(credentials_file, "r") as f:
        credentials = json.load(f)
        alice = [cred for cred in credentials if cred["username"] == "alice"][0]

    default_api = DefaultApi(api_client)
    auth_response = default_api.login_for_access_token_api_auth_token_post(
        username=alice["username"], password=alice["password"]
    )
    api_client.set_default_header("Authorization", f"Bearer {auth_response.access_token}")

In [5]:
login_catalogd_client(api_client=catalogd_client, credentials_file=CREDENTIALS_FILE)

In [6]:
categories = json.loads(collection_api.get_categories_collection_category_get_without_preload_content().data)

In [7]:
categories

['audiovideo',
 'development',
 'education',
 'game',
 'graphics',
 'network',
 'office',
 'science',
 'system',
 'utility']

In [8]:
def get_all_apps_in_category(
    collection_api: CollectionApi, category: str, limit: Optional[int] = None
):
    """
    Fetch all apps in a specific category.

    :param category: The category to fetch apps from.
    :param limit: Optional limit on the number of apps to fetch.
    :return: List of apps in the specified category.
    """
    apps = []
    page = 1

    while True:
        print(f"Fetching page {page} for category '{category}'...")
        response = collection_api.get_category_collection_category_category_get(
            category=category, page=page, per_page=100
        )

        apps.extend(response.hits)
        if page == response.total_pages:
            break
        if limit and len(apps) >= limit:
            print(f"Reached limit of {limit} apps.")
            break

        page += 1
    return apps[:limit] if limit else apps

In [9]:
class EntireAppIndex(BaseModel):
    categories: list[str]
    index: dict[str, list[AppsIndex]]

    @model_validator(mode="after")
    def validate_index(self):
        for category in self.categories:
            if category not in self.index:
                print(f"No apps for category {category} in index")
                self.index[category] = []

        return self

    @property
    def app_count(self):
        return sum(len(l) for l in self.index.values())

In [10]:
def fetch_all_apps(
    categories: list[str],
    collection_api: CollectionApi,
    filepath: Path | None,
    force_pull: bool = False,
) -> EntireAppIndex:
            
    all_apps: dict[str, list[AppsIndex]] = defaultdict(list)
    for category in categories:
        print(f"Fetching apps for category: {category}")
        apps_in_cat = get_all_apps_in_category(collection_api, category)
        all_apps[category].extend(apps_in_cat)
    print(f"Total apps fetched: {sum(len(l) for l in all_apps.values())}")
    index = EntireAppIndex(categories=categories, index=all_apps)
    if filepath:
        with open(filepath, "w") as f:
            f.write(index.model_dump_json())

    return index

In [11]:
index = fetch_all_apps(categories=categories, collection_api=collection_api, filepath=INDEX_FILEPATH, force_pull=False)

Fetching apps for category: audiovideo
Fetching page 1 for category 'audiovideo'...
Fetching page 2 for category 'audiovideo'...
Fetching page 3 for category 'audiovideo'...
Fetching page 4 for category 'audiovideo'...
Fetching apps for category: development
Fetching page 1 for category 'development'...
Fetching page 2 for category 'development'...
Fetching page 3 for category 'development'...
Fetching apps for category: education
Fetching page 1 for category 'education'...
Fetching page 2 for category 'education'...
Fetching apps for category: game
Fetching page 1 for category 'game'...
Fetching page 2 for category 'game'...
Fetching page 3 for category 'game'...
Fetching page 4 for category 'game'...
Fetching page 5 for category 'game'...
Fetching page 6 for category 'game'...
Fetching page 7 for category 'game'...
Fetching apps for category: graphics
Fetching page 1 for category 'graphics'...
Fetching page 2 for category 'graphics'...
Fetching page 3 for category 'graphics'...
Fetch

In [12]:
unified_app_api = UnifiedAppApi(catalogd_client)

def get_all_unified_apps(unified_app_api: UnifiedAppApi) -> list[UnifiedApp]:
    apps: list[UnifiedApp] = []
    
    page = 1
    count = 100
    while True:
        results = unified_app_api.find_all_route_api_crud_unified_app_get(limit=count, skip=(page-1)*count)
        if results:
            apps.extend(results)
            page += 1
        else:
            print(f"no apps returned for page {page}, breaking")
            break
    return apps

In [13]:
unified_apps = get_all_unified_apps(unified_app_api=unified_app_api)

no apps returned for page 96, breaking


In [14]:
len(unified_apps)

9444

In [15]:
def match_flatpak_app_with_unified_app(flatpak_app: AppsIndex, unified_apps: list[UnifiedApp]):
    # print(flatpak_app.name)
    flatpak_app_name = flatpak_app.name.lower()
    # replace whitespace with dashes for matching
    flatpak_app_name = flatpak_app_name.replace(" ", "-")
    for unified_app in unified_apps:
        if unified_app.name.lower() == flatpak_app_name:
            return unified_app
    return None


In [16]:
from itertools import chain
matched_apps = []
for flatpak_app in chain(*index.index.values()):
    if unified_app_match := match_flatpak_app_with_unified_app(flatpak_app=flatpak_app, unified_apps=unified_apps):
        matched_apps.append((flatpak_app, unified_app_match))

In [17]:
len(matched_apps)

622

In [18]:
matched_flatpak_apps = [flatpak for flatpak, unified_app in matched_apps]
disjoint_flatpak_apps = [flatpak for flatpak in chain(*index.index.values()) if flatpak not in matched_flatpak_apps]

In [19]:
len(disjoint_flatpak_apps)

2397

In [20]:
exclude_list = [
    "Ruffle",
    "Videos",
    "Music",
    "Builder",
    "Gitter",
    "Minion",
    "Mines",
    "GIScan",
    "gitIgnore",
    "logisim",
    "System",
    "KPatience",
    "Mahjongg",
    "Workbench",
    "Zatikon",
    "Swatch",
    "Gauge",
    "Atomic",
    "Framed",
    "Element",
    "Papers",
    "Calculator",
    "Weather",
    "Vaults",
    "Bottles",
    "Timer",
    "Shortcut",
    "JPEG2PDF",
    "Password",
    "Counters",
]

In [26]:
def attempt_match_disjoint_apps(
    disjoint_flatpak_apps: list[AppsIndex], unified_apps: list[UnifiedApp], exclude_list: Optional[list[str]] = None
):
    # Attempt to match disjoint flatpak apps with unified apps using fuzzy matching on lowercased names
    matched = []
    sm = difflib.CSequenceMatcher(None, "", "")
    match_progress = pbar_manager.counter(total=len(disjoint_flatpak_apps), desc='Attempting matches', unit='apps')
    for flatpak_app in disjoint_flatpak_apps:
        if exclude_list and flatpak_app.name in exclude_list:
            print(f"Skipping {flatpak_app.name} as it is in the exclude list")
            continue
        match_scores: dict[str, float] = {}
        sm.set_seq1(flatpak_app.name.lower())
        for unified_app in unified_apps:
            # use fuzzy matching from difflib
            sm.set_seq2(unified_app.name.lower())
            score = sm.ratio()
            if score > 0.9:  # If the match is above 90%, consider it a match
                match_scores[unified_app.name] = score
        if match_scores:
            best_match = max(match_scores, key=match_scores.get)
            print(f"Best match for {flatpak_app.name} is {best_match} with score {match_scores[best_match]}")
            matched.append((flatpak_app, [app for app in unified_apps if app.name == best_match][0]))
        match_progress.update()
    return matched

In [27]:
attempted_matches = attempt_match_disjoint_apps(
    disjoint_flatpak_apps=disjoint_flatpak_apps, unified_apps=unified_apps, exclude_list=exclude_list
)

Best match for Power Tab Editor is powertabeditor with score 0.9333333333333333
Best match for n-Track Studio is n-trackstudio with score 0.9629629629629629
Best match for Firefly Luciferin is fireflyluciferin with score 0.9696969696969697
Best match for fre:ac is freac with score 0.9090909090909091
Skipping Music as it is in the exclude list
Skipping Videos as it is in the exclude list
Skipping Ruffle as it is in the exclude list
Skipping gitIgnore as it is in the exclude list
Best match for Redis Insight is redisinsight with score 0.96
Best match for Turtle is kturtle with score 0.9230769230769231
Skipping Builder as it is in the exclude list
Best match for pgAdmin 4 is pgadmin4 with score 0.9411764705882353
Skipping Workbench as it is in the exclude list
Best match for Logisim is logiksim with score 0.9333333333333333
Skipping System as it is in the exclude list
Best match for RemNote is remote with score 0.9230769230769231
Best match for Tux Typing is tuxtyping with score 0.9473684

KeyboardInterrupt: 

In [26]:
for flatpak_app, unified_app in attempted_matches:
    if flatpak_app.name in exclude_list:
        continue
    print(f"{flatpak_app.name} {unified_app.name}")

Power Tab Editor powertabeditor
n-Track Studio n-trackstudio
Firefly Luciferin fireflyluciferin
fre:ac freac
Redis Insight redisinsight
Turtle kturtle
pgAdmin 4 pgadmin4
Logisim logiksim
RemNote remote
Tux Typing tuxtyping
WordQuiz kwordquiz
Pixel Wheels pixelwheels
Midnightmare Teddy midnightmareteddy
Dragon’s Apprentice dragonsapprentice
Frog Squash frogsquash
Missile Math missilemath
The Passage thepassage
FaFa Runner fafarunner
SaunaSim sauna-sim
StepMania step-mania
PAC-GAL pacgal
Serious Shooter seriousshooter
Temple Driver templedriver
Tux Fishing tuxfishing
Tux VS Dragon tuxvsdragon
Voxel Paint voxelpaint
Voxel Paint Pro voxelpaintpro
AI Generated Game aigeneratedgame
Mr Rescue mrrescue
Endless Sky endlesssky
Sudoku ksudoku
OpenDungeonsPlus opendungeons-plus
Bugdom 2 bugdom2
Cro-Mag Rally cromagrally
Otto Matic ottomatic
GtkAtlantic gtk-atlantic
World of PADMAN worldofpadman
Warzone 2100 warzone2100
Sky Checkers skycheckers
Dhewm 3 dhewm3
Frozen Bubble frozenbubble
Lights Off l

In [27]:
len(attempted_matches)

78

In [29]:
matched_apps[1]

(AppsIndex(name='QPrompt', keywords=['autocue', 'imaginary', 'prompter', 'qprompt', 'teleprompter'], summary='Personal teleprompter software for all video creators', description="Open source personal teleprompter software for all video creators. Built with ease of use, control accuracy, fast performance, and cross-platform support in mind. QPrompt's convergent user interface can run on Linux, Windows, macOS, and Android.", id='com_cuperino_qprompt', type='desktop-application', translations={}, project_license='GPL-3.0', is_free_license=True, app_id='com.cuperino.qprompt', icon='https://dl.flathub.org/repo/appstream/x86_64/icons/128x128/com.cuperino.qprompt.png', main_categories=MainCategories(anyof_schema_1_validator=None, anyof_schema_2_validator=None, actual_instance=<MainCategory.AUDIOVIDEO: 'audiovideo'>, any_of_schemas={'MainCategory', 'List[MainCategory]'}), sub_categories=['Video'], developer_name='Javier O. Cordero Pérez', verification_verified=True, verification_method=<Verifi

In [1]:
matched_apps.extend(attempted_matches)

NameError: name 'matched_apps' is not defined