In [None]:
# Environment setup. Every step is written so the cell can be re-run on a
# kernel whose working directory already contains a previous download.

# Clone the project only when the checkout is missing (git clone fails on an
# existing non-empty directory).
![ -d asr ] || git clone https://github.com/aspisov/asr.git
# Checkpoint file, stored inside the checkout as asr/model_best.pth.
!wget https://huggingface.co/aspisov/asr/resolve/main/model_best-193347.pth -O asr/model_best.pth
# -p: do not fail when asr/saved already exists.
!mkdir -p asr/saved
# LibriSpeech vocabulary and 3-gram ARPA language model files.
!wget https://openslr.trmal.net/resources/11/librispeech-vocab.txt -O asr/saved/librispeech-vocab.txt
!wget https://openslr.trmal.net/resources/11/3-gram.arpa.gz -O asr/saved/3-gram.arpa.gz
# -f: overwrite a 3-gram.arpa left over from an earlier run instead of refusing.
!gunzip -f asr/saved/3-gram.arpa.gz
# gdown is invoked later as a command-line tool (`!gdown`), so it is installed
# with the shell's pip to make sure its entry point lands on the shell PATH.
!pip install --quiet gdown

In [None]:
import os
import subprocess

# Sync the project's environment with uv inside the cloned checkout.
# check=True raises CalledProcessError when the sync fails, so a broken
# environment is reported here rather than as confusing errors in later cells.
subprocess.run(["uv", "sync"], cwd="asr", check=True)

In [None]:
# Fetch custom dataset from Google Drive
GOOGLE_DRIVE_LINK = "https://drive.google.com/drive/folders/1sqk3q9ejBDg76C8I15jXDLe87VQDZ2_f?usp=drive_link"
# Dataset location relative to the asr checkout: the download goes to
# asr/{CUSTOM_ROOT}, and the same relative path is handed to inference.py,
# which is run with cwd="asr".
CUSTOM_ROOT = "data/custom_drive"
# Value passed to every inference run as dataloader.batch_size.
BATCH_SIZE = 5
# Both interpolated values are quoted: the URL contains "?", which the shell
# would otherwise treat as a glob character (zsh aborts with "no matches found").
!gdown --folder "{GOOGLE_DRIVE_LINK}" -O "asr/{CUSTOM_ROOT}"


def run_inference(name: str, config: str, checkpoint: str, extra_args=None, batch_size=None):
    """Run ``inference.py`` inside the ``asr`` checkout and print its output.

    The script is started as ``uv run python3 inference.py`` with ``cwd="asr"``.
    Its stdout and stderr are captured and printed once the process has
    finished, so nothing is shown while the run is in progress.

    Args:
        name: Label for this run; printed as a header before the command and
            repeated in the failure message.
        config: Configuration name, passed as ``-cn=<config>``.
        checkpoint: Checkpoint path, passed as
            ``inferencer.from_pretrained=<checkpoint>``. The script runs with
            ``cwd="asr"``, so a relative path is presumably resolved against
            that directory (depends on inference.py -- confirm).
        extra_args: Optional list of additional command-line arguments,
            appended verbatim after the standard ones.
        batch_size: Value for ``dataloader.batch_size=``. When ``None`` the
            module-level ``BATCH_SIZE`` is used.

    Returns:
        None. A non-zero exit code is reported in the printed output rather
        than raised, so subsequent runs in the same cell still execute.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE

    env = os.environ.copy()
    # Select Matplotlib's non-interactive Agg backend for the child process.
    env["MPLBACKEND"] = "Agg"
    cmd = [
        "uv", "run", "python3", "inference.py",
        f"-cn={config}", f"inferencer.from_pretrained={checkpoint}", f"dataloader.batch_size={batch_size}",
    ]
    if extra_args:
        cmd.extend(extra_args)

    # Label the output so the captured logs of consecutive runs can be told apart.
    print(f"=== {name} ===")
    print(" ".join(cmd))
    result = subprocess.run(cmd, cwd="asr", text=True, capture_output=True, env=env)
    print(result.stdout)
    print(result.stderr)
    if result.returncode != 0:
        # Make failures explicit instead of leaving them buried in the captured logs.
        print(f"[{name}] inference failed with exit code {result.returncode}")

CHECKPOINT_PATH = "model_best.pth"

# One entry per evaluation, in execution order:
#   (run name, config file, extra command-line arguments or None)
INFERENCE_RUNS = [
    # Custom Drive dataset
    ("custom_drive", "inference_custom.yaml", [f"custom_dataset.dataset_root={CUSTOM_ROOT}"]),
    # LibriSpeech test-clean
    ("test_clean", "inference_test_clean.yaml", None),
    # LibriSpeech test-other
    ("test_other", "inference_test_other.yaml", None),
]

for run_name, run_config, run_extra in INFERENCE_RUNS:
    run_inference(
        name=run_name,
        config=run_config,
        checkpoint=CHECKPOINT_PATH,
        extra_args=run_extra,
    )