# ZIPA ONNX Export

This notebook exports the ZIPA small CTC model to ONNX format.

**Requirements:**
- Google Colab with GPU runtime (free tier works)
- ~10 minutes to complete

**Output:**
- `model.onnx` (~250MB) - FP32 ONNX model
- `vocab.json` - 127 IPA tokens

**Instructions:**
1. Runtime → Change runtime type → **T4 GPU**
2. Runtime → Run all

## 1. Sync Repository and Check GPU

In [None]:
%%bash
# Sync the pp repository from GitHub: clone it on the first run, pull afterwards.
# Strict mode makes a failed clone abort the cell instead of printing a
# success message for a repository that does not exist.
set -Eeuo pipefail

if [ ! -d "pp" ]; then
    git clone https://github.com/guettli/pp.git
    echo "Cloned pp repo"
else
    # -C runs git inside pp/ without changing this script's working directory.
    # A failed pull is reported but not fatal: the existing checkout is still there.
    if git -C pp pull; then
        echo "Updated pp repo"
    else
        echo "WARNING: 'git pull' failed; continuing with the existing pp checkout" >&2
    fi
fi

In [None]:
# Check GPU availability (without importing torch yet).
# Raises RuntimeError with instructions when no GPU can be detected.
import subprocess

no_gpu_message = "No GPU found! Go to Runtime → Change runtime type → T4 GPU"

try:
    result = subprocess.run(
        ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
        capture_output=True,
        text=True,
    )
except FileNotFoundError as exc:
    # subprocess.run raises FileNotFoundError when the nvidia-smi binary is not
    # on PATH; report that as the same actionable "no GPU" error.
    raise RuntimeError(no_gpu_message) from exc

gpu_name = result.stdout.strip()
# A non-zero exit code or an empty device list both mean there is no usable GPU.
if result.returncode != 0 or not gpu_name:
    raise RuntimeError(no_gpu_message)
print(f"GPU: {gpu_name}")

## 2. Install Dependencies

In [None]:
%%bash

# Install the pinned PyTorch 2.4.0 / k2 stack plus everything the export needs.
#
# The cell is skipped only when the WHOLE stack is present. Checking torch
# alone would skip k2/icefall/etc. forever after a run that failed midway.
deps_ready() {
    # find_spec only locates the packages; it does not import them.
    python -c "
import importlib.util
import torch

assert torch.__version__.startswith('2.4.0')
required = ('k2', 'icefall', 'lhotse', 'sentencepiece', 'onnx', 'onnxruntime', 'huggingface_hub')
missing = [name for name in required if importlib.util.find_spec(name) is None]
assert not missing, missing
" 2>/dev/null
}

if deps_ready; then
    echo "PyTorch 2.4.0 and export dependencies already installed, skipping..."
    exit 0
fi

# Bash Strict Mode (enabled after the probe above, whose failure is expected)
trap 'echo -e "\n🤷 🚨 🔥 Warning: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0" 2>/dev/null || true) 🔥 🚨 🤷 "; exit 3' ERR
set -Eeuo pipefail

echo "=== Uninstalling incompatible packages ==="
# Best effort: the packages may not be installed at all.
pip uninstall -y k2 torch torchaudio torchvision 2>/dev/null || true

echo "=== Installing PyTorch 2.4.0 + CUDA 12.4 ==="
pip install torch==2.4.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124

echo "=== Installing k2 ==="
# The k2 wheel is pinned to the same CUDA/torch combination as above.
pip install k2==1.24.4.dev20241030+cuda12.4.torch2.4.0 -f https://k2-fsa.github.io/k2/cuda.html

echo "=== Installing icefall and other dependencies ==="
pip install git+https://github.com/k2-fsa/icefall.git
# lhotse is installed without its dependencies; the ones needed are listed explicitly below.
pip install lhotse --no-deps
pip install cytoolz intervaltree lilcom audioread soundfile
# huggingface_hub is imported by the model download cell later in this notebook.
pip install sentencepiece onnx onnxruntime huggingface_hub

echo ""
echo "=== Dependencies installed! ==="
echo "=== Runtime will restart in next cell. Then re-run all cells. ==="

In [None]:
# Restart runtime to load new PyTorch version (skips if already correct)
# Restart the runtime when the torch version loaded here is not 2.4.0.
import os

import torch

if not torch.__version__.startswith('2.4.0'):
    print("Restarting runtime to load PyTorch 2.4.0...")
    print("After restart, click 'Runtime → Run all' again.")
    # Killing the kernel's own process (signal 9) is how this notebook
    # triggers the restart.
    os.kill(os.getpid(), 9)
else:
    print(f"PyTorch {torch.__version__} already loaded, no restart needed.")

In [None]:
# Sanity-check the installed stack: report the torch version and CUDA status.
import torch
import k2  # imported only so a broken k2 install fails here; its version is not printed

print(f"PyTorch: {torch.__version__}")
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")

## 3. Clone ZIPA Repository

In [None]:
%%bash

# Fetch the ZIPA sources (shallow clone) unless a zipa/ directory already exists.

# Bash Strict Mode
trap 'echo -e "\n🤷 🚨 🔥 Warning: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0" 2>/dev/null || true) 🔥 🚨 🤷 "; exit 3' ERR
set -Eeuo pipefail

if [ ! -d "zipa" ]; then
    # --depth 1 skips the history.
    git clone --depth 1 https://github.com/lingjzhu/zipa.git
    echo "ZIPA repo cloned!"
else
    echo "ZIPA repo already exists"
fi

## 4. Download Model and Tokenizer

In [None]:
import os

from huggingface_hub import hf_hub_download

# zipa-export/ receives the raw download; zipa-export/exp/ is the directory
# the later cells write the wrapped checkpoint and export artifacts into.
os.makedirs("zipa-export/exp", exist_ok=True)

# Fetch the published ZIPA small checkpoint from the Hugging Face Hub.
download_kwargs = {
    "repo_id": "anyspeech/zipa-small-crctc-500k",
    "filename": "zipa_small_crctc_500000_avg10.pth",
    "local_dir": "zipa-export",
}
model_path = hf_hub_download(**download_kwargs)
print(f"Model downloaded: {model_path}")

In [None]:
# Re-save the downloaded weights under a "model" key and fetch the tokenizer.
import urllib.request

import torch

raw_checkpoint_path = "zipa-export/zipa_small_crctc_500000_avg10.pth"
# Named epoch-999.pt because the export cell passes --epoch 999 with
# --exp-dir zipa-export/exp.
wrapped_checkpoint_path = "zipa-export/exp/epoch-999.pt"

# NOTE(review): torch.load unpickles the file, so it trusts the downloaded
# checkpoint. Consider weights_only=True if the file holds only tensors — TODO confirm.
checkpoint = torch.load(raw_checkpoint_path, map_location="cpu")
wrapped = {"model": checkpoint}
torch.save(wrapped, wrapped_checkpoint_path)
print("Checkpoint wrapped and saved!")

# The BPE model is fetched from the ZIPA repository's main branch.
tokenizer_url = "https://raw.githubusercontent.com/lingjzhu/zipa/main/ipa_simplified/bpe.model"
urllib.request.urlretrieve(tokenizer_url, "zipa-export/bpe.model")
print("Tokenizer downloaded!")

In [None]:
# Dump the SentencePiece vocabulary as "<piece> <id>" lines in tokens.txt.
import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.Load("zipa-export/bpe.model")

num_pieces = sp.GetPieceSize()

with open("zipa-export/tokens.txt", "w", encoding="utf-8") as f:
    # One line per piece id, in ascending id order.
    f.writelines(
        f"{sp.IdToPiece(piece_id)} {piece_id}\n" for piece_id in range(num_pieces)
    )

print(f"Created tokens.txt with {num_pieces} tokens")

## 5. Export to ONNX

In [None]:
%%bash

# Run the ZIPA CTC ONNX exporter on the wrapped checkpoint (exp/epoch-999.pt).

# Bash Strict Mode
trap 'echo -e "\n🤷 🚨 🔥 Warning: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0" 2>/dev/null || true) 🔥 🚨 🤷 "; exit 3' ERR
set -Eeuo pipefail

# The exporter is run from its own directory, so the zipa-export/ paths
# below are given relative to zipa/zipformer_crctc.
cd zipa/zipformer_crctc

# ZIPA small model parameters (from zipa_ctc_inference.py)
# --epoch 999 / --avg 1 / --use-averaged-model 0 point at the single
# wrapped checkpoint file exp/epoch-999.pt.
python export-onnx-ctc.py \
    --exp-dir ../../zipa-export/exp \
    --tokens ../../zipa-export/tokens.txt \
    --epoch 999 \
    --avg 1 \
    --use-averaged-model 0 \
    --num-encoder-layers "2,2,3,4,3,2" \
    --feedforward-dim "512,768,1024,1536,1024,768" \
    --encoder-dim "192,256,384,512,384,256" \
    --encoder-unmasked-dim "192,192,256,256,256,192" \
    --num-heads "4,4,4,8,4,4" \
    --cnn-module-kernel "31,31,15,15,15,31" \
    --query-head-dim 32 \
    --value-head-dim 12 \
    --pos-head-dim 4 \
    --pos-dim 48 \
    --downsampling-factor "1,2,4,8,4,2" \
    --causal False \
    --use-transducer 0 \
    --use-ctc 1

# The validation and zip cells read zipa-export/exp/model.onnx; fail here
# with a clear message if the exporter did not produce it.
if [ ! -f ../../zipa-export/exp/model.onnx ]; then
    echo "ONNX export finished but zipa-export/exp/model.onnx was not created" >&2
    exit 1
fi

echo "ONNX export complete!"

## 6. Create vocab.json for Browser

In [None]:
import json

# Convert tokens.txt (one "<token> <id>" entry per line) to vocab.json,
# a JSON object mapping each token to its integer id.
vocab = {}
with open("zipa-export/tokens.txt", "r", encoding="utf-8") as f:
    for line_no, line in enumerate(f, start=1):
        # Strip only the newline and split on the LAST space, so a token that
        # is empty or contains spaces is kept instead of being dropped.
        entry = line.rstrip("\n")
        token, sep, idx = entry.rpartition(" ")
        if not sep or not idx:
            # Blank lines are skipped quietly; other malformed lines are reported.
            if entry:
                print(f"Skipping malformed line {line_no}: {entry!r}")
            continue
        vocab[token] = int(idx)

# ensure_ascii=False keeps non-ASCII tokens readable in the JSON file.
with open("zipa-export/exp/vocab.json", "w", encoding="utf-8") as f:
    json.dump(vocab, f, ensure_ascii=False, indent=2)

print(f"Created vocab.json with {len(vocab)} tokens")

## 7. Validate and Download

In [None]:
import os

# Report every regular file in the export directory with its size in MB.
exp_dir = "zipa-export/exp"

print("Output files:")
for entry in os.listdir(exp_dir):
    entry_path = f"{exp_dir}/{entry}"
    if not os.path.isfile(entry_path):
        continue  # subdirectories are not listed
    size_mb = os.path.getsize(entry_path) / 1024 / 1024
    print(f"  {entry}: {size_mb:.1f} MB")

In [None]:
# Smoke-test the exported model: load it with ONNX Runtime, print its
# inputs and outputs, and run one forward pass on random data.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("zipa-export/exp/model.onnx")


def _print_io(header, nodes):
    """Print a header line followed by the name and shape of each node."""
    print(header)
    for node in nodes:
        print(f"  {node.name}: {node.shape}")


_print_io("Model inputs:", session.get_inputs())
_print_io("\nModel outputs:", session.get_outputs())

# Dummy batch of one item with 100 frames and 80 values per frame.
# NOTE(review): 80 is assumed to match the model's feature dimension — TODO confirm.
batch_size, num_frames, feat_dim = 1, 100, 80
x = np.random.randn(batch_size, num_frames, feat_dim).astype(np.float32)
x_lens = np.array([num_frames], dtype=np.int64)

feeds = {"x": x, "x_lens": x_lens}
outputs = session.run(None, feeds)
print(f"\nTest inference successful! Output shape: {outputs[0].shape}")

In [None]:
%%bash

# Bundle the exported model and vocabulary into zipa-onnx-export.zip
# in the notebook's working directory.

# Bash Strict Mode
trap 'echo -e "\n🤷 🚨 🔥 Warning: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0" 2>/dev/null || true) 🔥 🚨 🤷 "; exit 3' ERR
set -Eeuo pipefail

# Zip from inside exp/ so the archive holds bare file names, not the directory path.
cd zipa-export/exp
zip -r ../../zipa-onnx-export.zip model.onnx vocab.json

echo ""
echo "=================================================="
echo "EXPORT COMPLETE!"
echo "=================================================="
echo ""
echo "Download zipa-onnx-export.zip from the file browser"
echo "(left panel) or run the next cell."

In [None]:
# Offer the export archive as a browser download (Colab only).
from google.colab import files

archive_name = "zipa-onnx-export.zip"
files.download(archive_name)