# Minimal: Preprocessed TorchScript Inference

This notebook shows the minimal steps to compute a speaker embedding using the preprocessed TorchScript artifact and the saved Hugging Face feature-extractor. It expects the export artifacts to be under `packages/w2vbert_speaker/artifacts/` (created by `scripts/export_w2vbert_torchscript.py`).

In [1]:
import w2vbert_speaker
dir(w2vbert_speaker)

  from .autonotebook import tqdm as notebook_tqdm


['W2VBERT_SPK_Module',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'audio_encoder',
 'compute_input_features_from_wave',
 'feature_utils',
 'forward_impl',
 'load_feature_extractor',
 'local',
 'module']

In [2]:
# Quick environment & import check (run this first).
import sys
print('Python executable:', sys.executable)
try:
    # ensure package exports are available in this kernel
    from w2vbert_speaker import load_feature_extractor, compute_input_features_from_wave
    print('w2vbert_speaker import: OK')
except Exception as exc:
    import traceback
    print('Failed to import w2vbert_speaker in this kernel.')
    traceback.print_exc()
    raise RuntimeError(
        'Please install the package into the kernel environment (e.g. `pip install -e packages/w2vbert_speaker`) and restart the kernel.'
    )

Python executable: /Users/zb/NWG/w2v-BERT-2.0_SV/.venv_w2vbert_notebook/bin/python
w2vbert_speaker import: OK


## Prerequisites

- A Python environment with `torch`, `transformers`, `soundfile`, and `librosa` (or `torchaudio`) installed.
- The exported artifacts: `w2vbert_speaker_script_preprocessed.pt` and the `feature_extractor/` folder under `packages/w2vbert_speaker/artifacts/`.
- If you used the repository helper, run `./scripts/run_export_preprocessed.sh` beforehand.

In [3]:
from pathlib import Path
import torch
import soundfile as sf
import librosa

# Adjust these paths if your repo layout differs
REPO_ROOT = Path.cwd()
ARTIFACTS = (REPO_ROOT / 'packages' / 'w2vbert_speaker' / 'artifacts').resolve()
SCRIPTED_PREPROCESSED = ARTIFACTS / 'w2vbert_speaker_script_preprocessed.pt'
FEATURE_EXTRACTOR_DIR = ARTIFACTS / 'feature_extractor'

# Example audio (change if not available)
AUDIO_PATH = (REPO_ROOT / '..' / 'datasets' / 'voxceleb1test' / 'wav' / 'id10270' / '5r0dWxy17C8' / '00001.wav').resolve()
print('artifact preprocessed exists:', SCRIPTED_PREPROCESSED.exists())
print('feature_extractor dir exists:', FEATURE_EXTRACTOR_DIR.exists())
print('audio exists:', AUDIO_PATH.exists())

ModuleNotFoundError: No module named 'librosa'

In [None]:
# Load the saved feature extractor and compute input_features (minimal runtime)
# Import the helpers from the installed package (package must be installed in the kernel's env)
from w2vbert_speaker import load_feature_extractor, compute_input_features_from_wave

if not FEATURE_EXTRACTOR_DIR.exists():
    raise FileNotFoundError(f'Feature extractor not found at {FEATURE_EXTRACTOR_DIR}; run the export script to create it.')

feature_extractor = load_feature_extractor(FEATURE_EXTRACTOR_DIR)


In [None]:
# Load and (if necessary) resample audio to the extractor's sampling rate
wave, sr = sf.read(str(AUDIO_PATH), dtype='float32')
# convert to mono if necessary
if wave.ndim > 1:
    wave = wave.mean(axis=1)
target_sr = int(feature_extractor.sampling_rate)
if sr != target_sr:
    wave = librosa.resample(wave, orig_sr=sr, target_sr=target_sr)
