# ASR Commands Model Evaluation

This notebook unpacks the archived ASR commands model, runs it on the test set, and writes the metrics (`metrics.json`) and the confusion-matrix plot (`confusion_matrix.png`) to `evaluation/asr_commands/` under the project root.

In [17]:
# Setup: imports and paths
import sys
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
import torch
import pandas as pd

from utils.paths import CACHE_PATH, ROOT_PATH

# Directory this notebook writes metrics.json and confusion_matrix.png into
EVAL_DIR = ROOT_PATH.joinpath('evaluation', 'asr_commands')
# Zipped model outputs; first argument to unpack_model
ARCHIVE_PATH = ROOT_PATH.joinpath('models', 'asr_commands_outputs.zip')
# Second argument to unpack_model (per the cell comment, the cache dir the model is unpacked to)
CACHE_DIR = CACHE_PATH.joinpath('models', 'asr_commands')
# JSON file whose "labels" entry holds the list of class names
LABELS_PATH = CACHE_PATH.joinpath('asr_commands', 'labels.json')

from evaluation.asr_commands.eval_utils import compute_metrics
from evaluation.asr_commands.model_utils import load_model_and_extractor, load_test_dataset, preprocess_test_dataset
from evaluation.asr_commands.unpack_model import unpack_model

In [12]:
# Unpack model to cache dir (if needed)
# The call is the last expression of the cell, so its return value is shown as the cell output.
unpack_model(ARCHIVE_PATH, CACHE_DIR)
# NOTE(review): the original trailing heading here said "Load test split and labels", but
# nothing is loaded in this cell; labels are read from LABELS_PATH in a later cell.


PosixPath('/home/mateusz/dev/pjatk_zum/.cache/models/asr_commands')

In [None]:
# Read the class names stored under the "labels" key of the labels file
with LABELS_PATH.open(encoding="utf-8") as labels_file:
    labels = json.load(labels_file)["labels"]

In [18]:
RAW_DIR = CACHE_PATH / "asr_commands" / "raw"
mini_root = RAW_DIR / "mini_speech_commands"

# A label's class id is its position in `labels`; id2label is the inverse lookup
label2id = dict(zip(labels, range(len(labels))))
id2label = {class_id: name for name, class_id in label2id.items()}

# One record per .wav file found in each label's sub-directory, in sorted path order
rows = [
    {"audio": str(wav_path), "label_str": name, "label": int(label2id[name])}
    for name in labels
    for wav_path in sorted((mini_root / name).glob("*.wav"))
]

df = pd.DataFrame(rows)

In [19]:
# Display the file index built above (columns: audio, label_str, label);
# the rendered output abbreviates the middle rows with "...".
df

Unnamed: 0,audio,label_str,label
0,/home/mateusz/dev/pjatk_zum/.cache/asr_command...,down,0
1,/home/mateusz/dev/pjatk_zum/.cache/asr_command...,down,0
2,/home/mateusz/dev/pjatk_zum/.cache/asr_command...,down,0
3,/home/mateusz/dev/pjatk_zum/.cache/asr_command...,down,0
4,/home/mateusz/dev/pjatk_zum/.cache/asr_command...,down,0
...,...,...,...
7995,/home/mateusz/dev/pjatk_zum/.cache/asr_command...,yes,7
7996,/home/mateusz/dev/pjatk_zum/.cache/asr_command...,yes,7
7997,/home/mateusz/dev/pjatk_zum/.cache/asr_command...,yes,7
7998,/home/mateusz/dev/pjatk_zum/.cache/asr_command...,yes,7


In [13]:
# Load model and feature extractor
# Build the checkpoint location from CACHE_DIR (the directory the archive was unpacked into)
# instead of a machine-specific absolute path, so the notebook runs on any checkout.
MODEL_DIR = CACHE_DIR / 'outputs' / 'asr_commands' / 'best_hf'
model, feature_extractor, device = load_model_and_extractor(MODEL_DIR)

Loading weights: 100%|██████████| 216/216 [00:00<00:00, 784.94it/s, Materializing param=projector.weight]                                                    


In [15]:
# Show the class names; a label's position in this list is its integer id in label2id.
labels

['down', 'go', 'left', 'no', 'right', 'stop', 'up', 'yes']

In [14]:
# Preprocess test set
# NOTE(review): as written this call fails — the recorded output of this cell is
# "AttributeError: 'list' object has no attribute 'map'". `labels` is a plain list of
# class names, while the helper evidently calls `.map` on its first argument.
# Presumably the first argument should be the dataset returned by `load_test_dataset`
# (imported at the top of the notebook but never called) — its signature is not visible
# here; TODO confirm and pass the loaded test split instead of `labels`.
test_ds_proc = preprocess_test_dataset(labels, feature_extractor)

AttributeError: 'list' object has no attribute 'map'

In [None]:
# Run inference
# Examples are fed to the model one at a time; gradients are disabled for the whole loop.
predicted_ids, true_ids = [], []
with torch.no_grad():
    for example in test_ds_proc:
        # unsqueeze(0) adds a leading axis of size 1 before the tensor is moved to `device`
        batch = torch.tensor(example['input_values']).unsqueeze(0).to(device)
        output = model(batch)
        # Take the first (only) row of the logits as a NumPy array on the CPU
        sample_logits = output.logits.cpu().numpy()[0]
        predicted_ids.append(np.argmax(sample_logits))
        true_ids.append(example['label'])
all_preds = np.array(predicted_ids)
all_labels = np.array(true_ids)

In [None]:
# Compute and save metrics
metrics = compute_metrics(all_labels, all_preds, labels)

# Persist the metrics as indented JSON inside the evaluation directory
metrics_path = EVAL_DIR / 'metrics.json'
with metrics_path.open('w', encoding='utf-8') as out_file:
    json.dump(metrics, out_file, indent=2)

# Last expression: show the metrics as the cell output
metrics

In [None]:
# Visualize confusion matrix
# Uses the explicit Figure/Axes interface throughout instead of the pyplot state machine.
cm = np.array(metrics['confusion_matrix'])
tick_positions = np.arange(len(labels))

fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(cm, cmap='Blues')

# One tick per class on both axes, labelled with the class names
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(labels, rotation=45, ha='right')
ax.set_yticklabels(labels)

ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')

fig.colorbar(im, ax=ax)
fig.tight_layout()
# Save before plt.show()
fig.savefig(EVAL_DIR / 'confusion_matrix.png')
plt.show()

## Results
- Metrics are saved to `metrics.json` in the evaluation directory.
- Confusion matrix is saved as `confusion_matrix.png`.
- See the classification report in the metrics output above.