In [41]:
# %%
from pathlib import Path
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchaudio
import torchaudio.transforms as T
import soundfile as sf

import nengo
import nengo_loihi
import numpy as np
from torch.utils.data import Dataset, DataLoader

# -------------------------------------------------
# Paths
# -------------------------------------------------
PROJECT_ROOT = Path.cwd().resolve().parent   # parent of loihi_emulator
DATA_ROOT = PROJECT_ROOT / "sample_data" / "speech_commands_v0.02"
MODEL_DIR = PROJECT_ROOT / "saved_models"

print("PROJECT_ROOT:", PROJECT_ROOT)
print("DATA_ROOT exists:", DATA_ROOT.exists())
print("MODEL_DIR exists:", MODEL_DIR.exists())

device = torch.device("cpu")  # Loihi sim is CPU-only anyway
print("Using device:", device)

# Restrict to the same 6 classes
CLASSES = ["yes", "no", "go", "stop", "down", "up"]
NUM_CLASSES = len(CLASSES)

PROJECT_ROOT: /Users/maddy/Desktop/PLEP/Project/CS-576-Final-Project
DATA_ROOT exists: True
MODEL_DIR exists: True
Using device: cpu


In [42]:
# %%
class CNN_KWS(nn.Module):
    def __init__(self, num_classes=6, flatten_dim=3840):
        super().__init__()

        self.flatten_dim = flatten_dim  # in_features of first FC layer

        self.features = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),          # 40xT -> 20xT/2
            nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),          # 20xT/2 -> 10xT/4
        )

        self.classifier = nn.Sequential(
            nn.Linear(self.flatten_dim, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        # x: [B, 40, T]
        x = x.unsqueeze(1)           # [B,1,40,T]
        x = self.features(x)
        x = torch.flatten(x, 1)      # [B,F]

        # Safety: crop / pad to expected flatten_dim
        F_now = x.shape[1]
        if F_now > self.flatten_dim:
            x = x[:, :self.flatten_dim]
        elif F_now < self.flatten_dim:
            pad = self.flatten_dim - F_now
            x = F.pad(x, (0, pad))

        x = self.classifier(x)
        return x

In [44]:
# %%
cnn_ckpt_path = MODEL_DIR / "baseline_cnn_kws_vfinal.pt"
print("CNN checkpoint exists:", cnn_ckpt_path.exists())

# Peek into state_dict to get correct flatten_dim
state_dict = torch.load(cnn_ckpt_path, map_location=device)
flatten_dim_ckpt = state_dict["classifier.0.weight"].shape[1]
print("Flatten dim in checkpoint:", flatten_dim_ckpt)

cnn_model = CNN_KWS(num_classes=NUM_CLASSES, flatten_dim=flatten_dim_ckpt).to(device)
cnn_model.load_state_dict(state_dict)
cnn_model.eval()
print("CNN model loaded successfully.")

# Grab final FC weights for Loihi classifier
fc2 = cnn_model.classifier[2]
W = fc2.weight.detach().cpu().numpy()  # [6, 64]
b = fc2.bias.detach().cpu().numpy()    # [6]
print("fc2 weight shape:", W.shape)

CNN checkpoint exists: True
Flatten dim in checkpoint: 3840
CNN model loaded successfully.
fc2 weight shape: (6, 64)


In [45]:
# %%
SAMPLE_RATE = 16000
N_MFCC = 40

mfcc_transform = nn.Sequential(
    T.MFCC(
        sample_rate=SAMPLE_RATE,
        n_mfcc=N_MFCC,
        melkwargs={
            "n_fft": 400,
            "hop_length": 160,
            "n_mels": 40,
            "center": False,
        },
    ),
    T.AmplitudeToDB(),
)

def wav_to_mfcc(path: Path) -> torch.Tensor:
    """
    Read WAV with soundfile (avoids TorchCodec), resample if needed,
    then compute normalized MFCC [40, T].
    """
    waveform, sr = sf.read(str(path))
    waveform = torch.tensor(waveform).float()

    # Ensure shape [1, N]
    if waveform.ndim == 1:
        waveform = waveform.unsqueeze(0)
    elif waveform.ndim == 2 and waveform.shape[1] > waveform.shape[0]:
        waveform = waveform.T  # [C,N]

    if sr != SAMPLE_RATE:
        waveform = torchaudio.functional.resample(waveform, sr, SAMPLE_RATE)

    mfcc = mfcc_transform(waveform).squeeze(0)  # [40,T]
    mfcc = (mfcc - mfcc.mean()) / (mfcc.std() + 1e-6)
    mfcc = torch.clamp(mfcc, -2.0, 2.0)
    return mfcc

In [46]:
# %%
class KWS_Dataset(Dataset):
    def __init__(self, files, classes):
        self.files = files
        self.classes = classes

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        path = self.files[idx]
        mfcc = wav_to_mfcc(path)    # [40,T]
        label = path.parent.name
        y = self.classes.index(label)
        return mfcc, y


def pad_collate(batch):
    xs, ys = zip(*batch)
    max_t = max(x.shape[1] for x in xs)
    xs = [F.pad(x, (0, max_t - x.shape[1])) for x in xs]
    xs = torch.stack(xs)   # [B,40,T]
    ys = torch.tensor(ys)
    return xs, ys


# Build a test file list
file_list = []
for c in CLASSES:
    class_dir = DATA_ROOT / c
    file_list += sorted(class_dir.glob("*.wav"))

print("Total WAV files in 6 classes:", len(file_list))

# Shuffle and optionally subsample
random.seed(0)
random.shuffle(file_list)

test_files = file_list  # you can slice e.g. file_list[:1000]

test_dataset = KWS_Dataset(test_files, CLASSES)
test_loader = DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=pad_collate,
)

print("Loaded test samples:", len(test_dataset))

Total WAV files in 6 classes: 23377
Loaded test samples: 23377


In [47]:
# %%
def extract_cnn_features(x: torch.Tensor, model: CNN_KWS) -> torch.Tensor:
    """
    x: [B,40,T]
    returns: [B,64] feature vector after first FC + ReLU
    """
    with torch.no_grad():
        x_ = x.unsqueeze(1)     # [B,1,40,T]
        h = model.features(x_)
        h = torch.flatten(h, 1)
        # crop/pad like in forward
        F_now = h.shape[1]
        if F_now > model.flatten_dim:
            h = h[:, :model.flatten_dim]
        elif F_now < model.flatten_dim:
            pad = model.flatten_dim - F_now
            h = F.pad(h, (0, pad))
        fc1 = model.classifier[0]
        h = F.relu(fc1(h))
    return h   # [B,64]


# Quick sanity check on one example
from pathlib import Path as _P

example_path = next(iter(test_files))
print("Example file:", example_path)

mfcc_ex = wav_to_mfcc(example_path).unsqueeze(0).to(device)  # [1,40,T]
with torch.no_grad():
    feats_ex = extract_cnn_features(mfcc_ex, cnn_model)
    logits_ex = cnn_model(mfcc_ex)
    pred_idx = logits_ex.argmax(dim=1).item()

print("Feat shape:", feats_ex.shape)
print("CNN predicts:", CLASSES[pred_idx])

Example file: /Users/maddy/Desktop/PLEP/Project/CS-576-Final-Project/sample_data/speech_commands_v0.02/stop/b84f83d2_nohash_0.wav
Feat shape: torch.Size([1, 64])
CNN predicts: yes


In [48]:
# %%
def run_loihi_for_feature(
    feat_vec: np.ndarray,
    W: np.ndarray,
    sim_time: float = 0.1
) -> np.ndarray:
    """
    Run a 64-D feature vector through a tiny LIF ensemble on the
    Loihi emulator. W is [6,64] (fc2 weights).
    Returns logits_loihi [6].
    """
    assert feat_vec.shape == (64,), f"Expected (64,), got {feat_vec.shape}"
    assert W.shape == (NUM_CLASSES, 64), f"W shape {W.shape} unexpected"

    with nengo.Network(seed=0) as net:
        inp = nengo.Node(output=lambda t: feat_vec)

        ens = nengo.Ensemble(
            n_neurons=64,
            dimensions=64,
            neuron_type=nengo.LIF(),
        )

        out = nengo.Node(size_in=NUM_CLASSES)

        nengo.Connection(inp, ens, synapse=None)
        nengo.Connection(
            ens.neurons,
            out,
            transform=W,
            synapse=0.01,
        )

        p_out = nengo.Probe(out, synapse=0.01)

    with nengo_loihi.Simulator(net) as sim:
        sim.run(sim_time)
        logits_loihi = sim.data[p_out][-1]

    logits_loihi = np.nan_to_num(logits_loihi)
    return logits_loihi

In [49]:
# %%
from typing import Tuple

def eval_loihi_classifier(
    loader,
    cnn_model: CNN_KWS,
    W: np.ndarray,
    device: torch.device,
    max_samples: int = 100,
    sim_time: float = 0.1,
) -> Tuple[float, float, int]:

    cnn_model.eval()
    total = 0
    correct_cnn = 0
    correct_loihi = 0

    for mfcc_batch, y_batch in loader:
        mfcc_batch = mfcc_batch.to(device)
        y_batch_np = y_batch.numpy()

        with torch.no_grad():
            feats = extract_cnn_features(mfcc_batch, cnn_model)   # [B,64]
            fc2 = cnn_model.classifier[2]
            logits_cnn = fc2(feats)                               # [B,6]
            preds_cnn = logits_cnn.argmax(dim=1).cpu().numpy()

        batch_size = feats.size(0)
        for i in range(batch_size):
            feat_np = feats[i].cpu().numpy()
            label = int(y_batch_np[i])

            # CNN prediction
            if preds_cnn[i] == label:
                correct_cnn += 1

            # Loihi prediction
            logits_loihi = run_loihi_for_feature(feat_np, W=W, sim_time=sim_time)
            pred_loihi = int(np.argmax(logits_loihi))
            if pred_loihi == label:
                correct_loihi += 1

            total += 1
            if total >= max_samples:
                cnn_acc = correct_cnn / total
                loihi_acc = correct_loihi / total
                return cnn_acc, loihi_acc, total

    cnn_acc = correct_cnn / max(total, 1)
    loihi_acc = correct_loihi / max(total, 1)
    return cnn_acc, loihi_acc, total

In [50]:
# %%
max_samples = 50    # or 100 if you want
sim_time = 0.1      # 100 ms per sample

cnn_acc, loihi_acc, total = eval_loihi_classifier(
    loader=test_loader,
    cnn_model=cnn_model,
    W=W,
    device=device,
    max_samples=max_samples,
    sim_time=sim_time,
)

print(f"Evaluated on {total} test samples")
print(f"CNN head accuracy:    {cnn_acc*100:.2f}%")
print(f"Loihi classifier acc: {loihi_acc*100:.2f}%")

Evaluated on 50 test samples
CNN head accuracy:    44.00%
Loihi classifier acc: 20.00%
