In [8]:
conda install pytorch torchvision torchaudio -c pytorch -c conda-forge



  conda config --add channels defaults

For more information see https://docs.conda.io/projects/conda/en/stable/user-guide/configuration/use-condarc.html

  deprecated.topic(


Channels:
 - pytorch
 - conda-forge
 - defaults
Platform: win-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\USER\Desktop\KWS_SYSTEM\env

  added / updated specs:
    - pytorch
    - torchaudio
    - torchvision


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    _libavif_api-1.1.1         |       h57928b3_3          11 KB  conda-forge
    _openmp_mutex-4.5          |            2_gnu          48 KB  conda-forge
    aom-3.9.1                  |       he0c23c2_0         1.9 MB  conda-forge
    ca-certificates-2025.4.26  |       h4c7d964_0         149 KB  conda-forge
    cairo-1.18.4               |       h5782bbf_0         1.5 MB  conda-forge
    certifi-2025.4.26          |     pyhd8ed1ab_0         154 KB  conda-forge
    dav1d-1.2.1                |       hcfcfb64_0         60

In [9]:
import torch
print(torch.__version__)
print("CUDA available:", torch.cuda.is_available())


2.5.1
CUDA available: False


In [27]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import pandas as pd

# --- Dataset class ---
class KeywordDataset(Dataset):
    def __init__(self, feature_dir, max_len=80):
        self.data = []
        self.labels = []
        self.label_map = {}
        self.max_len = max_len
        idx = 0

        for label in sorted(os.listdir(feature_dir)):
            label_path = os.path.join(feature_dir, label)
            if not os.path.isdir(label_path):
                continue

            if label not in self.label_map:
                self.label_map[label] = idx
                idx += 1

            for file in os.listdir(label_path):
                if file.endswith(".npy"):
                    path = os.path.join(label_path, file)
                    feat = np.load(path)
                    self.data.append(feat.astype(np.float32))
                    self.labels.append(self.label_map[label])

        self.labels = np.array(self.labels)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data[idx]
        c, t = x.shape  # (freq, time)
        if t < self.max_len:
            pad = self.max_len - t
            x = np.pad(x, ((0, 0), (0, pad)), mode='constant')
        elif t > self.max_len:
            x = x[:, :self.max_len]
        x = np.expand_dims(x, axis=0)  # (1, freq, time)
        return x, self.labels[idx]

# --- CNN model ---
class SimpleCNN(nn.Module):
    def __init__(self, input_shape=(1, 13, 80), num_classes=10):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.dropout = nn.Dropout(0.3)

        # 🔧 Dynamically determine the input size for fc1
        self.flatten_dim = self._get_flatten_size(input_shape)
        self.fc1 = nn.Linear(self.flatten_dim, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def _get_flatten_size(self, shape):
        with torch.no_grad():
            x = torch.zeros(1, *shape)
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            return x.view(1, -1).shape[1]

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)

# --- Training function ---
def train_and_evaluate(feature_dir, max_len=80, epochs=5):
    dataset = KeywordDataset(feature_dir, max_len=max_len)
    if len(dataset) < 10: return None  # skip empty sets

    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_set, val_set = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=32)

   # model = SimpleCNN().to(device)
    #model = SimpleCNN(input_shape=(1, 40, 80)).to(device)
    # Decide input shape based on feature type
    input_shape = (1, 13, 80) if 'mfcc' in feature_type else (1, 40, 80)
    model = SimpleCNN(input_shape=input_shape).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        for xb, yb in train_loader:
           # xb, yb = torch.tensor(xb).to(device), torch.tensor(yb).to(device)
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            out = model(xb)
            loss = criterion(out, yb)
            loss.backward()
            optimizer.step()

    # Evaluate
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = torch.tensor(xb).to(device)
            out = model(xb)
            preds.extend(torch.argmax(out, axis=1).cpu().numpy())
            labels.extend(yb.numpy())

    return accuracy_score(labels, preds)

# --- Run all configs ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results = []

for feature_type in ['features_mfcc', 'features_mel']:
    base_path = feature_type
    for win_ms in range(10, 101, 10):
        for overlap in [0, 25, 50, 75]:
            path = f"{base_path}/win_{win_ms}/overlap_{overlap}"
            print(f"Training: {feature_type}, Win={win_ms}ms, Overlap={overlap}%")
            acc = train_and_evaluate(path)
            if acc is not None:
                results.append({
                    "Feature": feature_type.replace("features_", ""),
                    "Window": win_ms,
                    "Overlap": overlap,
                    "Accuracy": round(acc, 4)
                })

# Save results to CSV
df = pd.DataFrame(results)
df.to_csv("results_summary.csv", index=False)
print("\n✅ Evaluation complete. Results saved to results_summary.csv")


Training: features_mfcc, Win=10ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=10ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=10ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=10ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=20ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=20ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=20ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=20ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=30ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=30ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=30ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=30ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=40ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=40ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=40ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=40ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=50ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=50ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=50ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=50ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=60ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=60ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=60ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=60ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=70ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=70ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=70ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=70ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=80ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=80ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=80ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=80ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=90ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=90ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=90ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=90ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=100ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=100ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=100ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mfcc, Win=100ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=10ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=10ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=10ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=10ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=20ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=20ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=20ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=20ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=30ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=30ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=30ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=30ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=40ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=40ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=40ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=40ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=50ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=50ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=50ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=50ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=60ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=60ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=60ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=60ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=70ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=70ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=70ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=70ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=80ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=80ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=80ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=80ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=90ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=90ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=90ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=90ms, Overlap=75%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=100ms, Overlap=0%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=100ms, Overlap=25%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=100ms, Overlap=50%


  xb = torch.tensor(xb).to(device)


Training: features_mel, Win=100ms, Overlap=75%


  xb = torch.tensor(xb).to(device)



✅ Evaluation complete. Results saved to results_summary.csv


In [28]:
torch.save(model.state_dict(), 'best_model.pth')

In [29]:
print(dataset.label_map)

{'down': 0, 'go': 1, 'left': 2, 'no': 3, 'off': 4, 'on': 5, 'right': 6, 'stop': 7, 'up': 8, 'yes': 9}
