In [2]:
import librosa
import numpy as np
import os

In [3]:
positive_dataset = os.listdir(os.path.join('..', 'samples', 'positive'))
negative_dataset = os.listdir(os.path.join('..', 'samples', 'negative'))

In [4]:
SAMPLE_RATE = 16000
DURATION = 1.5

---
### Test Stated


In [31]:
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=8000, duration=1.5)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    return np.mean(mfcc.T, axis=0)

In [32]:
import pandas as pd

In [54]:
data = []
labels = []

In [55]:
for file in positive_dataset:
    features = extract_features(os.path.join('../samples/positive', file))
    data.append(features)
    labels.append(1)

In [56]:
for file in negative_dataset:
    features = extract_features(os.path.join('../samples/negative', file))
    data.append(features)
    labels.append(0)

In [57]:
print(np.array(data).shape)
print(np.array(labels).shape)

# before negative
# (377, 40)
# (377,)

# after negative included
# (878, 40)
# (878,)

(878, 40)
(878,)


In [62]:
df = pd.DataFrame(data)

In [64]:
df["label"] = labels

In [66]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,label
0,-182.537399,86.565063,-12.986511,20.405024,-5.364577,12.565320,0.101523,8.087031,-6.353205,6.792905,...,0.932926,-1.721535,2.347887,-1.355222,1.082748,-2.732507,0.775575,-5.316260,-0.752545,1
1,-159.626541,87.873070,8.400023,28.131775,-2.168035,9.445657,-1.105764,12.874825,-0.871865,-0.888309,...,1.972351,-3.280976,2.519566,-3.622287,0.430134,-2.613431,4.573012,-4.034807,1.668520,1
2,-167.042191,101.693970,-10.497649,-1.948504,-5.283806,4.409165,-7.433646,6.585741,-1.054236,1.845864,...,-1.615873,-5.865578,3.481190,2.684396,9.643991,3.107458,4.412710,-3.521733,0.791641,1
3,-209.341476,96.355835,-5.061976,16.887213,-0.408311,10.966382,-5.957483,8.431193,0.311275,6.172767,...,0.141452,-2.927981,2.124492,-2.427243,5.889200,1.543471,3.881313,-3.530347,-0.820171,1
4,-174.549759,83.628075,-4.715631,11.865753,-10.440622,6.825201,-7.256752,8.738505,-9.724129,2.074056,...,3.619828,-7.543792,-0.874020,-3.760430,5.317819,-4.452523,1.397836,-2.804180,-1.068789,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
873,-197.710709,82.850929,2.380143,19.107935,-4.930069,11.150809,-9.221920,4.654821,-7.401761,5.338705,...,1.528580,-5.760756,2.065733,-2.725697,3.691555,-3.511689,2.393312,-3.532225,1.028423,0
874,-165.151840,84.042381,-0.971782,16.261053,-16.351496,3.403297,-15.364736,-4.109573,-4.476860,2.532765,...,2.297409,-7.039461,-0.535730,-4.036747,2.320846,-8.205886,0.518593,-2.576680,0.810072,0
875,-193.365524,79.952393,-1.608006,16.269396,-8.011728,7.369267,-9.285617,0.513074,-8.505024,1.229052,...,1.103950,-5.091547,1.997867,-3.906316,2.991428,-3.435692,2.397051,-3.581451,0.245209,0
876,-27.019033,25.496912,-46.278004,6.449645,-24.586557,-10.281548,3.857861,1.259932,-16.495911,-1.857202,...,-0.870397,-2.454733,-0.687203,-1.351474,9.316044,-9.756568,1.589043,-0.424946,5.004022,0


In [67]:
from sklearn.model_selection import train_test_split

In [68]:
X = df.drop("label", axis=1).values
y = df["label"].values

In [69]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,label
0,-182.537399,86.565063,-12.986511,20.405024,-5.364577,12.565320,0.101523,8.087031,-6.353205,6.792905,...,0.932926,-1.721535,2.347887,-1.355222,1.082748,-2.732507,0.775575,-5.316260,-0.752545,1
1,-159.626541,87.873070,8.400023,28.131775,-2.168035,9.445657,-1.105764,12.874825,-0.871865,-0.888309,...,1.972351,-3.280976,2.519566,-3.622287,0.430134,-2.613431,4.573012,-4.034807,1.668520,1
2,-167.042191,101.693970,-10.497649,-1.948504,-5.283806,4.409165,-7.433646,6.585741,-1.054236,1.845864,...,-1.615873,-5.865578,3.481190,2.684396,9.643991,3.107458,4.412710,-3.521733,0.791641,1
3,-209.341476,96.355835,-5.061976,16.887213,-0.408311,10.966382,-5.957483,8.431193,0.311275,6.172767,...,0.141452,-2.927981,2.124492,-2.427243,5.889200,1.543471,3.881313,-3.530347,-0.820171,1
4,-174.549759,83.628075,-4.715631,11.865753,-10.440622,6.825201,-7.256752,8.738505,-9.724129,2.074056,...,3.619828,-7.543792,-0.874020,-3.760430,5.317819,-4.452523,1.397836,-2.804180,-1.068789,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
873,-197.710709,82.850929,2.380143,19.107935,-4.930069,11.150809,-9.221920,4.654821,-7.401761,5.338705,...,1.528580,-5.760756,2.065733,-2.725697,3.691555,-3.511689,2.393312,-3.532225,1.028423,0
874,-165.151840,84.042381,-0.971782,16.261053,-16.351496,3.403297,-15.364736,-4.109573,-4.476860,2.532765,...,2.297409,-7.039461,-0.535730,-4.036747,2.320846,-8.205886,0.518593,-2.576680,0.810072,0
875,-193.365524,79.952393,-1.608006,16.269396,-8.011728,7.369267,-9.285617,0.513074,-8.505024,1.229052,...,1.103950,-5.091547,1.997867,-3.906316,2.991428,-3.435692,2.397051,-3.581451,0.245209,0
876,-27.019033,25.496912,-46.278004,6.449645,-24.586557,-10.281548,3.857861,1.259932,-16.495911,-1.857202,...,-0.870397,-2.454733,-0.687203,-1.351474,9.316044,-9.756568,1.589043,-0.424946,5.004022,0


In [70]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

In [None]:
from tensorflow.keras import layers, models

In [75]:
model = models.Sequential([
    layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.3),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')  # binary classification
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [76]:
history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_test, y_test)
)

Epoch 1/30
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.6068 - loss: 3.1975 - val_accuracy: 0.8182 - val_loss: 0.5097
Epoch 2/30
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7009 - loss: 1.6644 - val_accuracy: 0.8693 - val_loss: 0.4602
Epoch 3/30
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7764 - loss: 0.8770 - val_accuracy: 0.9205 - val_loss: 0.2982
Epoch 4/30
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7991 - loss: 0.6228 - val_accuracy: 0.9148 - val_loss: 0.2942
Epoch 5/30
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8148 - loss: 0.5007 - val_accuracy: 0.9091 - val_loss: 0.3174
Epoch 6/30
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8348 - loss: 0.4816 - val_accuracy: 0.9148 - val_loss: 0.2865
Epoch 7/30
[1m22/22[0m [32m━━━━━━

In [78]:
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2f}")
print(f"Loss: {loss:.2f}")

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9545 - loss: 0.1572 
Test Accuracy: 0.95
Loss: 0.16


In [80]:
model.save("cortana_detector.keras")

### Test End Here
---

In [5]:
def preprocess_dataset():
    print("Processing dataset...")
    all_mfccs = []
    all_labels = []

    expected_samples = int(SAMPLE_RATE * DURATION)

    for label, dataset in enumerate([negative_dataset, positive_dataset]):
        for index, file_name in enumerate(dataset):
            file_path = os.path.join('..', 'samples', 'positive' if label == 1 else 'negative', file_name)

            try:
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
                if len(signal) > expected_samples:
                        signal = signal[:expected_samples]
                elif len(signal) < expected_samples:
                        signal = np.pad(signal, (0, expected_samples - len(signal)), 'constant')

                mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13, n_fft=2048, hop_length=512)

                mfcc = mfcc.T

                all_mfccs.append(mfcc)
                all_labels.append(label)

                print(f'Processed data {index}:{file_name} success')
            except Exception as e:
                print(f'Failed to process {file_name}: {e}')

    return np.array(all_mfccs), np.array(all_labels)

In [6]:
X_data, y_data = preprocess_dataset()

print(f"Shape of X_data (MFCCs): {X_data.shape}")
print(f"Shape of y_data (labels): {y_data.shape}")

Processing dataset...
Processed data 0:90_sample.wav success
Processed data 1:6_sample.wav success
Processed data 2:165_sample.wav success
Processed data 3:399_sample.wav success
Processed data 4:422_sample.wav success
Processed data 5:79_sample.wav success
Processed data 6:349_sample.wav success
Processed data 7:74_sample.wav success
Processed data 8:324_sample.wav success
Processed data 9:125_sample.wav success
Processed data 10:332_sample.wav success
Processed data 11:467_sample.wav success
Processed data 12:118_sample.wav success
Processed data 13:232_sample.wav success
Processed data 14:459_sample.wav success
Processed data 15:242_sample.wav success
Processed data 16:497_sample.wav success
Processed data 17:114_sample.wav success
Processed data 18:237_sample.wav success
Processed data 19:412_sample.wav success
Processed data 20:4_sample.wav success
Processed data 21:476_sample.wav success
Processed data 22:266_sample.wav success
Processed data 23:222_sample.wav success
Processed d

In [7]:
X_data = X_data[..., np.newaxis]

In [8]:
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models

2025-09-07 01:13:11.264918: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.2, random_state=42, stratify=y_data
)

Data split into training and testing sets

In [10]:
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (702, 47, 13, 1)
X_test shape: (176, 47, 13, 1)


In [11]:
input_shape = (X_train.shape[1], X_train.shape[2], 1)

In [17]:
model = models.Sequential([
    # Input Layer
    layers.Input(shape=input_shape),

    # # First Convolutional Block
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Second Convolutional Block
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Flatten the features to feed into the dense layer
    layers.Flatten(),

    # Dense Layer for classification
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5), # Dropout helps prevent overfitting

    # Output Layer
    # Sigmoid is used for binary (2-class) classification
    layers.Dense(1, activation='sigmoid')
])

In [18]:
model.summary()

In [27]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy', # Perfect for Yes/No (1/0) classification
    metrics=['accuracy']
)

Model Training

In [28]:
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test)
)

Epoch 1/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 50ms/step - accuracy: 0.6652 - loss: 1.8361 - val_accuracy: 0.8807 - val_loss: 0.3795
Epoch 2/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 0.8063 - loss: 0.4705 - val_accuracy: 0.8920 - val_loss: 0.3124
Epoch 3/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.8618 - loss: 0.3483 - val_accuracy: 0.8807 - val_loss: 0.2771
Epoch 4/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - accuracy: 0.8960 - loss: 0.2878 - val_accuracy: 0.9148 - val_loss: 0.1791
Epoch 5/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.9117 - loss: 0.2529 - val_accuracy: 0.9432 - val_loss: 0.1418
Epoch 6/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.9245 - loss: 0.2094 - val_accuracy: 0.9318 - val_loss: 0.1415
Epoch 7/20
[1m22/22[0m [32m━━━━

Training Complete

In [30]:
model.save('wake_word.keras')