In [7]:
!pip install librosa

Defaulting to user installation because normal site-packages is not writeable
Collecting librosa
  Downloading librosa-0.10.1-py3-none-any.whl.metadata (8.3 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting numba>=0.51.0 (from librosa)
  Downloading numba-0.59.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.7 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Downloading soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl.metadata (14 kB)
Collecting pooch>=1.0 (from librosa)
  Downloading pooch-1.8.1-py3-none-any.whl.metadata (9.5 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.3.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting msgpack>=1.0 (from librosa)
  Downloading msgpack-1.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting llvmlite<0.43,>=0.42.0dev0 (from numba>=0.51.0->librosa)
  Down

In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import librosa
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

In [89]:
def get_sample(AUDIO_FILE = './Recordings/up/1.m4a'):
    samples, sample_rate = librosa.load(AUDIO_FILE, sr=None)
    for i in range(len(samples)):
        if (abs(samples[i]) < 0.5):
            continue
        break
    sub_samples = samples[i-int(sample_rate/5):i+int(sample_rate/5)]
    if len(sub_samples) < 19200:
        return([])
    return(sub_samples)

def make_mel(samples, sample_rate=48000, n_fft = 1024):
    mel_spectrogram = librosa.feature.melspectrogram(
        y=samples,
        sr=sample_rate,
        n_fft=n_fft,
        hop_length=n_fft + 1,
        n_mels=128,  # Number of Mel bands (adjust as needed)
        fmin=20,     # Minimum frequency (adjust as needed)
        fmax=8000    # Maximum frequency (adjust as needed)
    )
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
    return(mel_spectrogram_db)

In [90]:
x_train = [
    get_sample(AUDIO_FILE = './Recordings/up/1.m4a'),
    get_sample(AUDIO_FILE = './Recordings/up/2.m4a'),
    get_sample(AUDIO_FILE = './Recordings/down/1.m4a'),
    get_sample(AUDIO_FILE = './Recordings/down/2.m4a')
]
y_train = [
    1,
    1,
    0,
    0
]

x_test = [
    get_sample(AUDIO_FILE = './Recordings/up/3.m4a'),
    get_sample(AUDIO_FILE = './Recordings/down/3.m4a')
]
y_test = [
    1,
    0
]

x_train_mel = []
x_test_mel = []

for xtr in x_train:
    x_train_mel.append(make_mel(xtr))
    
for xte in x_test:
    x_test_mel.append(make_mel(xte))

x_train_mel = np.array(x_train_mel)
x_test_mel  = np.array(x_test_mel)

# SVM trained on raw sample data

In [110]:
%%time
svm_model = SVC(kernel='linear', C=1.0)

svm_model.fit(x_train, y_train)

CPU times: user 7.94 ms, sys: 0 ns, total: 7.94 ms
Wall time: 5.25 ms


In [111]:
%%time
# Predict on validation set
y_pred = svm_model.predict(x_test)
print(y_pred)
# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Validation Accuracy: {accuracy:.2f}")

[1 0]
Validation Accuracy: 1.00
CPU times: user 7.13 ms, sys: 29.3 ms, total: 36.4 ms
Wall time: 13.8 ms


# SVM trained on mel spectrographs

In [112]:
# SVM trained on raw sample data
svm_model_mel = SVC(kernel='linear', C=1.0)

svm_model_mel.fit(x_train_mel.reshape(len(x_train_mel),-1), y_train)

CPU times: user 658 µs, sys: 10.2 ms, total: 10.8 ms
Wall time: 8.06 ms


In [113]:
%%time
# Predict on validation set
y_pred = svm_model_mel.predict(x_test_mel)
print(y_pred)
# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Validation Accuracy: {accuracy:.2f}")

[1 0]
Validation Accuracy: 1.00
CPU times: user 5.13 ms, sys: 0 ns, total: 5.13 ms
Wall time: 4.38 ms


# CNN

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Create a Sequential model
model = Sequential()

# Add a 2D convolutional layer with 32 filters, kernel size (3, 3), and input shape (128, 19, 1)
model.add(Conv2D(32, kernel_size=(3, 3), input_shape=(128, 19, 1), activation='relu'))

# Add a max pooling layer with pool size (2, 2)
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the output
model.add(Flatten())

# Add a fully connected layer with 64 units and relu activation
model.add(Dense(64, activation='relu'))

# Add the output layer with 1 unit and sigmoid activation
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()


In [140]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Specify data augmentation options (you can customize these as needed)
datagen = ImageDataGenerator(
    rescale=1./255
)

In [160]:
x_train_mel[:,:,:,np.newaxis].shape

(4, 128, 19, 1)

In [167]:
dataset = tf.data.Dataset.from_tensor_slices((x_train_mel.reshape(-1,128,19,1),y_train))
#dataset = tf.data.Dataset.from_tensor_slices((x_train_mel,y_train))


In [173]:
model.fit(dataset, epochs=10, batch_size=4)

# Evaluate the model on the test data (assuming you have x_test and y_test)
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_accuracy:.4f}")

Epoch 1/10


ValueError: Exception encountered when calling Conv2D.call().

[1mNegative dimension size caused by subtracting 3 from 1 for '{{node sequential_1_1/conv2d_1_1/convolution}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential_1_1/ExpandDims, sequential_1_1/conv2d_1_1/convolution/ReadVariableOp)' with input shapes: [128,19,1,1], [3,3,1,32].[0m

Arguments received by Conv2D.call():
  • inputs=tf.Tensor(shape=(128, 19, 1, 1), dtype=float32)