# Student Audio Classifier (Capuchinbird Detection)
This notebook was made by a student (me!) trying to figure out audio classification.

## Step 1: Install Dependencies

In [1]:
!pip install tensorflow tensorflow-io matplotlib



## Step 2: Import Libraries

In [2]:
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D
import numpy as np
import matplotlib.pyplot as plt
import os
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

TypeError: unhashable type: 'list'

## Step 3: Load One Sample Audio

In [3]:
# Example clip from the Capuchinbird folder, used for the waveform demo below.
CAPUCHIN_FILE = "audio/Parsed_Capuchinbird_Clips/XC3776-3.wav"
# Example clip from the non-Capuchinbird folder.
# NOTE(review): "xxx.wav" looks like a placeholder name — confirm this file exists.
NOT_CAPUCHIN_FILE = "audio/Parsed_Not_Capuchinbird_Clips/xxx.wav"

In [4]:
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a single-channel waveform at 16 kHz.

    Args:
        filename: Path of the WAV file, as accepted by ``tf.io.read_file``.

    Returns:
        The decoded audio with its channel axis squeezed away, resampled
        from the file's own sample rate to 16000 Hz.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 asks the decoder for exactly one channel.
    samples, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    mono = tf.squeeze(samples, axis=-1)
    # The resampler is given the input rate as int64.
    source_rate = tf.cast(source_rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=source_rate, rate_out=16000)

In [5]:
wave = load_wav_16k_mono(CAPUCHIN_FILE)
nwave = load_wav_16k_mono(NOT_CAPUCHIN_FILE)

# Overlay both waveforms on a single axes for a quick visual comparison.
fig, ax = plt.subplots(figsize=(10, 4))
for samples, series_name in ((wave, 'Capuchinbird'), (nwave, 'Not Capuchin')):
    ax.plot(samples, label=series_name)
ax.legend()
ax.set_title('Waveforms')
plt.show()

NotImplementedError: in user code:

    File "C:\development\deep-audio-classifier\venv\lib\site-packages\tensorflow_io\python\ops\audio_ops.py", line 458, in f
        return core_ops.io_audio_resample(
    File "C:\development\deep-audio-classifier\venv\lib\site-packages\tensorflow_io\python\ops\__init__.py", line 88, in __getattr__
        return getattr(self._load(), attrb)
    File "C:\development\deep-audio-classifier\venv\lib\site-packages\tensorflow_io\python\ops\__init__.py", line 84, in _load
        self._mod = _load_library(self._library)
    File "C:\development\deep-audio-classifier\venv\lib\site-packages\tensorflow_io\python\ops\__init__.py", line 69, in _load_library
        raise NotImplementedError(

    NotImplementedError: unable to open file: libtensorflow_io.so, from paths: ['C:\\development\\deep-audio-classifier\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so']
    caused by: ['C:\\development\\deep-audio-classifier\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so not found']


## Next Steps
Now that we've loaded our audio, we'll move on to building datasets, computing spectrograms, and training a model! 🚀

## Step 4: Make TensorFlow Dataset

In [None]:

# Folders holding the positive (Capuchinbird) and negative clips.
POS = os.path.join('audio', 'Parsed_Capuchinbird_Clips')
NEG = os.path.join('audio', 'Parsed_Not_Capuchinbird_Clips')

# One dataset of file paths per class; the glob matches every .wav in the folder.
pos_ds = tf.data.Dataset.list_files(f'{POS}/*.wav')
neg_ds = tf.data.Dataset.list_files(f'{NEG}/*.wav')


In [None]:

def _with_label(label_value):
    """Build a map function that pairs each element with a constant label."""
    def attach(path):
        return path, tf.constant(label_value)
    return attach


# Assign labels (1 for capuchin, 0 for not)
positives = pos_ds.map(_with_label(1))
negatives = neg_ds.map(_with_label(0))

# Combine datasets: all positives first, followed by all negatives.
all_ds = positives.concatenate(negatives)


## Step 5: Convert WAV to Spectrogram

In [None]:

def preprocess(file_path, label):
    """Turn one labelled WAV path into a (spectrogram, label) pair.

    The clip is loaded with ``load_wav_16k_mono``, cut to at most 48000
    samples, left-padded with zeros up to 48000 samples, and converted to
    an STFT magnitude spectrogram with an extra trailing axis of size 1.

    Args:
        file_path: Path of the WAV file to load.
        label: Label for the clip; returned unchanged.

    Returns:
        A ``(spectrogram, label)`` tuple.
    """
    clip = load_wav_16k_mono(file_path)[:48000]
    # Zeros go in front of the audio so every clip ends up 48000 samples long.
    missing = [48000] - tf.shape(clip)
    silence = tf.zeros(missing, dtype=tf.float32)
    padded = tf.concat([silence, clip], axis=0)
    magnitudes = tf.abs(tf.signal.stft(padded, frame_length=320, frame_step=32))
    return tf.expand_dims(magnitudes, axis=2), label


# Map function to dataset
all_ds = all_ds.map(map_func=preprocess)


## Step 6: Train/Test Split

In [None]:

# Shuffle once and keep that order fixed. With the default
# reshuffle_each_iteration=True the dataset is reshuffled every time it is
# iterated, so take()/skip() would select different clips on each pass and
# test clips would leak into the training set.
all_ds = (
    all_ds.cache()
    .shuffle(1000, seed=42, reshuffle_each_iteration=False)
    .batch(16)
    .prefetch(tf.data.AUTOTUNE)
)

# First 36 batches for training, the following 15 batches for testing.
train = all_ds.take(36)
test = all_ds.skip(36).take(15)


## Step 7: Build a Model

In [None]:
# Binary classifier (capuchin vs. not) over STFT magnitude spectrograms.
#
# Input shape: preprocess() pads/crops every clip to 48000 samples and calls
# tf.signal.stft(frame_length=320, frame_step=32). That yields
# 1 + (48000 - 320) // 32 = 1491 frames and 512 // 2 + 1 = 257 frequency bins
# (the FFT length defaults to the next power of two above frame_length),
# plus the trailing channel axis -> (1491, 257, 1).
#
# Output: the labels are single 0/1 integers, so the head is one sigmoid unit
# trained with binary cross-entropy; predictions can then be thresholded at 0.5.
model = Sequential([
    tf.keras.Input(shape=(1491, 257, 1)),
    Conv2D(16, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

## Step 8: Train the Model

In [None]:

# Fit for 4 epochs, validating on the held-out batches after each epoch.
hist = model.fit(
    x=train,
    validation_data=test,
    epochs=4,
)


In [None]:

# Training and validation loss per epoch, drawn on one axes.
fig, ax = plt.subplots()
for history_key in ('loss', 'val_loss'):
    ax.plot(hist.history[history_key], label=history_key)
ax.legend()
ax.set_title('Loss over Epochs')
plt.show()


## Step 9: Test Predictions

In [None]:

# Pull the first batch of the test split as NumPy arrays and score it.
test_batches = test.as_numpy_iterator()
X_test, y_test = next(test_batches)
yhat = model.predict(X_test)
print("Raw predictions:", yhat[:5])


In [None]:

# Convert probabilities to class labels.
# model.predict returns a 2-D array (one row per clip). Comparing a whole row
# with 0.5 inside a Python `if` raises when the row has more than one value,
# so the conversion is done on the array instead:
#   - one output column  -> threshold the probability at 0.5
#   - several columns    -> take the index of the largest score
yhat_arr = np.asarray(yhat)
if yhat_arr.ndim > 1 and yhat_arr.shape[-1] > 1:
    yhat_classes = yhat_arr.argmax(axis=-1).tolist()
else:
    yhat_classes = (yhat_arr.reshape(-1) > 0.5).astype(int).tolist()
print("Predicted classes:", yhat_classes[:5])


## Step 10: Save Model and Done!

In [None]:

# Write the trained model to disk, then confirm on stdout.
save_path = 'models/student_capuchin_model.h5'
model.save(save_path)
print("Model saved!")
