In [31]:
# 🛠️ 1. Import libraries
import numpy as np
import tensorflow as tf
import os
import librosa
import random
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

In [32]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [42]:
import os

folder_path = '/content/drive/My Drive/data'
os.listdir(folder_path)  # Lists contents of the folder


['Asean_Koel',
 'hen_cock-export',
 'crow_dataset',
 'Noise',
 'Rose_ringed_Parkeet']

In [43]:
from scipy.io import wavfile

# Replace with the path to your .wav file
file_path = '/content/drive/MyDrive/data/Rose_ringed_Parkeet/Rose-ringed Parakeet.5polkukm.ingestion-54c4c64498-k9fpq.s1.wav'

# Read the WAV file
sample_rate, data = wavfile.read(file_path)

print("Sampling rate (Hz):", sample_rate)
print("Audio duration (sec):", len(data) / sample_rate)


Sampling rate (Hz): 16000
Audio duration (sec): 1.0


  sample_rate, data = wavfile.read(file_path)


In [44]:
import numpy as np
import os
import librosa

# 🛠️ 2. Prepare your dataset
DATASET_PATH = folder_path  # replace with your path
commands = ['Asean_Koel', 'crow_dataset', 'hen_cock-export', 'Noise', 'Rose_ringed_Parkeet']

X = []
Y = []

SAMPLE_RATE = 16000
DURATION = 1  # 1 second clips
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION
FFT_SIZE = 1024
MFCC_FEATURES = 26

def extract_mfcc_manual(audio, fft_size=FFT_SIZE, sample_rate=SAMPLE_RATE, mfcc_features=MFCC_FEATURES):
    # Step 1: Apply Hamming window
    window = 0.54 - 0.46 * np.cos(2 * np.pi * np.arange(fft_size) / (fft_size - 1))
    windowed_signal = audio[:fft_size] * window

    # Step 2: Compute FFT
    fft_output = np.fft.rfft(windowed_signal)
    magnitude = np.abs(fft_output)

    # Step 3: Extract MFCC-like features by mapping to specific bins
    features = []
    for j in range(mfcc_features):
        bin_idx = int(np.interp(j, [0, mfcc_features - 1], [2, fft_size // 2 - 1]))  # Adjust FFT bin
        energy = np.log10(magnitude[bin_idx] + 1e-6)  # Log energy
        q = int(np.clip(energy / 0.05, -128, 127))  # Quantization to int8
        features.append(q)

    return np.array(features, dtype=np.int8)

# Load and process audio files
for label_idx, label in enumerate(commands):
    folder_path = os.path.join(DATASET_PATH, label)
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".wav"):
            file_path = os.path.join(folder_path, file_name)
            audio, sr = librosa.load(file_path, sr=SAMPLE_RATE)

            if len(audio) < SAMPLES_PER_TRACK:
                audio = np.pad(audio, (0, SAMPLES_PER_TRACK - len(audio)))
            else:
                audio = audio[:SAMPLES_PER_TRACK]

            # Simulate MFCC extraction as done in Arduino code
            mfcc_features = extract_mfcc_manual(audio)

            X.append(mfcc_features)
            Y.append(label_idx)

X = np.array(X)
Y = np.array(Y)

print(f"Dataset shape: {X.shape}, Labels shape: {Y.shape}")


Dataset shape: (2518, 26), Labels shape: (2518,)


In [47]:
# 🛠️ 3. Train/test split
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer

'''model = Sequential([
    InputLayer(input_shape=(13,)),   # Flat MFCC array
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(commands), activation='softmax')
])'''

from tensorflow.keras.layers import Dropout, GaussianNoise

model = Sequential([
    GaussianNoise(0.1, input_shape=(26,)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(len(commands), activation='softmax')
])


model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [50]:
# 🛠️ 5. Train model
history = model.fit(X_train, Y_train, epochs=200, batch_size=32, validation_data=(X_test, Y_test))

Epoch 1/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7160 - loss: 0.7078 - val_accuracy: 0.7183 - val_loss: 0.8266
Epoch 2/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7169 - loss: 0.7446 - val_accuracy: 0.7143 - val_loss: 0.8469
Epoch 3/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7120 - loss: 0.7170 - val_accuracy: 0.7222 - val_loss: 0.8466
Epoch 4/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7429 - loss: 0.6774 - val_accuracy: 0.7302 - val_loss: 0.8630
Epoch 5/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7310 - loss: 0.6987 - val_accuracy: 0.7183 - val_loss: 0.8518
Epoch 6/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7135 - loss: 0.7219 - val_accuracy: 0.7321 - val_loss: 0.8523
Epoch 7/200
[1m63/63[0m [32m━━━

In [51]:
# 🛠️ 6. Evaluate
loss, acc = model.evaluate(X_test, Y_test)
print(f"Test accuracy: {acc*100:.2f}%")

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7525 - loss: 0.9226 
Test accuracy: 74.80%


In [None]:
def representative_dataset():
    for i in range(100):
        sample = X_train[i]  # Shape: (13,)
        sample = np.expand_dims(sample, axis=0)  # Shape: (1, 13)
        yield [sample.astype(np.float32)]


# Convert the model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset  # Needed for full int8

# Force all ops to be int8
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Set input/output types to int8
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

# Convert
tflite_model = converter.convert()

# Save model
with open("birdsong_model_quantized.tflite", "wb") as f:
    f.write(tflite_model)

print(f"TFLite model size: {len(tflite_model) / 1024:.2f} KB")


Saved artifact at '/tmp/tmpuads5lxf'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 26), dtype=tf.float32, name='keras_tensor_34')
Output Type:
  TensorSpec(shape=(None, 5), dtype=tf.float32, name=None)
Captures:
  133940298370512: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133940298369168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133940298369552: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133940298367056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133940298369360: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133940298370128: TensorSpec(shape=(), dtype=tf.resource, name=None)
TFLite model size: 8.96 KB




In [55]:
!xxd -i birdsong_model_quantized.tflite > birdsong_model.cc


In [57]:
from google.colab import files
files.download("birdsong_model_quantized.tflite")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>