In [None]:
import warnings
warnings.filterwarnings("ignore")

import librosa

import os
import numpy as np
import pickle
from tensorflow.keras.models import load_model

In [None]:
# Genus names the classifier can report. The prediction cell indexes this
# list with the argmax of the model output, so the order must not change.
class_labels = [
    'Acrocephalus',
    'Bubo',
    'Caprimulgus',
    'Emberiza',
    'Ficedula',
    'Glaucidium',
    'Hippolais',
]

In [3]:
# Location of the saved Keras model file (presumably HDF5, given the .h5
# extension) that is used for inference further down.
MODEL_PATH = "models/ArtificialNeuralNetwork_model.h5"
model = load_model(MODEL_PATH)

In [4]:
# Load the pickled scaler object that later transforms the feature vector.
# NOTE: pickle.load can execute arbitrary code, so only point this at a
# trusted file produced by this project.
with open("models/Scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)

In [5]:
def audio_to_signal(path):
    """Summarise an audio file as a flat list of mean feature values.

    Loads ``path`` as mono audio with ``librosa.load`` (``duration=30``) and
    takes ``np.mean`` of each of these librosa features, in this order:
    chroma STFT, RMS, spectral centroid, spectral bandwidth, spectral
    roll-off, zero-crossing rate, followed by one mean per row of the MFCC
    matrix.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.

    Returns:
        list[str]: The means formatted as strings (not floats): six
        whole-feature means first, then one entry per MFCC row.
    """
    samples, rate = librosa.load(path, mono=True, duration=30)

    # The order of these features defines the order of the returned vector.
    whole_features = (
        librosa.feature.chroma_stft(y=samples, sr=rate),
        librosa.feature.rms(y=samples),
        librosa.feature.spectral_centroid(y=samples, sr=rate),
        librosa.feature.spectral_bandwidth(y=samples, sr=rate),
        librosa.feature.spectral_rolloff(y=samples, sr=rate),
        librosa.feature.zero_crossing_rate(samples),
    )
    mfcc_rows = librosa.feature.mfcc(y=samples, sr=rate)

    # f-string formatting is kept on purpose so the string form of each mean
    # is exactly what it has always been.
    formatted = [f'{np.mean(values)}' for values in whole_features]
    formatted.extend(f'{np.mean(row)}' for row in mfcc_rows)
    return formatted

---

In [7]:
# Audio file to classify; this path is handed to audio_to_signal below.
user_input_path = "user_input/Ficedula (3).wav"

In [8]:
# audio_to_signal returns the feature means as strings; convert each one to
# a float so the vector can be used numerically.
input_data = list(map(float, audio_to_signal(user_input_path)))

In [9]:
# Inspect the raw (unscaled) feature values extracted from the audio file.
print(input_data)

[0.6602177619934082, 0.025315677747130394, 1971.3288345965448, 2201.3272493486575, 3921.5429536329334, 0.0795871982633514, -445.1826171875, 68.50846862792969, 16.667224884033203, 45.806678771972656, -4.597455978393555, 22.789072036743164, 11.248339653015137, 12.05607795715332, 4.648726940155029, 12.900672912597656, 7.74078369140625, 5.227572917938232, 6.800922393798828, 9.159720420837402, 4.310463905334473, 10.25788688659668, 3.055110454559326, 6.891692161560059, 2.9915318489074707, 8.031048774719238]


In [10]:
# Shape the feature vector as a single-sample 2-D batch (1 row, n columns)
# before handing it to scaler.transform.
# reshape(1, -1) is used instead of np.array([input_data]) so the cell is
# idempotent: on a re-run input_data is already (1, n) and stays that way,
# rather than being wrapped again into a 3-D array.
input_data = np.asarray(input_data, dtype=float).reshape(1, -1)
scaled_data = scaler.transform(input_data)

In [11]:
# Inspect the feature row after it has been passed through scaler.transform.
print(scaled_data)

[[ 1.5055534   0.27288854 -0.89385409  0.15569968 -0.58184264 -1.04693854
  -0.26696205  0.38253569  0.90039399  0.87717425  0.05243413  0.68425994
   1.00276925  1.02921235  0.63392876  1.41479445  1.06916421  0.16423825
   0.86870474  0.70338759  0.55783079  1.19257615  0.49616955  0.96301358
   0.81465397  1.21773832]]


In [12]:
# Run the loaded model on the single scaled feature row.
prediction = model.predict(scaled_data, verbose=1)

# Index of the largest output value, the value itself, and the matching name.
# NOTE(review): "probability" presumably holds a softmax probability; confirm
# against the model's output layer.
class_label = np.argmax(prediction)
probability = prediction[0][class_label]
class_name = class_labels[class_label]

# One item per line: index, genus name, score.
print(class_label, class_name, probability, sep="\n")

4
Ficedula
0.63588977
