In [1]:
!pip install seaborn librosa scikit-learn tensorflow keras



In [2]:
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# for displaying the audio
from IPython.display import Audio

# for loading the audio file
import librosa
import librosa.display
import random
from sklearn.model_selection import train_test_split

from sklearn.model_selection import train_test_split
# from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [3]:
!pip install h5py



In [None]:
from google.colab import files

# Ask the user to upload a file through Colab and keep the result in `uploaded`.
# Presumably this is where spectrograms.h5 is provided — confirm.
uploaded = files.upload()


In [None]:
import h5py

# the file name is spectrograms.h5
file_path = 'spectrograms.h5'

# Probe the file once. On failure only a message is printed, so later cells
# that open the same path will still raise.
try:
    # Open the HDF5 file and list all keys
    with h5py.File(file_path, 'r') as f:
        keys = list(f.keys())
        print("Keys in the HDF5 file:", keys)
except Exception as e:
    print(f"Error opening the HDF5 file: {e}")


In [None]:


# Re-open the file without a context manager so the next cell can index into `f`.
# NOTE(review): this handle is never closed with f.close(); consider reading what
# is needed inside a `with` block instead.
f = h5py.File(file_path, 'r')
list(f.keys())


In [None]:
# Look at one species' dataset through the handle opened in the previous cell
# and display its shape.
dset = f['bkcchi']
dset.shape

In [None]:
# Draw one figure per dataset in the file, using the first slice of each.
# Note: `with ... as f` rebinds the notebook-level name `f`, which refers to a
# closed file once this cell has finished.
with h5py.File(file_path, 'r') as f:
    for species in f.keys():
        # Copy the whole dataset into memory
        data = np.array(f[species])

        # Visualizing the first spectrogram of each species
        # (data[0] is converted from power to dB relative to its own maximum)
        plt.figure(figsize=(10, 4))
        librosa.display.specshow(librosa.power_to_db(data[0], ref=np.max),
                                 y_axis='mel', fmax=8000, x_axis='time', cmap='viridis')
        plt.colorbar(format='%+2.0f dB')
        plt.title(f'Spectrogram of {species}')
        plt.tight_layout()
        plt.show()

In [None]:

# Load the two classes used by the binary classifier. Each dataset is copied
# into memory so the arrays remain usable after the file is closed.
try:
    with h5py.File(file_path, 'r') as f:
        amecro_spectrograms = np.array(f['amecro'])
        stejay_spectrograms = np.array(f['stejay'])
except Exception as e:
    print(f"An error occurred: {e}")
    # Every following cell needs both arrays; stopping here gives the real
    # cause instead of a NameError further down.
    raise


In [None]:
# Combine spectrograms into one list for processing
# NOTE(review): list(...) splits each array along its FIRST axis, so that axis is
# assumed to index individual recordings — confirm against the dataset shape
# shown earlier.
two_species = list(amecro_spectrograms) + list(stejay_spectrograms)

# Find the maximum length of the time dimension among all spectrograms
# (axis 1 of each item is treated as time here)
max_len = max(i.shape[1] for i in two_species )
max_len

In [None]:
padded_data = []

# Pad each spectrogram on the right of axis 1 with zeros so that all of them
# share the same length (max_len). The per-item debug print was removed: it
# dumped every array to the cell output.
for i in two_species:
    padding_width = max_len - i.shape[1]
    pad = np.pad(i, pad_width=((0, 0), (0, padding_width)), mode='constant', constant_values=0)
    padded_data.append(pad)

# Convert list of arrays to a single numpy array
padded_data = np.array(padded_data)

# Create labels: '0' for 'amecro' and '1' for 'stejay', in the same order in
# which the two species were concatenated into two_species
labels = np.concatenate([np.zeros(len(amecro_spectrograms)), np.ones(len(stejay_spectrograms))])

In [None]:
# Hold out 20% of the samples for testing; random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(padded_data, labels, test_size=0.2, random_state=1)

In [None]:
# Sanity check: shapes of the four arrays produced by the split.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:

# Seed NumPy and TensorFlow before building the model.
np.random.seed(123)
tf.random.set_seed(123)

# Baseline binary CNN: two Conv2D/MaxPooling2D stages, one dense layer and a
# single sigmoid unit. The input shape is read from the training data (plus one
# channel axis) rather than hard-coded, so the cell keeps working if the padded
# length changes; this matches how the multi-class model declares its input.
b1 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2], 1)),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

b1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# NOTE(review): the held-out test split doubles as validation data here, so the
# test metrics reported later are not from unseen data — confirm this is intended.
history = b1.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10)

# Accuracy and loss curves share one axis.
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.title('Binary Model performance')
plt.xlabel('Epoch')
plt.ylabel('Performance')
plt.legend()
plt.show()

In [None]:
# Loss and accuracy of b1 on the data it was trained on.
train_loss, train_acc = b1.evaluate(X_train, y_train)
print(f'Train loss: {train_loss}, Train accuracy: {train_acc}')

In [None]:

# Loss and accuracy of b1 on the test split (the same split that was passed as
# validation_data during training).
test_loss, test_acc = b1.evaluate(X_test, y_test)
print(f'Test loss: {test_loss}, Test accuracy: {test_acc}')

The binary classification model designed to identify American Crow and Steller's Jay has demonstrated exceptional performance, achieving perfect accuracy (1.0) and remarkably low loss in both training and testing phases. Specifically, the training loss was approximately 0.0000133, with an even lower test loss of about 0.0000082, showcasing the model's ability to generalize effectively to new data without any overfitting issues. These results suggest that the model is stable and finely calibrated, potentially suitable for real-world deployment, assuming it continues to perform consistently well in more extensive validation and practical evaluations.


In [None]:

# Seed NumPy and TensorFlow before building the model.
np.random.seed(123)
tf.random.set_seed(123)

# Variant of the binary CNN with a wider dense layer (256 units) followed by
# heavy dropout (0.7). The input shape is read from the training data (plus one
# channel axis) rather than hard-coded to (343, 52, 1).
b2 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2], 1)),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.7),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

b2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Trained for 5 epochs; `history` is overwritten with this run's curves.
history = b2.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5)

# Plot the accuracy and loss
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.title('Binary Model performance')
plt.xlabel('Epoch')
plt.ylabel('Performance')
plt.legend()
plt.show()

# Multiclass

In [None]:
import h5py
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Specify the file path
file_path = 'spectrograms.h5'
# Dataset names to load. Their order matters: the position of each name in this
# list becomes its integer class label when the labels are built below.
species_keys = ['amecro', 'barswa', 'bkcchi', 'blujay', 'daejun', 'houfin', 'mallar3', 'norfli', 'rewbla', 'stejay', 'wesmea', 'whcspa']


In [None]:
# Copy every listed species' dataset into memory, keyed by species name.
try:
    with h5py.File(file_path, 'r') as f:
        all_species_data = {species: np.array(f[species]) for species in species_keys}
except Exception as e:
    print(f"An error occurred: {e}")
    # The following cells cannot run without all_species_data; stop here so the
    # real cause is reported instead of a later NameError.
    raise


In [None]:
# Combine all spectrograms and labels
spectrograms = []
labels = []

# NOTE(review): list(array) splits along the FIRST axis, so that axis is assumed
# to index individual recordings — confirm against the dataset shapes.
for idx, species in enumerate(species_keys):
    spectrograms += list(all_species_data[species])
    labels += [idx] * len(all_species_data[species])  # Create labels based on the index in species_keys

# Find the maximum length of the time dimension among all spectrograms
max_len = max(s.shape[1] for s in spectrograms)

# Pad each spectrogram to have the same time dimension
# (zeros are appended at the end of axis 1 only)
padded_spectrograms = [np.pad(s, pad_width=((0, 0), (0, max_len - s.shape[1])), mode='constant', constant_values=0) for s in spectrograms]

# Convert lists to numpy arrays
# (this rebinds `labels`, which previously held the binary labels)
padded_spectrograms = np.array(padded_spectrograms)
labels = np.array(labels)


In [None]:
# Split the dataset into training and testing for multi-class classification
# (20% held out; random_state makes the shuffle repeatable; no stratification is requested)
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(padded_spectrograms, labels, test_size=0.2, random_state=1)


In [None]:
# Set seeds for reproducibility
np.random.seed(123)
tf.random.set_seed(123)

# Build the model
# Two Conv2D/MaxPooling2D stages, one dense layer, and a softmax over all species.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_multi.shape[1], X_train_multi.shape[2], 1)),  # Adjusted input shape
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(len(species_keys), activation='softmax')  # Output layer for multi-class classification
])

# Sparse loss: the labels are integer class indices, not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Train the model
# NOTE(review): the channel axis is only added to X_train_multi in a later cell,
# while the model's Input declares one — confirm Keras accepts the data as-is.
# The test split is also used as validation data here.
history = model.fit(X_train_multi, y_train_multi, validation_data=(X_test_multi, y_test_multi), epochs=10)

# Plot training history
# (accuracy and loss curves share one axis)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Multi-Class Model Performance')
plt.xlabel('Epoch')
plt.ylabel('Performance')
plt.legend()
plt.show()


In [None]:
# Reshape the data to add the channel dimension.
# The arrays are reassigned in place, so without the ndim guard every re-run of
# this cell would append one more trailing axis and break the models below.
if X_train_multi.ndim == 3:
    X_train_multi = X_train_multi[..., np.newaxis]
if X_test_multi.ndim == 3:
    X_test_multi = X_test_multi[..., np.newaxis]

In [None]:
# Baseline model: two Conv2D/MaxPooling2D stages, Dense(128), softmax over all species.
model_1 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_multi.shape[1], X_train_multi.shape[2], 1)),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(len(species_keys), activation='softmax')
])

# Integer labels, hence the sparse categorical loss.
model_1.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Deeper model: adds a third Conv2D(128)/MaxPooling2D stage and widens the dense
# layer to 256 units compared with the baseline.
model_2 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_multi.shape[1], X_train_multi.shape[2], 1)),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(len(species_keys), activation='softmax')
])

model_2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Model with Dropout for regularization: the deeper architecture with dropout
# after the second and third pooling stages (0.25) and after the dense layer (0.5).
model_3 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_multi.shape[1], X_train_multi.shape[2], 1)),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(len(species_keys), activation='softmax')
])

model_3.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Different optimizer: same layers as the deeper model, compiled with SGD
# instead of Adam.
model_4 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_multi.shape[1], X_train_multi.shape[2], 1)),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(len(species_keys), activation='softmax')
])

model_4.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Train the four candidates one after another on the same split, 10 epochs each.
# The test split is passed as validation data for every run.
history_1 = model_1.fit(X_train_multi, y_train_multi, validation_data=(X_test_multi, y_test_multi), epochs=10)
history_2 = model_2.fit(X_train_multi, y_train_multi, validation_data=(X_test_multi, y_test_multi), epochs=10)
history_3 = model_3.fit(X_train_multi, y_train_multi, validation_data=(X_test_multi, y_test_multi), epochs=10)
history_4 = model_4.fit(X_train_multi, y_train_multi, validation_data=(X_test_multi, y_test_multi), epochs=10)

def plot_history(histories, title):
    """Overlay training and validation accuracy curves of several runs.

    Args:
        histories: Iterable of (history, label) pairs. Each history must expose
            a `.history` mapping with 'accuracy' and 'val_accuracy' entries.
        title: Text shown as the figure title.
    """
    plt.figure(figsize=(12, 8))

    for entry in histories:
        history, label = entry
        curves = history.history
        # Training curve first, then validation, so legend order is stable
        plt.plot(curves['accuracy'], label=f'{label} Training Accuracy')
        plt.plot(curves['val_accuracy'], label=f'{label} Validation Accuracy')

    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

# List of model histories and labels
# (pairs of Keras History object and the legend prefix to use for it)
histories = [(history_1, 'Model 1'), (history_2, 'Model 2'), (history_3, 'Model 3'), (history_4, 'Model 4')]

# Plot training and validation accuracy for all models
plot_history(histories, 'Comparison of Model Accuracies')


In [None]:
def plot_histories(histories, title, metric='accuracy'):
    """Overlay one training/validation metric for several runs in a single figure.

    Args:
        histories: Iterable of (history, label) pairs. Each history must expose
            a `.history` mapping containing `metric` and `val_<metric>`.
        title: Text shown as the figure title.
        metric: Key of the metric to draw, e.g. 'accuracy' or 'loss'.
    """
    plt.figure(figsize=(14, 7))

    for entry in histories:
        history, label = entry
        recorded = history.history
        plt.plot(recorded[metric], label=f'{label} Training {metric.capitalize()}')
        plt.plot(recorded[f'val_{metric}'], label=f'{label} Validation {metric.capitalize()}')

    plt.title(title)
    plt.xlabel('Epochs')
    # The y axis is labelled with the capitalised metric name
    plt.ylabel(metric.capitalize())
    plt.legend()
    plt.show()

# List of model histories and labels
# (pairs of Keras History object and the legend prefix to use for it)
histories = [
    (history_1, 'Model 1'),
    (history_2, 'Model 2'),
    (history_3, 'Model 3'),
    (history_4, 'Model 4')
]

# Plot training and validation accuracy for all models, then the same for loss
plot_histories(histories, 'Comparison of Model Accuracies', metric='accuracy')
plot_histories(histories, 'Comparison of Model Loss', metric='loss')


After evaluating the performance of multiple neural network models with different architectures and hyperparameters for multi-class classification of bird species, Model 2 emerged as the best choice. Model 2, a deeper network with additional convolutional and dense layers, achieved perfect accuracy on both the training and validation datasets, demonstrating its ability to effectively learn and generalize the patterns in the data. It also had the lowest final training and validation loss values, indicating high confidence in its predictions. In contrast, the baseline model (Model 1) and the model with dropout regularization (Model 3) also performed well but had slightly higher loss values. The model using the SGD optimizer (Model 4) showed significantly lower performance, with reduced accuracy and higher loss. Based on these results, Model 2's superior accuracy and minimal loss make it the most robust and reliable model for accurately classifying the 12 bird species in our dataset.

In [None]:
# Save the best model using the native Keras format
# (written to the current working directory; the inference cells load it by this name)
model_2.save('best_bird_species_classifier.keras')


In [None]:
import librosa
import numpy as np
import h5py
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
import pandas as pd
import tensorflow as tf

# Load the best model
model = tf.keras.models.load_model('best_bird_species_classifier.keras')

# Bird species labels (same order as the species_keys list used to build the training labels)
bird_types = ['amecro', 'barswa', 'bkcchi', 'blujay', 'daejun', 'houfin', 'mallar3', 'norfli', 'rewbla', 'stejay', 'wesmea', 'whcspa']

# Load the audio file, resampled to 22050 Hz
# NOTE(review): the spectrogram file in this cell is written under
# '/content/drive/My Drive/'; confirm that 'my-drive' is the intended folder
# and that Drive is mounted before this cell runs.
audio_file_path = '/content/drive/my-drive/test1.mp3'
y, sr = librosa.load(audio_file_path, sr=22050)

# Window length for the energy scan below. generate_spectrogram calls
# librosa.stft with its defaults, so this value is only used here.
n_fft = 512
# Hop (in samples) chosen so the scan visits roughly 343 windows. The previous
# expression divided the duration in SECONDS by 343, which truncates to 0 for
# any clip shorter than 343 s and therefore always fell back to a 1-sample hop.
hop_length = max(1, len(y) // 343)

frame_length = n_fft
# Short-time energy of each window (NumPy reduction instead of the Python builtin sum)
energy = np.array([
    np.sum(np.abs(y[i:i+frame_length]**2))
    for i in range(0, len(y), hop_length)
])

# Find frames with energy higher than a threshold (half of the loudest window)
threshold = np.max(energy) * 0.5
high_energy_frames = np.where(energy > threshold)[0]

# Convert frames to time (seconds), using the same hop as the scan
loud_times = librosa.frames_to_time(high_energy_frames, sr=sr, hop_length=hop_length)

def resize_spectrogram(spectrogram, target_shape):
    """Rescale a 2-D spectrogram to `target_shape` with linear interpolation.

    Args:
        spectrogram: 2-D array to resize.
        target_shape: Desired (rows, columns).

    Returns:
        The array produced by scipy.ndimage.zoom with order=1.
    """
    source_rows = spectrogram.shape[0]
    row_factor = target_shape[0] / source_rows
    source_cols = spectrogram.shape[1]
    col_factor = target_shape[1] / source_cols
    # order=1 selects linear interpolation along both axes
    return zoom(spectrogram, (row_factor, col_factor), order=1)

# Function to generate a spectrogram and save it
def generate_spectrogram(audio, start_time, sr, filename):
    """Compute a dB-scaled STFT spectrogram of a 2-second clip and store it in HDF5.

    Args:
        audio: Audio samples (the notebook passes the array returned by librosa.load).
        start_time: Start of the clip, in seconds.
        sr: Sample rate used to convert seconds to sample indices.
        filename: Name of the HDF5 dataset to write.

    Returns:
        The spectrogram, resized to (256, 343) when the STFT output has another shape.
    """
    # Extract 2-second clip (slicing yields a shorter clip near the end of the audio)
    start_sample = int(start_time * sr)
    end_sample = start_sample + 2 * sr
    clip = audio[start_sample:end_sample]

    # Generate spectrogram, in dB relative to the clip's own peak
    D = librosa.stft(clip)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Resize the spectrogram if necessary
    if S_db.shape != (256, 343):
        S_db = resize_spectrogram(S_db, (256, 343))

    # Save spectrogram to HDF5
    with h5py.File('/content/drive/My Drive/test1_spectrograms.h5', 'a') as file:
        # The file is opened in append mode and create_dataset raises when the
        # name already exists (e.g. on a re-run), so drop the old entry first.
        if filename in file:
            del file[filename]
        file.create_dataset(filename, data=S_db)
    return S_db

# Start from an empty HDF5 file: generate_spectrogram appends to it, so datasets
# left by an earlier run would otherwise collide with, or be mixed into, this run.
with h5py.File('/content/drive/My Drive/test1_spectrograms.h5', 'w'):
    pass

# Process each loud section and visualize the last one
S_db = None  # reset so a value left behind by another cell is never plotted
for i, time in enumerate(loud_times):
    S_db = generate_spectrogram(y, time, sr, f'spectrogram_{i}')

if S_db is None:
    print('No loud sections found; nothing to plot.')
else:
    librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.show()

def prepare_spectrogram(s_db):
    """Scale a spectrogram by its peak magnitude and append a channel axis.

    Args:
        s_db: Spectrogram array (the notebook passes dB values read from HDF5).

    Returns:
        The values divided by max(|s_db|), with a trailing axis of length 1.
        An all-zero input is returned unscaled rather than as NaNs from 0/0.
    """
    # Assuming the training data was normalized or preprocessed similarly
    peak = np.max(np.abs(s_db))
    # Guard the division: a flat (all-zero) spectrogram has a peak of 0
    scale = peak if peak > 0 else 1.0
    s_db_normalized = s_db / scale
    return np.expand_dims(s_db_normalized, axis=-1)  # Add channel dimension if needed

# Open the file with test spectrograms
with h5py.File('/content/drive/My Drive/test1_spectrograms.h5', 'r') as file:
    # List all datasets (spectrograms) in the file
    spectrogram_keys = list(file.keys())
    # file[key][:] reads each dataset fully into memory before it is prepared
    test_spectrograms = [prepare_spectrogram(file[key][:]) for key in spectrogram_keys]

# NOTE(review): these inputs were resized to (256, 343) and peak-normalised, while
# no normalisation of the training arrays is visible in this notebook — confirm
# that shape and scaling match what the saved model was trained on.
predictions = model.predict(np.array(test_spectrograms))

# Index of the highest-scoring class for each spectrogram
predicted_classes = np.argmax(predictions, axis=1)

# Map class indices back to species codes
predicted_birds = [bird_types[i] for i in predicted_classes]

# Create a DataFrame to display results
results_df = pd.DataFrame({
    'Spectrogram Key': spectrogram_keys,
    'Predicted Bird': predicted_birds
})

# Display the DataFrame
print(results_df)


In [None]:
from google.colab import files
import librosa
import numpy as np
import h5py
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
import pandas as pd
import tensorflow as tf

# Upload the file
uploaded = files.upload()

# Get the uploaded file path (name of the first uploaded file)
audio_file_path = list(uploaded.keys())[0]

# Load the best model
model = tf.keras.models.load_model('best_bird_species_classifier.keras')

# Bird species labels (same order as the species_keys list used to build the training labels)
bird_types = ['amecro', 'barswa', 'bkcchi', 'blujay', 'daejun', 'houfin', 'mallar3', 'norfli', 'rewbla', 'stejay', 'wesmea', 'whcspa']

# Load the audio file, resampled to 22050 Hz
y, sr = librosa.load(audio_file_path, sr=22050)

# Window length for the energy scan below. generate_spectrogram calls
# librosa.stft with its defaults, so this value is only used here.
n_fft = 512
# Hop (in samples) chosen so the scan visits roughly 343 windows. The previous
# expression divided the duration in SECONDS by 343, which truncates to 0 for
# any clip shorter than 343 s and therefore always fell back to a 1-sample hop.
hop_length = max(1, len(y) // 343)

frame_length = n_fft
# Short-time energy of each window (NumPy reduction instead of the Python builtin sum)
energy = np.array([
    np.sum(np.abs(y[i:i+frame_length]**2))
    for i in range(0, len(y), hop_length)
])

# Find frames with energy higher than a threshold (half of the loudest window)
threshold = np.max(energy) * 0.5
high_energy_frames = np.where(energy > threshold)[0]

# Convert frames to time (seconds), using the same hop as the scan
loud_times = librosa.frames_to_time(high_energy_frames, sr=sr, hop_length=hop_length)

def resize_spectrogram(spectrogram, target_shape):
    """Return `spectrogram` rescaled to `target_shape` (rows, columns).

    The per-axis zoom factors are target size divided by current size, and
    scipy.ndimage.zoom is called with order=1 (linear interpolation).
    """
    rows_scale = target_shape[0] / spectrogram.shape[0]
    cols_scale = target_shape[1] / spectrogram.shape[1]
    zoom_factors = (rows_scale, cols_scale)
    return zoom(spectrogram, zoom_factors, order=1)

# Function to generate a spectrogram and save it
def generate_spectrogram(audio, start_time, sr, filename):
    """Compute a dB-scaled STFT spectrogram of a 2-second clip and store it in HDF5.

    Args:
        audio: Audio samples (the notebook passes the array returned by librosa.load).
        start_time: Start of the clip, in seconds.
        sr: Sample rate used to convert seconds to sample indices.
        filename: Name of the dataset to write into 'test_spectrograms.h5'.

    Returns:
        The spectrogram, resized to (256, 343) when the STFT output has another shape.
    """
    # Extract 2-second clip (slicing yields a shorter clip near the end of the audio)
    start_sample = int(start_time * sr)
    end_sample = start_sample + 2 * sr
    clip = audio[start_sample:end_sample]

    # Generate spectrogram, in dB relative to the clip's own peak
    D = librosa.stft(clip)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Resize the spectrogram if necessary
    if S_db.shape != (256, 343):
        S_db = resize_spectrogram(S_db, (256, 343))

    # Save spectrogram to HDF5
    with h5py.File('test_spectrograms.h5', 'a') as file:
        # Several cells append to this same file with the same dataset names, and
        # create_dataset raises when a name already exists: replace the old entry.
        if filename in file:
            del file[filename]
        file.create_dataset(filename, data=S_db)
    return S_db

# Start from an empty HDF5 file: generate_spectrogram appends to it, so datasets
# left by an earlier run or another cell would otherwise be mixed into this run.
with h5py.File('test_spectrograms.h5', 'w'):
    pass

# Process each loud section and visualize the last one
S_db = None  # reset so a value left behind by another cell is never plotted
for i, time in enumerate(loud_times):
    S_db = generate_spectrogram(y, time, sr, f'spectrogram_{i}')

if S_db is None:
    print('No loud sections found; nothing to plot.')
else:
    librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    # Title names the file that was actually uploaded (the old text was a
    # mistyped, hard-coded file name)
    plt.title(f'Spectrogram for {audio_file_path}')
    plt.show()

def prepare_spectrogram(s_db):
    """Normalise a spectrogram by its largest absolute value and add a channel axis.

    Args:
        s_db: Spectrogram array (the notebook passes dB values read from HDF5).

    Returns:
        s_db / max(|s_db|) with a trailing axis of length 1. When the input is
        all zeros the division is skipped so the result contains no NaNs.
    """
    # Assuming the training data was normalized or preprocessed similarly
    largest = np.max(np.abs(s_db))
    # A flat spectrogram has a peak of 0; dividing by it would yield NaN everywhere
    divisor = largest if largest > 0 else 1.0
    s_db_normalized = s_db / divisor
    return np.expand_dims(s_db_normalized, axis=-1)  # Add channel dimension if needed

# Open the file with test spectrograms
with h5py.File('test_spectrograms.h5', 'r') as file:
    # List all datasets (spectrograms) in the file
    # (this includes anything earlier cells or runs left in the same file)
    spectrogram_keys = list(file.keys())
    test_spectrograms = [prepare_spectrogram(file[key][:]) for key in spectrogram_keys]

# NOTE(review): these inputs were resized to (256, 343) and peak-normalised, while
# no normalisation of the training arrays is visible in this notebook — confirm
# that shape and scaling match what the saved model was trained on.
predictions = model.predict(np.array(test_spectrograms))

# Index of the highest-scoring class for each spectrogram
predicted_classes = np.argmax(predictions, axis=1)

# Map class indices back to species codes
predicted_birds = [bird_types[i] for i in predicted_classes]

# Create a DataFrame to display results
results_df = pd.DataFrame({
    'Spectrogram Key': spectrogram_keys,
    'Predicted Bird': predicted_birds
})

# Display the DataFrame
print(results_df)


In [None]:
import librosa
import numpy as np
import h5py
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
import pandas as pd
import tensorflow as tf

# Upload the file
from google.colab import files
uploaded = files.upload()

# Get the uploaded file path. Use the name the upload was actually stored under
# (as the earlier upload cell does); the previously hard-coded path is kept as a
# fallback for the case where nothing was uploaded.
audio_file_path = next(iter(uploaded), '/content/test1 (1).mp3')

# Load the best model
model = tf.keras.models.load_model('best_bird_species_classifier.keras')

# Bird species labels (same order as the species_keys list used to build the training labels)
bird_types = ['amecro', 'barswa', 'bkcchi', 'blujay', 'daejun', 'houfin', 'mallar3', 'norfli', 'rewbla', 'stejay', 'wesmea', 'whcspa']

# Load the audio file, resampled to 22050 Hz
y, sr = librosa.load(audio_file_path, sr=22050)

# Window length for the energy scan below. generate_spectrogram calls
# librosa.stft with its defaults, so this value is only used here.
n_fft = 512
# Hop (in samples) chosen so the scan visits roughly 343 windows. The previous
# expression divided the duration in SECONDS by 343, which truncates to 0 for
# any clip shorter than 343 s and therefore always fell back to a 1-sample hop.
hop_length = max(1, len(y) // 343)

frame_length = n_fft
# Short-time energy of each window (NumPy reduction instead of the Python builtin sum)
energy = np.array([
    np.sum(np.abs(y[i:i+frame_length]**2))
    for i in range(0, len(y), hop_length)
])

# Find frames with energy higher than a threshold (half of the loudest window)
threshold = np.max(energy) * 0.5
high_energy_frames = np.where(energy > threshold)[0]

# Convert frames to time (seconds), using the same hop as the scan
loud_times = librosa.frames_to_time(high_energy_frames, sr=sr, hop_length=hop_length)

def resize_spectrogram(spectrogram, target_shape):
    """Resize a 2-D spectrogram to `target_shape` using linear interpolation.

    Args:
        spectrogram: 2-D array to rescale.
        target_shape: (rows, columns) the result should have.

    Returns:
        Output of scipy.ndimage.zoom called with order=1.
    """
    return zoom(
        spectrogram,
        (
            target_shape[0] / spectrogram.shape[0],  # factor along axis 0
            target_shape[1] / spectrogram.shape[1],  # factor along axis 1
        ),
        order=1,
    )

# Function to generate a spectrogram and save it
def generate_spectrogram(audio, start_time, sr, filename):
    """Compute a dB-scaled STFT spectrogram of a 2-second clip and store it in HDF5.

    Args:
        audio: Audio samples (the notebook passes the array returned by librosa.load).
        start_time: Start of the clip, in seconds.
        sr: Sample rate used to convert seconds to sample indices.
        filename: Name of the dataset to write into 'test_spectrograms.h5'.

    Returns:
        The spectrogram, resized to (343, 59) when the STFT output has another shape.
    """
    # Extract 2-second clip (slicing yields a shorter clip near the end of the audio)
    start_sample = int(start_time * sr)
    end_sample = start_sample + 2 * sr
    clip = audio[start_sample:end_sample]

    # Generate spectrogram, in dB relative to the clip's own peak
    D = librosa.stft(clip)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Resize the spectrogram if necessary
    # NOTE(review): (343, 59) presumably mirrors the model's input size — confirm.
    target_shape = (343, 59)
    if S_db.shape != target_shape:
        S_db = resize_spectrogram(S_db, target_shape)

    # Save spectrogram to HDF5
    with h5py.File('test_spectrograms.h5', 'a') as file:
        # Several cells append to this same file with the same dataset names, and
        # create_dataset raises when a name already exists: replace the old entry.
        if filename in file:
            del file[filename]
        file.create_dataset(filename, data=S_db)
    return S_db

# Start from an empty HDF5 file: generate_spectrogram appends to it, so datasets
# written by earlier cells (with a different shape) would otherwise be mixed in
# and break the stacking of test spectrograms further down.
with h5py.File('test_spectrograms.h5', 'w'):
    pass

# Process each loud section and visualize the last one
S_db = None  # reset so a value left behind by another cell is never plotted
for i, time in enumerate(loud_times):
    S_db = generate_spectrogram(y, time, sr, f'spectrogram_{i}')

if S_db is None:
    print('No loud sections found; nothing to plot.')
else:
    librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.show()

def prepare_spectrogram(s_db):
    """Divide a spectrogram by its peak absolute value and add a channel axis.

    Args:
        s_db: Spectrogram array (the notebook passes dB values read from HDF5).

    Returns:
        The scaled array with one extra trailing axis of length 1. An all-zero
        input is passed through unscaled, avoiding the NaNs that 0/0 would give.
    """
    # Assuming the training data was normalized or preprocessed similarly
    max_abs = np.max(np.abs(s_db))
    if max_abs > 0:
        s_db_normalized = s_db / max_abs
    else:
        # Peak of 0 means a flat spectrogram; dividing by 1.0 keeps dtype rules
        # the same as the normal path while leaving the values untouched
        s_db_normalized = s_db / 1.0
    return np.expand_dims(s_db_normalized, axis=-1)  # Add channel dimension if needed

# Open the file with test spectrograms
with h5py.File('test_spectrograms.h5', 'r') as file:
    # List all datasets (spectrograms) in the file
    # (this includes anything earlier cells or runs left in the same file)
    spectrogram_keys = list(file.keys())
    test_spectrograms = [prepare_spectrogram(file[key][:]) for key in spectrogram_keys]

# NOTE(review): the inputs are peak-normalised here, while no normalisation of the
# training arrays is visible in this notebook — confirm the scaling matches.
predictions = model.predict(np.array(test_spectrograms))

# Index of the highest-scoring class for each spectrogram
predicted_classes = np.argmax(predictions, axis=1)

# Map class indices back to species codes
predicted_birds = [bird_types[i] for i in predicted_classes]

# Create a DataFrame to display results
results_df = pd.DataFrame({
    'Spectrogram Key': spectrogram_keys,
    'Predicted Bird': predicted_birds
})

# Display the DataFrame
print(results_df)


In [None]:
import librosa
import numpy as np
import h5py
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
import pandas as pd
import tensorflow as tf

# Upload the file
from google.colab import files
uploaded = files.upload()

# Get the uploaded file path. Use the name the upload was actually stored under
# (as the first upload cell does); the previously hard-coded path is kept as a
# fallback for the case where nothing was uploaded.
audio_file_path = next(iter(uploaded), '/content/test2.mp3')

# Load the best model
model = tf.keras.models.load_model('best_bird_species_classifier.keras')

# Bird species labels (same order as the species_keys list used to build the training labels)
bird_types = ['amecro', 'barswa', 'bkcchi', 'blujay', 'daejun', 'houfin', 'mallar3', 'norfli', 'rewbla', 'stejay', 'wesmea', 'whcspa']

# Load the audio file, resampled to 22050 Hz
y, sr = librosa.load(audio_file_path, sr=22050)

# Window length for the energy scan below. generate_spectrogram calls
# librosa.stft with its defaults, so this value is only used here.
n_fft = 512
# Hop (in samples) chosen so the scan visits roughly 343 windows. The previous
# expression divided the duration in SECONDS by 343, which truncates to 0 for
# any clip shorter than 343 s and therefore always fell back to a 1-sample hop.
hop_length = max(1, len(y) // 343)

frame_length = n_fft
# Short-time energy of each window (NumPy reduction instead of the Python builtin sum)
energy = np.array([
    np.sum(np.abs(y[i:i+frame_length]**2))
    for i in range(0, len(y), hop_length)
])

# Find frames with energy higher than a threshold (half of the loudest window)
threshold = np.max(energy) * 0.5
high_energy_frames = np.where(energy > threshold)[0]

# Convert frames to time (seconds), using the same hop as the scan
loud_times = librosa.frames_to_time(high_energy_frames, sr=sr, hop_length=hop_length)

def resize_spectrogram(spectrogram, target_shape):
    """Stretch or shrink a 2-D spectrogram so that it has `target_shape`.

    Args:
        spectrogram: 2-D array to rescale.
        target_shape: Wanted (rows, columns).

    Returns:
        The linearly interpolated array from scipy.ndimage.zoom (order=1).
    """
    height_ratio = target_shape[0] / spectrogram.shape[0]
    width_ratio = target_shape[1] / spectrogram.shape[1]
    scaled = zoom(spectrogram, (height_ratio, width_ratio), order=1)
    return scaled

# Function to generate a spectrogram and save it
def generate_spectrogram(audio, start_time, sr, filename):
    """Compute a dB-scaled STFT spectrogram of a 2-second clip and store it in HDF5.

    Args:
        audio: Audio samples (the notebook passes the array returned by librosa.load).
        start_time: Start of the clip, in seconds.
        sr: Sample rate used to convert seconds to sample indices.
        filename: Name of the dataset to write into 'test_spectrograms.h5'.

    Returns:
        The spectrogram, resized to (343, 59) when the STFT output has another shape.
    """
    # Extract 2-second clip (slicing yields a shorter clip near the end of the audio)
    start_sample = int(start_time * sr)
    end_sample = start_sample + 2 * sr
    clip = audio[start_sample:end_sample]

    # Generate spectrogram, in dB relative to the clip's own peak
    D = librosa.stft(clip)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Resize the spectrogram if necessary
    # NOTE(review): (343, 59) presumably mirrors the model's input size — confirm.
    target_shape = (343, 59)
    if S_db.shape != target_shape:
        S_db = resize_spectrogram(S_db, target_shape)

    # Save spectrogram to HDF5
    with h5py.File('test_spectrograms.h5', 'a') as file:
        # Several cells append to this same file with the same dataset names, and
        # create_dataset raises when a name already exists: replace the old entry.
        if filename in file:
            del file[filename]
        file.create_dataset(filename, data=S_db)
    return S_db

# Start from an empty HDF5 file: generate_spectrogram appends to it, so datasets
# written for a previous audio file would otherwise collide with this run.
with h5py.File('test_spectrograms.h5', 'w'):
    pass

# Process each loud section and visualize the last one
S_db = None  # reset so a value left behind by another cell is never plotted
for i, time in enumerate(loud_times):
    S_db = generate_spectrogram(y, time, sr, f'spectrogram_{i}')

if S_db is None:
    print('No loud sections found; nothing to plot.')
else:
    librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.show()

In [None]:
import librosa
import numpy as np
import h5py
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
import pandas as pd
import tensorflow as tf

# Upload the file
from google.colab import files
uploaded = files.upload()

# Get the uploaded file path. Use the name the upload was actually stored under
# (as the first upload cell does); the previously hard-coded path is kept as a
# fallback for the case where nothing was uploaded.
audio_file_path = next(iter(uploaded), '/content/test3.mp3')

# Load the best model
model = tf.keras.models.load_model('best_bird_species_classifier.keras')

# Bird species labels (same order as the species_keys list used to build the training labels)
bird_types = ['amecro', 'barswa', 'bkcchi', 'blujay', 'daejun', 'houfin', 'mallar3', 'norfli', 'rewbla', 'stejay', 'wesmea', 'whcspa']

# Load the audio file, resampled to 22050 Hz
y, sr = librosa.load(audio_file_path, sr=22050)

# Window length for the energy scan below. generate_spectrogram calls
# librosa.stft with its defaults, so this value is only used here.
n_fft = 512
# Hop (in samples) chosen so the scan visits roughly 343 windows. The previous
# expression divided the duration in SECONDS by 343, which truncates to 0 for
# any clip shorter than 343 s and therefore always fell back to a 1-sample hop.
hop_length = max(1, len(y) // 343)

frame_length = n_fft
# Short-time energy of each window (NumPy reduction instead of the Python builtin sum)
energy = np.array([
    np.sum(np.abs(y[i:i+frame_length]**2))
    for i in range(0, len(y), hop_length)
])

# Find frames with energy higher than a threshold (half of the loudest window)
threshold = np.max(energy) * 0.5
high_energy_frames = np.where(energy > threshold)[0]

# Convert frames to time (seconds), using the same hop as the scan
loud_times = librosa.frames_to_time(high_energy_frames, sr=sr, hop_length=hop_length)

def resize_spectrogram(spectrogram, target_shape):
    """Interpolate a 2-D spectrogram onto a grid of size `target_shape`.

    Args:
        spectrogram: 2-D array to rescale.
        target_shape: (rows, columns) of the result.

    Returns:
        Result of scipy.ndimage.zoom with linear interpolation (order=1).
    """
    n_rows_in = spectrogram.shape[0]
    factor_rows = target_shape[0] / n_rows_in
    n_cols_in = spectrogram.shape[1]
    factor_cols = target_shape[1] / n_cols_in
    factors = (factor_rows, factor_cols)
    return zoom(spectrogram, factors, order=1)

# Function to generate a spectrogram and save it
def generate_spectrogram(audio, start_time, sr, filename):
    """Compute a dB-scaled STFT spectrogram of a 2-second clip and store it in HDF5.

    Args:
        audio: Audio samples (the notebook passes the array returned by librosa.load).
        start_time: Start of the clip, in seconds.
        sr: Sample rate used to convert seconds to sample indices.
        filename: Name of the dataset to write into 'test_spectrograms.h5'.

    Returns:
        The spectrogram, resized to (343, 59) when the STFT output has another shape.
    """
    # Extract 2-second clip (slicing yields a shorter clip near the end of the audio)
    start_sample = int(start_time * sr)
    end_sample = start_sample + 2 * sr
    clip = audio[start_sample:end_sample]

    # Generate spectrogram, in dB relative to the clip's own peak
    D = librosa.stft(clip)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Resize the spectrogram if necessary
    # NOTE(review): (343, 59) presumably mirrors the model's input size — confirm.
    target_shape = (343, 59)
    if S_db.shape != target_shape:
        S_db = resize_spectrogram(S_db, target_shape)

    # Save spectrogram to HDF5
    with h5py.File('test_spectrograms.h5', 'a') as file:
        # Several cells append to this same file with the same dataset names, and
        # create_dataset raises when a name already exists: replace the old entry.
        if filename in file:
            del file[filename]
        file.create_dataset(filename, data=S_db)
    return S_db

# Start from an empty HDF5 file: generate_spectrogram appends to it, so datasets
# written for a previous audio file would otherwise collide with this run.
with h5py.File('test_spectrograms.h5', 'w'):
    pass

# Process each loud section and visualize the last one
S_db = None  # reset so a value left behind by another cell is never plotted
for i, time in enumerate(loud_times):
    S_db = generate_spectrogram(y, time, sr, f'spectrogram_{i}')

if S_db is None:
    print('No loud sections found; nothing to plot.')
else:
    librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.show()