In [3]:
from google.colab import drive
# Mount Google Drive so the '/content/drive/My Drive/...' paths used by the
# later cells resolve.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

import os
import librosa
import numpy as np
!pip install pydub
from pydub import AudioSegment

# Expose only GPU index 0 to CUDA-aware libraries.
# NOTE(review): nothing in this cell visibly runs on a GPU, so this may be a
# leftover — confirm before relying on or removing it.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def concatenate_audio(audio_path, annotation_path, output_dir, sr=22050):
    """Join the annotated vocalisation segments of one recording into one MP3.

    The annotation file is plain text with one segment per line, written as
    ``START_SEC END_SEC LABEL`` separated by whitespace. Segments labelled
    "song", "call", "drumming" or "clapping" are cut out of the recording and
    appended in file order; lines with any other label are ignored, and blank
    lines are skipped.

    Args:
        audio_path: Path of the recording to read.
        annotation_path: Path of the matching annotation text file.
        output_dir: Directory that receives the result (not created here).
        sr: Sample rate requested from ``librosa.load``.

    Returns:
        Path of the written file, ``{output_dir}/{recording stem}_concatenated.mp3``.

    Raises:
        ValueError: If a non-blank annotation line does not consist of exactly
            three fields, or its start/end fields are not numbers.
    """
    kept_labels = ("song", "call", "drumming", "clapping")

    # librosa.load returns mono floating-point samples scaled to [-1.0, 1.0].
    y, sr = librosa.load(audio_path, sr=sr)

    with open(annotation_path, 'r') as f:
        annotations = f.readlines()

    # Start from an empty segment at the working sample rate so the export
    # uses that rate even when no annotation matches.
    concatenated_audio = AudioSegment.silent(duration=0, frame_rate=sr)

    for line_number, annotation in enumerate(annotations, start=1):
        fields = annotation.split()
        if not fields:
            continue  # tolerate blank lines, e.g. an empty line at the end of the file
        if len(fields) != 3:
            raise ValueError(
                f"{annotation_path}:{line_number}: expected 'START END LABEL', got {annotation!r}"
            )
        start_sec, end_sec = float(fields[0]), float(fields[1])
        label = fields[2]
        if label not in kept_labels:
            continue

        # Convert the annotated times to sample indices and cut the segment.
        start_frame = int(start_sec * sr)
        end_frame = int(end_sec * sr)
        segment = y[start_frame:end_frame]
        if len(segment) == 0:
            continue  # annotation lies outside the audio or has no duration

        # pydub interprets its input as raw integer PCM, so the float samples
        # have to be scaled and converted to 16-bit integers; handing over the
        # float bytes (or reinterpreting them) produces noise.
        pcm = (np.clip(segment, -1.0, 1.0) * 32767).astype(np.int16)
        segment_audio = AudioSegment(pcm.tobytes(), frame_rate=sr, sample_width=2, channels=1)
        concatenated_audio += segment_audio

    # Name the output after the recording, whatever its extension spelling.
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    output_path = os.path.join(output_dir, f"{stem}_concatenated.mp3")
    concatenated_audio.export(output_path, format="mp3")
    return output_path

def process_dataset(dataset_dir, output_dir):
    """Run `concatenate_audio` on every annotated .mp3 found under `dataset_dir`.

    The directory tree is walked recursively. For each ``NAME.mp3`` the
    annotation is expected next to it as ``NAME.txt``; the result is written
    to the same relative sub-directory below `output_dir`, which is created
    on demand. Recordings without an annotation file are reported and skipped
    so that one missing file does not abort the whole run.

    Args:
        dataset_dir: Root directory containing the recordings.
        output_dir: Root directory that receives the concatenated files.
    """
    for root, _dirs, files in os.walk(dataset_dir):
        for file in files:
            if not file.endswith('.mp3'):
                continue

            audio_path = os.path.join(root, file)
            # Swap only the extension; str.replace would also rewrite any
            # '.mp3' occurring earlier in the file name.
            annotation_path = os.path.join(root, os.path.splitext(file)[0] + '.txt')
            if not os.path.isfile(annotation_path):
                print(f"Skipping {audio_path}: annotation file {annotation_path} not found")
                continue

            # Mirror the recording's sub-directory below the output root.
            output_subdir = os.path.join(output_dir, os.path.relpath(root, dataset_dir))
            os.makedirs(output_subdir, exist_ok=True)
            concatenate_audio(audio_path, annotation_path, output_subdir)

# Example usage: walk the whole annotated dataset on Drive and write the
# concatenated recordings into a matching directory tree under output_dir.
# Both paths are hard-coded to this Colab/Drive layout — adjust them before
# running anywhere else.
dataset_dir = '/content/drive/My Drive/birdaudio/'
output_dir = '/content/drive/My Drive/concatenated_audio/'
process_dataset(dataset_dir, output_dir)

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
import os
import librosa
import numpy as np
from tensorflow.keras.models import load_model
import cv2

# Species names, indexed by the class position that np.argmax selects from the
# model's prediction. Every index must map to a distinct species, otherwise two
# different classes are reported under the same name.
# NOTE(review): index 0 is assumed to be 'Acrocephalus arundinaceus', which
# keeps the list alphabetical like every other entry — confirm against the
# label order that was used to train the model.
class_labels = [
    'Acrocephalus arundinaceus', 'Acrocephalus melanopogon', 'Acrocephalus scirpaceus', 'Alcedo atthis',
    'Anas platyrhynchos', 'Anas strepera', 'Ardea purpurea', 'Botaurus stellaris', 'Charadrius alexandrinus',
    'Ciconia ciconia', 'Circus aeruginosus', 'Coracias garrulus', 'Dendrocopos minor', 'Fulica atra',
    'Gallinula chloropus', 'Himantopus himantopus', 'Ixobrychus minutus', 'Motacilla flava', 'Porphyrio porphyrio',
    'Tachybaptus ruficollis'
]

def load_mel_spectrogram(file_path):
    """Read back an array that was stored with ``np.save``.

    Args:
        file_path: Path of the .npy file to read.

    Returns:
        The array contained in the file.
    """
    stored_array = np.load(file_path)
    return stored_array

# Function to preprocess data for prediction
def preprocess_data(spectrogram, target_size=(128, 128)):
    """Resize a spectrogram and wrap it with batch and channel axes.

    Args:
        spectrogram: Array to resize.
        target_size: Size tuple handed unchanged to ``cv2.resize``.

    Returns:
        The resized array with one extra axis of length 1 in front and one
        extra axis of length 1 at the end.
    """
    scaled = cv2.resize(spectrogram, target_size)
    # Leading axis: batch of one; trailing axis: single channel.
    return scaled[np.newaxis, ..., np.newaxis]

# Function to make predictions
def predict(model, spectrogram):
    """Run `model` on a single spectrogram.

    The spectrogram is first passed through `preprocess_data`; whatever
    ``model.predict`` returns for that input is handed back unchanged.

    Args:
        model: Object exposing a ``predict`` method.
        spectrogram: Spectrogram array to classify.

    Returns:
        The result of ``model.predict`` on the preprocessed spectrogram.
    """
    model_input = preprocess_data(spectrogram)
    return model.predict(model_input)

def generate_mel_spectrogram(audio_path, output_dir, model, sr=22050, n_fft=2048, hop_length=512, n_mels=128):
    """Classify a recording one second at a time, saving each spectrogram.

    For every complete second of audio a dB-scaled mel spectrogram is computed,
    written to ``{output_dir}/{recording stem}_{second}.npy`` and passed to
    `predict`; the predicted class name is printed. A trailing partial second
    is not processed.

    Args:
        audio_path: Path of the recording to analyse.
        output_dir: Directory that receives the .npy files (not created here).
        model: Model object forwarded to `predict`.
        sr: Sample rate requested from ``librosa.load``.
        n_fft: FFT window length for the mel spectrogram.
        hop_length: Hop length for the mel spectrogram.
        n_mels: Number of mel bands.

    Returns:
        List with the predicted class name of each processed second, in
        chronological order (empty when nothing was processed).
    """
    y, sr = librosa.load(audio_path, sr=sr)

    # Only whole seconds are classified; a shorter remainder is dropped.
    total_seconds = len(y) // sr

    predicted_classes = []

    # Every window is exactly `sr` samples long, so the "long enough for
    # n_fft" test has the same outcome for all of them and is decided once.
    if sr < n_fft:
        return predicted_classes

    stem = os.path.splitext(os.path.basename(audio_path))[0]

    for sec in range(total_seconds):
        segment = y[sec * sr:(sec + 1) * sr]

        # Mel power spectrogram, converted to dB relative to its own maximum.
        S = librosa.feature.melspectrogram(y=segment, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
        S_db = librosa.power_to_db(S, ref=np.max)

        output_filename = f"{stem}_{sec}.npy"
        np.save(os.path.join(output_dir, output_filename), S_db)

        # Predict from the in-memory array: reading the file back right after
        # saving it returns the same data and only adds a disk round trip.
        prediction = predict(model, S_db)
        predicted_class = class_labels[int(np.argmax(prediction))]
        predicted_classes.append(predicted_class)

        print(f"Predicted label for {output_filename}: {predicted_class}")

    return predicted_classes

def process_single_audio(audio_path, output_dir, model):
    """Prepare `output_dir` and run the per-second prediction for one recording.

    Args:
        audio_path: Path of the recording to analyse.
        output_dir: Directory for the generated spectrogram files; created
            (including parents) when missing.
        model: Model object forwarded to `generate_mel_spectrogram`.
    """
    # The destination must exist before any spectrogram is written; an
    # already-present directory is fine.
    os.makedirs(output_dir, exist_ok=True)

    # Loading, slicing, saving and predicting are all delegated.
    generate_mel_spectrogram(audio_path, output_dir, model)

# Load the trained Keras model from Drive once; the same object is passed to
# every prediction below.
model_path = '/content/drive/My Drive/saved_models/trained_model.h5'
model = load_model(model_path)

# Example usage.
# NOTE: despite its name, dataset_dir holds the path of ONE audio file here;
# output_dir is the folder that receives that file's per-second .npy
# spectrograms.
dataset_dir = '/content/drive/My Drive/birdaudio/Ardea purpurea/XC485377.mp3'
output_dir = '/content/drive/My Drive/predictionspectrogram/XC485377/'
process_single_audio(dataset_dir, output_dir, model)


Predicted label for XC485377_0.npy: Ixobrychus minutus
Predicted label for XC485377_1.npy: Charadrius alexandrinus
Predicted label for XC485377_2.npy: Alcedo atthis
Predicted label for XC485377_3.npy: Ardea purpurea
Predicted label for XC485377_4.npy: Ixobrychus minutus
Predicted label for XC485377_5.npy: Circus aeruginosus
Predicted label for XC485377_6.npy: Ardea purpurea
Predicted label for XC485377_7.npy: Acrocephalus melanopogon
Predicted label for XC485377_8.npy: Gallinula chloropus
Predicted label for XC485377_9.npy: Motacilla flava
Predicted label for XC485377_10.npy: Ardea purpurea
Predicted label for XC485377_11.npy: Ardea purpurea
Predicted label for XC485377_12.npy: Acrocephalus melanopogon
Predicted label for XC485377_13.npy: Ardea purpurea
Predicted label for XC485377_14.npy: Ixobrychus minutus
Predicted label for XC485377_15.npy: Acrocephalus melanopogon
Predicted label for XC485377_16.npy: Tachybaptus ruficollis
Predicted label for XC485377_17.npy: Gallinula chloropus
P

In [6]:
import os
import librosa
import numpy as np
from tensorflow.keras.models import load_model
import cv2
from collections import Counter

# Species names, indexed by the class position that np.argmax selects from the
# model's prediction. Every index must map to a distinct species, otherwise the
# majority vote merges two different classes under one name.
# NOTE(review): index 0 is assumed to be 'Acrocephalus arundinaceus', which
# keeps the list alphabetical like every other entry — confirm against the
# label order that was used to train the model.
class_labels = [
    'Acrocephalus arundinaceus', 'Acrocephalus melanopogon', 'Acrocephalus scirpaceus', 'Alcedo atthis',
    'Anas platyrhynchos', 'Anas strepera', 'Ardea purpurea', 'Botaurus stellaris', 'Charadrius alexandrinus',
    'Ciconia ciconia', 'Circus aeruginosus', 'Coracias garrulus', 'Dendrocopos minor', 'Fulica atra',
    'Gallinula chloropus', 'Himantopus himantopus', 'Ixobrychus minutus', 'Motacilla flava', 'Porphyrio porphyrio',
    'Tachybaptus ruficollis'
]

def load_mel_spectrogram(file_path):
    """Read back an array that was stored with ``np.save``.

    Args:
        file_path: Path of the .npy file to read.

    Returns:
        The array contained in the file.
    """
    stored_array = np.load(file_path)
    return stored_array

# Function to preprocess data for prediction
def preprocess_data(spectrogram, target_size=(128, 128)):
    """Resize a spectrogram and wrap it with batch and channel axes.

    Args:
        spectrogram: Array to resize.
        target_size: Size tuple handed unchanged to ``cv2.resize``.

    Returns:
        The resized array with one extra axis of length 1 in front and one
        extra axis of length 1 at the end.
    """
    scaled = cv2.resize(spectrogram, target_size)
    # Leading axis: batch of one; trailing axis: single channel.
    return scaled[np.newaxis, ..., np.newaxis]

# Function to make predictions
def predict(model, spectrogram):
    """Run `model` on a single spectrogram.

    The spectrogram is first passed through `preprocess_data`; whatever
    ``model.predict`` returns for that input is handed back unchanged.

    Args:
        model: Object exposing a ``predict`` method.
        spectrogram: Spectrogram array to classify.

    Returns:
        The result of ``model.predict`` on the preprocessed spectrogram.
    """
    model_input = preprocess_data(spectrogram)
    return model.predict(model_input)

def generate_mel_spectrogram(audio_path, output_dir, model, sr=22050, n_fft=2048, hop_length=512, n_mels=128):
    """Classify a recording one second at a time, saving each spectrogram.

    For every complete second of audio a dB-scaled mel spectrogram is computed,
    written to ``{output_dir}/{recording stem}_{second}.npy`` and passed to
    `predict`. A trailing partial second is not processed.

    Args:
        audio_path: Path of the recording to analyse.
        output_dir: Directory that receives the .npy files (not created here).
        model: Model object forwarded to `predict`.
        sr: Sample rate requested from ``librosa.load``.
        n_fft: FFT window length for the mel spectrogram.
        hop_length: Hop length for the mel spectrogram.
        n_mels: Number of mel bands.

    Returns:
        List with the predicted class name of each processed second, in
        chronological order (empty when nothing was processed).
    """
    y, sr = librosa.load(audio_path, sr=sr)

    # Only whole seconds are classified; a shorter remainder is dropped.
    total_seconds = len(y) // sr

    predicted_classes = []

    # Every window is exactly `sr` samples long, so the "long enough for
    # n_fft" test has the same outcome for all of them and is decided once.
    if sr < n_fft:
        return predicted_classes

    stem = os.path.splitext(os.path.basename(audio_path))[0]

    for sec in range(total_seconds):
        segment = y[sec * sr:(sec + 1) * sr]

        # Mel power spectrogram, converted to dB relative to its own maximum.
        S = librosa.feature.melspectrogram(y=segment, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
        S_db = librosa.power_to_db(S, ref=np.max)

        np.save(os.path.join(output_dir, f"{stem}_{sec}.npy"), S_db)

        # Predict from the in-memory array: reading the file back right after
        # saving it returns the same data and only adds a disk round trip.
        prediction = predict(model, S_db)
        predicted_classes.append(class_labels[int(np.argmax(prediction))])

    return predicted_classes


def process_single_audio(audio_path, output_dir, model):
    """Predict one label for a whole recording by majority vote over its seconds.

    The per-second predictions returned by `generate_mel_spectrogram` are
    counted and the most frequent class name wins. Only the segments produced
    by this call take part in the vote, so .npy files left in `output_dir` by
    earlier runs cannot influence the result, and each segment is run through
    the model exactly once.

    Args:
        audio_path: Path of the recording to analyse.
        output_dir: Directory for the generated spectrogram files; created
            (including parents) when missing.
        model: Model object forwarded to `generate_mel_spectrogram`.

    Returns:
        The winning class name, or None when the recording yields no
        complete one-second segment.
    """
    os.makedirs(output_dir, exist_ok=True)

    predicted_classes = generate_mel_spectrogram(audio_path, output_dir, model)

    # Without any segment there is nothing to vote on; report it instead of
    # failing on an empty tally.
    if not predicted_classes:
        print(f"No complete one-second segment in {audio_path}; nothing to predict")
        return None

    # Tally the labels; on a tie the label that was seen first is reported.
    label_counts = Counter(predicted_classes)
    final_prediction = label_counts.most_common(1)[0][0]

    print(f"Final predicted label for {audio_path}: {final_prediction}")
    return final_prediction

# Load the trained Keras model from Drive once; the same object is passed to
# every prediction below.
model_path = '/content/drive/My Drive/saved_models/trained_model.h5'
model = load_model(model_path)

# Example usage.
# NOTE: despite its name, dataset_dir holds the path of ONE audio file here.
# NOTE(review): that file sits directly under /content rather than on the
# mounted Drive — presumably a manual upload to the session; confirm it exists
# before running this cell.
dataset_dir = '/content/XC521814 (1).mp3'
output_dir = '/content/drive/My Drive/predictionspectrogram/XC521814/'
process_single_audio(dataset_dir, output_dir, model)


Final predicted label for /content/XC521814 (1).mp3: Porphyrio porphyrio
