In [None]:
!pip install librosa matplotlib



In [None]:
!pip install pydub



In [None]:
import requests
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from io import BytesIO
from pydub import AudioSegment
import os

# Root URL of the Xeno-Canto site; per-recording download links are built from it.
BASE_URL = "https://www.xeno-canto.org"

def download_bird_calls_vancouver(page=1, timeout=30):
    """Query the Xeno-Canto recordings API for quality-A recordings near Vancouver.

    Args:
        page: Result page to request.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON payload, or None when the server answers with an
        error status or a body that is not valid JSON.

    Raises:
        requests.exceptions.RequestException: On connection problems or timeouts.
    """
    endpoint = "https://www.xeno-canto.org/api/2/recordings"
    params = {
        'query': 'cnt:Canada loc:"Vancouver" q:A',
        'page': page
    }
    response = requests.get(endpoint, params=params, timeout=timeout)

    # An error status does not carry the expected payload; report it and
    # return None instead of handing an error document to the caller.
    if not response.ok:
        print("Request failed:")
        print("Status Code:", response.status_code)
        print("Response Body:", response.text)
        return None

    try:
        data = response.json()
    except ValueError:
        # ValueError is the common base of every JSON decode error that
        # requests can raise, so this also works on releases that predate
        # requests.exceptions.JSONDecodeError.
        print("Failed to decode JSON:")
        print("Status Code:", response.status_code)
        print("Response Body:", response.text)
        return None
    return data

# Convert the API response into a DataFrame for easier data management
def collect_data_to_dataframe(data):
    """Flatten an API response into one DataFrame row per recording.

    Args:
        data: Decoded API response holding a 'recordings' list, or None when
            the download failed.

    Returns:
        DataFrame with the columns ID, Species, Quality, Date, AudioURL and
        MultipleSpecies. It is empty (but still has these columns) when
        `data` is None or contains no recordings.
    """
    columns = ['ID', 'Species', 'Quality', 'Date', 'AudioURL', 'MultipleSpecies']
    recordings = (data or {}).get('recordings') or []

    recordings_list = []
    for record in recordings:
        also = record.get('also') or []
        if isinstance(also, str):
            also = [also]
        # Blank entries are ignored so that a placeholder such as ['']
        # is not mistaken for a multi-label recording.
        multiple_species = any(str(name).strip() for name in also)
        recordings_list.append({
            'ID': record['id'],
            'Species': record['en'],
            'Quality': record['q'],
            'Date': record['date'],
            'AudioURL': f"{BASE_URL}/{record['id']}/download",
            'MultipleSpecies': multiple_species,  # True if there are additional species
        })
    return pd.DataFrame(recordings_list, columns=columns)



# USAGE #
bird_calls_vancouver = download_bird_calls_vancouver()  # from Xeno-Canto (first result page only)
if bird_calls_vancouver is None:
    # The downloader reports failures by returning None; stop here with a
    # clear message rather than failing later on a confusing TypeError.
    raise RuntimeError("Xeno-Canto request failed; no recordings to process")

# Build the frame directly. Concatenating onto an empty DataFrame adds nothing,
# and pandas has deprecated how empty frames influence the resulting dtypes.
all_recordings = collect_data_to_dataframe(bird_calls_vancouver)  # all recordings

# Select only single species (cast keeps `~` a logical NOT on the column)
single_species_recordings = all_recordings[~all_recordings['MultipleSpecies'].astype(bool)]
print(f"Number of single species recordings: {single_species_recordings.shape[0]}")

# Select the top 10 species with the most recordings
species_counts = single_species_recordings.groupby('Species').size().sort_values(ascending=False)
top_10_species = species_counts.head(10)
top_10_species_names = top_10_species.index.tolist()
top_10_species_recordings = single_species_recordings[single_species_recordings['Species'].isin(top_10_species_names)]
print("Top 10 species with the most recordings:")
print(top_10_species)



Number of single species recordings: 339
Top 10 species with the most recordings:
Species
Spotted Towhee            36
Bewick's Wren             27
Song Sparrow              18
Anna's Hummingbird        16
White-crowned Sparrow     13
Pine Siskin               12
Golden-crowned Sparrow    11
Red-winged Blackbird      11
Red-breasted Nuthatch      9
Marsh Wren                 9
dtype: int64


In [1]:
def save_mfcc_image(mfccs, file_id, species, output_dir='/content/MFCC', sr=22050):
    """Render each MFCC array of one recording to its own PNG file.

    NOTE: a later cell of this notebook defines another `save_mfcc_image`
    with a different signature; once that cell runs it replaces this one.

    Args:
        mfccs: Iterable of MFCC arrays, one per audio segment.
        file_id: Recording identifier used as the file name prefix.
        species: Species name; non-alphanumeric characters are stripped
            before it is used in the file name.
        output_dir: Directory that receives the images (created if missing).
        sr: Sample rate passed to `librosa.display.specshow`.
    """
    valid_species = "".join(ch for ch in species if ch.isalnum())
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Process each MFCC array individually
    for i, mfcc in enumerate(mfccs):
        filename = os.path.join(output_dir, f"{file_id}_{valid_species}_MFCC_{i}.png")  # Name files uniquely

        fig = plt.figure(figsize=(10, 4))
        try:
            librosa.display.specshow(mfcc, sr=sr)
            plt.tight_layout()
            plt.savefig(filename)
        finally:
            # Always release the figure, even when plotting or saving fails,
            # so a long batch does not accumulate open figures.
            plt.close(fig)
        print(f"Saved MFCC image to {filename}")


# Download audio files
def download_audio_file(url, file_id, timeout=60):
    """Stream the file at `url` to disk.

    Args:
        url: Address of the audio file.
        file_id: Destination path; despite the name it is used verbatim as
            the local file name.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The path the file was written to.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status. Checked before the file is opened, so an error page is
            never saved as if it were audio.
        requests.exceptions.RequestException: On connection problems or timeouts.
    """
    local_filename = str(file_id)
    # The context manager closes the streamed connection even on failure.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename



# Process to MFCC
def process_audio_to_mfcc(file_path, segment_duration=3, sr=22050, n_mfcc=13):
    """Split an audio file into fixed-length segments and compute MFCCs for each.

    Args:
        file_path: Path of the audio file to load.
        segment_duration: Length of each segment in seconds.
        sr: Sample rate requested from `librosa.load`.
        n_mfcc: Number of MFCC coefficients per frame.

    Returns:
        List with one MFCC array per complete segment. A trailing remainder
        shorter than a full segment is dropped, so audio shorter than one
        segment yields an empty list.
    """
    audio, sr = librosa.load(file_path, sr=sr)
    segment_length = int(sr * segment_duration)
    mfccs = []

    # The range stops at the last start position that still leaves a full
    # segment, so no additional bounds check is needed inside the loop.
    for start_sample in range(0, len(audio) - segment_length + 1, segment_length):
        segment = audio[start_sample:start_sample + segment_length]
        mfccs.append(librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=n_mfcc))

    return mfccs



# plot MFCCs - for visualization
def plot_mfcc(mfcc, sr=22050):
    """Display one MFCC array as a heat map with a time axis and colour bar.

    Args:
        mfcc: MFCC array to visualise.
        sr: Sample rate the MFCCs were computed at; it determines the scaling
            of the time axis, so pass the real rate when it is not 22050.
    """
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(mfcc, x_axis='time', sr=sr)
    plt.colorbar()
    plt.title('MFCC')
    plt.tight_layout()
    plt.show()



# USAGE #
audio_save_dir = '/content/MFCC'
# makedirs with exist_ok also creates missing parent directories and is safe to re-run.
os.makedirs(audio_save_dir, exist_ok=True)


mfcc_images_count = {species: 0 for species in top_10_species_names}

for index, row in top_10_species_recordings.iterrows():
    local_file_path = os.path.join(audio_save_dir, f"{row['ID']}.mp3")
    try:
        # Re-use audio fetched by an earlier run; delete the file to force a re-download.
        already_downloaded = os.path.exists(local_file_path) and os.path.getsize(local_file_path) > 0
        if not already_downloaded:
            try:
                download_audio_file(row['AudioURL'], local_file_path)
            except Exception:
                # Do not leave a truncated file behind: it would be mistaken
                # for a finished download on the next run.
                if os.path.exists(local_file_path):
                    os.remove(local_file_path)
                raise
        audio_file_path = local_file_path
        # Optional step, currently disabled: render the per-segment MFCC images.
        # mfccs = process_audio_to_mfcc(audio_file_path)
        # save_mfcc_image(mfccs, row['ID'], row['Species']) # single species MFCC
    except Exception as e:
        # Best effort: report the failure and carry on with the next recording.
        print(f"Failed to download or process {row['AudioURL']}: {e}")


In [None]:
# !rm -rf MFCC

In [None]:
# Zip the MFCC directory and download it, so the extraction step does not need to be repeated
!zip -r /content/MFCC.zip /content/MFCC

from google.colab import files
files.download("/content/MFCC.zip")

  adding: content/MFCC/ (stored 0%)
  adding: content/MFCC/160282_BewicksWren_MFCC_0.png (deflated 30%)
  adding: content/MFCC/159015_PineSiskin_MFCC_31.png (deflated 28%)
  adding: content/MFCC/185532_SpottedTowhee_MFCC_16.png (deflated 31%)
  adding: content/MFCC/161155_GlaucouswingedGull_MFCC_61.png (deflated 29%)
  adding: content/MFCC/701173_RedwingedBlackbird_MFCC_2.png (deflated 30%)
  adding: content/MFCC/159685_SpottedTowhee_MFCC_9.png (deflated 31%)
  adding: content/MFCC/161155_GlaucouswingedGull_MFCC_13.png (deflated 29%)
  adding: content/MFCC/161155_GlaucouswingedGull_MFCC_74.png (deflated 29%)
  adding: content/MFCC/159542_SpottedTowhee_MFCC_44.png (deflated 30%)
  adding: content/MFCC/183590_BewicksWren_MFCC_13.png (deflated 29%)
  adding: content/MFCC/682571_SongSparrow_MFCC_14.png (deflated 28%)
  adding: content/MFCC/160102_RedwingedBlackbird_MFCC_10.png (deflated 30%)
  adding: content/MFCC/160171_SpottedTowhee_MFCC_20.png (deflated 34%)
  adding: content/MFCC/16114

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

**Augmentation**

In [None]:
# !unzip /content/MFCC.zip

In [None]:
!pip install numpy matplotlib pandas Pillow



In [None]:
import os
import random
import librosa
import librosa.display
import matplotlib.pyplot as plt
import pandas as pd
import glob
import soundfile as sf


def mix_audio_and_compute_mfcc(audio_path1, audio_path2, sr=22050):
    """Overlay two audio files sample by sample and return the MFCCs of the mix.

    Both files are loaded at `sr`; the longer one is truncated to the length
    of the shorter one before the two signals are added together.
    """
    first_signal = librosa.load(audio_path1, sr=sr)[0]
    second_signal = librosa.load(audio_path2, sr=sr)[0]

    # Only the overlapping part of the two signals can be summed.
    overlap = min(len(first_signal), len(second_signal))
    blended = first_signal[:overlap] + second_signal[:overlap]

    return librosa.feature.mfcc(y=blended, sr=sr, n_mfcc=13)

def save_mfcc_image(mfcc, output_filename, output_dir, sr=22050):
    """Save one MFCC array as an image file.

    NOTE: this replaces the `save_mfcc_image` defined in an earlier cell,
    which takes a list of MFCC arrays and different arguments.

    Args:
        mfcc: MFCC array to render.
        output_filename: File name of the image inside `output_dir`.
        output_dir: Directory that receives the image (created if missing).
        sr: Sample rate passed to `librosa.display.specshow`.
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, output_filename)

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(mfcc, sr=sr)
        plt.tight_layout()
        plt.savefig(output_path)
    finally:
        # Release the figure even when saving fails, so a long augmentation
        # run does not accumulate open figures.
        plt.close(fig)
    print(f"Saved MFCC image to {output_path}")


def parse_filename(filename):
    """Split an MFCC image name of the form `<id>_<species>_MFCC_<index>.<ext>`.

    Any leading directory is removed first, so full paths (including ones
    whose directory names contain underscores) parse the same way as bare
    file names.

    Args:
        filename: File name or path of an MFCC image.

    Returns:
        Tuple `(id, index, species_name)`, all strings. The species name may
        itself contain underscores.
    """
    parts = os.path.basename(filename).split('_')
    recording_id = parts[0]
    # Everything between the ID and the trailing "MFCC", "<index>.<ext>" parts.
    species_name = "_".join(parts[1:-2])
    index = parts[-1].split('.')[0].replace('MFCC', '')
    return recording_id, index, species_name



def download_and_segment_audio(file_id, output_dir, segment_duration=3, sr=22050, source_dir='/content/MFCC'):
    """Cut an already-downloaded MP3 into fixed-length WAV segments.

    Despite the name, nothing is downloaded here: the recording is read from
    `<source_dir>/<file_id>.mp3`.

    Args:
        file_id: Recording identifier (the MP3 file name without extension).
        output_dir: Directory that receives the WAV segments (created if missing).
        segment_duration: Length of each segment in seconds.
        sr: Sample rate requested from `librosa.load`.
        source_dir: Directory that holds the downloaded MP3 files.

    Returns:
        List of paths of the written segments, in order. A trailing remainder
        shorter than a full segment is dropped.

    Raises:
        ValueError: If the segment length works out to zero samples or less.
    """
    local_filename = os.path.join(source_dir, f"{file_id}.mp3")

    # Load the entire audio file
    audio, sr = librosa.load(local_filename, sr=sr)
    segment_length = int(sr * segment_duration)
    if segment_length <= 0:
        raise ValueError("segment_duration must yield at least one sample per segment")

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    segment_filenames = []
    # The range ends at the last start position that still leaves a full segment.
    last_start = len(audio) - segment_length
    for seg_index, start in enumerate(range(0, last_start + 1, segment_length)):
        segment = audio[start:start + segment_length]
        segment_filename = os.path.join(output_dir, f"{file_id}_seg_{seg_index}.wav")
        sf.write(segment_filename, segment, sr)
        segment_filenames.append(segment_filename)
    return segment_filenames

def initialize_log_file(output_dir, log_filename='processed_segments.log'):
    """Return the path of the progress log, creating an empty file if it is absent.

    The log path is built from '/content' and `log_filename`; `output_dir` is
    accepted but not used. An existing log is left untouched.
    """
    log_path = os.path.join('/content', log_filename)
    if os.path.exists(log_path):
        return log_path

    # First run: start with an empty log.
    with open(log_path, 'w') as log_file:
        log_file.write('')
    return log_path

def check_if_processed(log_path, segment_id):
    """Tell whether `segment_id` appears as a full line in the log at `log_path`."""
    with open(log_path, 'r') as log_file:
        logged_text = log_file.read()
    already_logged = logged_text.splitlines()
    return segment_id in already_logged

def update_log(log_path, segment_id):
    """Append `segment_id` as a new line at the end of the log at `log_path`."""
    entry = f"{segment_id}\n"
    with open(log_path, 'a') as log_file:
        log_file.write(entry)


# Already-processed segments are skipped so that the run can pick up where it left off after a RAM crash
def generate_combined_mfccs(top_10_species_recordings, audio_save_dir, output_dir, segment_duration=3, log_filename='processed_segments.log', mixes_per_segment=5):
    """Create augmented MFCC images by mixing each segment with other species.

    Every recording is cut into segments. Each segment that is not yet listed
    in the progress log is overlaid with one random segment from each of up
    to `mixes_per_segment` distinct other species, and the MFCC of every mix
    is saved as an image in `output_dir`.

    Args:
        top_10_species_recordings: DataFrame with at least 'ID' and 'Species' columns.
        audio_save_dir: Directory that receives the WAV segments.
        output_dir: Directory that receives the MFCC images.
        segment_duration: Segment length in seconds.
        log_filename: Name of the progress log used to resume interrupted runs.
        mixes_per_segment: How many different partner species each segment is mixed with.
    """
    log_path = initialize_log_file(output_dir, log_filename)
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(audio_save_dir, exist_ok=True)

    unique_species = top_10_species_recordings['Species'].unique()

    # Segment each recording at most once per run. Partner recordings are
    # picked repeatedly, and re-loading and re-writing the same audio for
    # every single mix is by far the most expensive part of this loop.
    segment_cache = {}

    def _segments_for(recording_id):
        """Return the segment file list of a recording, segmenting it on first use."""
        key = str(recording_id)
        if key not in segment_cache:
            segment_cache[key] = download_and_segment_audio(key, audio_save_dir, segment_duration=segment_duration)
        return segment_cache[key]

    for species1 in unique_species:
        species1_recordings = top_10_species_recordings[top_10_species_recordings['Species'] == species1]
        # Candidate partners depend only on species1, so build the list once here.
        other_species = [sp for sp in unique_species if sp != species1]

        for _, row1 in species1_recordings.iterrows():
            species1_name = row1['Species']  # Retrieve species name for species1
            segment_filenames1 = _segments_for(row1['ID'])
            if not segment_filenames1:
                print(f"No segments found for {species1_name} ID {row1['ID']}")
                continue  # Skip this recording if no segments were created

            for seg1_filename in segment_filenames1:
                segment_id = os.path.basename(seg1_filename)
                if check_if_processed(log_path, segment_id):
                    print(f"Skipping processed segment {segment_id}")
                    continue  # Skip this seg if already processed

                if not other_species:  # Check if other species list is empty
                    print("No other species available for mixing")
                    continue

                # Sample without replacement so the partners really are
                # different species, as intended; this also keeps the output
                # file names of one segment from colliding.
                partner_species = random.sample(other_species, k=min(mixes_per_segment, len(other_species)))

                mixed_segments = []
                for species2 in partner_species:
                    species2_recordings = top_10_species_recordings[top_10_species_recordings['Species'] == species2].sample(1)
                    row2 = species2_recordings.iloc[0]
                    species2_name = row2['Species']
                    segment_filenames2 = _segments_for(row2['ID'])
                    if not segment_filenames2:
                        print(f"No segments found for {species2_name} ID {row2['ID']}")
                        continue  # Skip to the next partner if no segments were created

                    seg2_filename = random.choice(segment_filenames2)
                    mfcc = mix_audio_and_compute_mfcc(seg1_filename, seg2_filename)
                    output_filename = f"{species1_name}_{os.path.basename(seg1_filename)}+{species2_name}_{os.path.basename(seg2_filename)}.png"
                    save_mfcc_image(mfcc, output_filename, output_dir)
                    mixed_segments.append(seg2_filename)

                # Log the segment once, and only after all of its mixes are
                # written. Logging inside the loop marked it as done after the
                # first mix, so a crash mid-segment lost the remaining mixes
                # on resume, and it added one duplicate log line per mix.
                if mixed_segments:
                    update_log(log_path, segment_id)
                print(f"Segments mixed for {row1['ID']}: {mixed_segments}")

# USAGE #
# Build the augmented (mixed-species) MFCC image set for the top-10 species recordings.
# NOTE: the source MP3s are read from /content/MFCC (the path used by
# download_and_segment_audio); audio_save_dir only receives the WAV segments cut from them.
audio_save_dir = '/content/MFCC_audio'  # Directory to save audio files
output_directory = '/content/MFCC_augmented'  # Directory to save MFCC images
generate_combined_mfccs(top_10_species_recordings, audio_save_dir, output_directory)


Skipping processed segment 701552_seg_0.wav
Skipping processed segment 701552_seg_1.wav
Skipping processed segment 701552_seg_2.wav
Skipping processed segment 701552_seg_3.wav
Skipping processed segment 701552_seg_4.wav
Skipping processed segment 701552_seg_5.wav
Skipping processed segment 701552_seg_6.wav
Skipping processed segment 701552_seg_7.wav
Skipping processed segment 701552_seg_8.wav
Skipping processed segment 160190_seg_0.wav
Skipping processed segment 160078_seg_0.wav
Skipping processed segment 160078_seg_1.wav
Skipping processed segment 160078_seg_2.wav
Skipping processed segment 160078_seg_3.wav
Skipping processed segment 160078_seg_4.wav
Skipping processed segment 160078_seg_5.wav
Skipping processed segment 160078_seg_6.wav
Skipping processed segment 160078_seg_7.wav
Skipping processed segment 160078_seg_8.wav
Skipping processed segment 160078_seg_9.wav
Skipping processed segment 160078_seg_10.wav
Skipping processed segment 160078_seg_11.wav
Skipping processed segment 160

In [None]:
# Download the file
!zip -r /content/MFCC_augmented.zip /content/MFCC_augmented

# from google.colab import files
# files.download("/content/MFCC_augmented.zip")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/MFCC_augmented/Red-breasted Nuthatch_159157_seg_58.wav+Marsh Wren_158917_seg_11.wav.png (deflated 28%)
  adding: content/MFCC_augmented/Song Sparrow_638590_seg_1.wav+Bewick's Wren_160282_seg_0.wav.png (deflated 26%)
  adding: content/MFCC_augmented/Spotted Towhee_160245_seg_29.wav+White-crowned Sparrow_184998_seg_10.wav.png (deflated 26%)
  adding: content/MFCC_augmented/Bewick's Wren_475742_seg_5.wav+Red-winged Blackbird_164257_seg_1.wav.png (deflated 30%)
  adding: content/MFCC_augmented/Marsh Wren_158917_seg_21.wav+White-crowned Sparrow_160176_seg_3.wav.png (deflated 28%)
  adding: content/MFCC_augmented/Song Sparrow_160160_seg_18.wav+Spotted Towhee_159636_seg_7.wav.png (deflated 29%)
  adding: content/MFCC_augmented/Bewick's Wren_164259_seg_2.wav+Song Sparrow_160162_seg_1.wav.png (deflated 26%)
  adding: content/MFCC_augmented/Red-winged Blackbird_183592_seg_57.wav+Bewick's Wren_159972_seg_1.wav.png 