<a href="https://colab.research.google.com/github/archit436/Birds_Classifier/blob/main/Data%20Processing/DP_Stage2_XC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

0. Setup


In [None]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
# Import Relevant Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import requests
import os
import librosa
import soundfile as sf
from pydub import AudioSegment
# Copied from Prarathona's DP Pipeline for Baseline models.
import cv2
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import torch

1. Data Acquisition from Xeno-Canto


In [None]:
# Mount Google Drive so that the shared project folder is reachable under
# /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')
csv_path = "/content/drive/MyDrive/APS360_Team_15/Data_Processing/chosen_classes_data_stats.csv"

# Load the per-species statistics produced by stage 1 from the csv file.
# The cells below read the "Class ID", "XC Species Name" and
# "XC Recordings Count" columns of this dataframe.
birds_df = pd.read_csv(csv_path)
# Show the first rows as a quick sanity check of the loaded data.
birds_df.head()

Mounted at /content/drive


Unnamed: 0,Index,Class ID,Images Count,XC Recordings Count,Species Name,XC Species Name
0,0,315,116,169,Gadwall (Breeding male),Gadwall
1,1,317,120,244,Mallard (Breeding male),Mallard
2,2,333,105,112,Common Goldeneye (Breeding male),Common Goldeneye
3,3,338,116,89,California Quail (Male),California Quail
4,4,352,120,287,Black-crowned Night-Heron (Adult),Black-crowned Night Heron


In [None]:
# We will now query into Xeno-Canto API to download the audio files for the birds.
# The recordings of one species are stored as
#   <xeno_canto_dir>/<Class ID>/<index>.mp3
# where <index> is the position of the recording in the API search response.
xeno_canto_dir = "/content/drive/MyDrive/APS360_Team_15/Data/Xeno_Canto"
os.makedirs(xeno_canto_dir, exist_ok=True)

# Iterate through all the birds manually, due to timeout issues with Xeno Canto
it_bird = 106
# We limit the recordings to 500 per species, if they have more available.
max_recordings = 500
# Seconds to wait on the server, so that a stalled request cannot hang the cell.
request_timeout = 60

# Extract basic data about the bird from the dataframe.
bird_id = birds_df["Class ID"][it_bird]
bird_xc_name = birds_df["XC Species Name"][it_bird]
bird_xc_recordings_count = birds_df["XC Recordings Count"][it_bird]

# Make a subdirectory for the current bird using its Class ID.
bird_dir = os.path.join(xeno_canto_dir, str(bird_id))
os.makedirs(bird_dir, exist_ok=True)

# Get the bird name and query the Xeno-Canto API.
# As before, we only query for high quality recordings (q:A).
search_query = "+".join(bird_xc_name.split())
encoded_search_query = f"{search_query}+q:A"
url = f"https://www.xeno-canto.org/api/2/recordings?query={encoded_search_query}"
search_response = requests.get(url, timeout=request_timeout)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
search_response.raise_for_status()
search_data = search_response.json()

# Slice the response instead of indexing it up to the count stored in the CSV:
# the API may return fewer recordings than were counted in stage 1, and
# indexing past the end of the list would abort the whole cell.
recordings = search_data["recordings"][:int(min(max_recordings, bird_xc_recordings_count))]

# Use the search query response to download all the audio files.
downloaded_count = 0
failed_indices = []
for it_rec, recording in enumerate(recordings):
    audio_url = recording["file"]
    try:
        audio_response = requests.get(audio_url, timeout=request_timeout)
        # Without this check an HTML error page would be saved as an .mp3 file.
        audio_response.raise_for_status()
    except requests.RequestException as e:
        # Keep going: one bad recording should not cost the whole species.
        print(f"Failed to download recording {it_rec} ({audio_url}): {e}")
        failed_indices.append(it_rec)
        continue
    # The file is only written after the full body arrived successfully, so no
    # truncated file is left behind.
    with open(f"{bird_dir}/{it_rec}.mp3", "wb") as f:
        f.write(audio_response.content)
    downloaded_count += 1

print(f"Downloaded {downloaded_count} recordings for {bird_xc_name}.")
if failed_indices:
    print(f"Recordings that still need to be reacquired: {failed_indices}")

Downloaded 337 recordings for Red-winged Blackbird.


In [None]:
# Code to reacquire any faulty recordings.
# Re-downloads one recording, identified by the bird's Class ID and by the
# index of the recording in the Xeno-Canto search response (which is also the
# name of the .mp3 file on disk).
bird_class_id = 987
file_id = 241
# Seconds to wait on the server, so that a stalled request cannot hang the cell.
request_timeout = 60

# Find the name corresponding this bird.
matching_names = birds_df.loc[birds_df["Class ID"] == bird_class_id, "XC Species Name"].values
if len(matching_names) == 0:
    raise ValueError(f"Class ID {bird_class_id} was not found in birds_df.")
bird_name = matching_names[0]

# Make the query and get the search response.
search_query = "+".join(bird_name.split())
encoded_search_query = f"{search_query}+q:A"
url = f"https://www.xeno-canto.org/api/2/recordings?query={encoded_search_query}"
search_response = requests.get(url, timeout=request_timeout)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
search_response.raise_for_status()
search_data = search_response.json()

# Check the index before using it, to give a readable error message.
available_count = len(search_data["recordings"])
if not 0 <= file_id < available_count:
    raise IndexError(
        f"file_id {file_id} is out of range: the search returned {available_count} recordings."
    )

# Retrieve the audio file.
audio_url = search_data["recordings"][file_id]["file"]
audio_response = requests.get(audio_url, timeout=request_timeout)
# Abort on an HTTP error: otherwise the faulty file would just be replaced by
# an error page saved with an .mp3 extension.
audio_response.raise_for_status()

# Define the directory to write to and then write the audio file.
xeno_canto_dir = "/content/drive/MyDrive/APS360_Team_15/Data/Xeno_Canto"
bird_dir = os.path.join(xeno_canto_dir, str(bird_class_id))
os.makedirs(bird_dir, exist_ok=True)
with open(f"{bird_dir}/{file_id}.mp3", "wb") as f:
    f.write(audio_response.content)
print(f"Downloaded {file_id}.mp3 for {bird_name}.")
print(search_data["recordings"][file_id])

Downloaded 241.mp3 for Red-winged Blackbird.
{'id': '314546', 'gen': 'Agelaius', 'sp': 'phoeniceus', 'ssp': 'stereus/fortis', 'group': 'birds', 'en': 'Red-winged Blackbird', 'rec': 'Nick Komar', 'cnt': 'United States', 'loc': 'Fort Collins, Larimer County, Colorado', 'lat': '40.5717', 'lng': '-105.1289', 'alt': '1600', 'type': 'song, atypical male song', 'sex': 'male', 'stage': '', 'method': 'field recording', 'url': '//xeno-canto.org/314546', 'file': 'https://xeno-canto.org/314546/download', 'file-name': 'XC314546-160501_006 RWBL male song alt 0710 Ft Collins CO nk.mp3', 'sono': {'small': '//xeno-canto.org/sounds/uploaded/ODAYZINCOA/ffts/XC314546-small.png', 'med': '//xeno-canto.org/sounds/uploaded/ODAYZINCOA/ffts/XC314546-med.png', 'large': '//xeno-canto.org/sounds/uploaded/ODAYZINCOA/ffts/XC314546-large.png', 'full': '//xeno-canto.org/sounds/uploaded/ODAYZINCOA/ffts/XC314546-full.png'}, 'osci': {'small': '//xeno-canto.org/sounds/uploaded/ODAYZINCOA/wave/XC314546-small.png', 'med': '

2. Data Processing - Spectrograms


In [None]:
# Define a function to convert audio files into spectrogram images.
# We are creating colored db spectrograms with normalised values.
# All other parameters are default values.
def audio_to_spectogram(audio_path, img_size = (128, 128)):
  """Convert an audio file into a coloured mel-spectrogram image.

  Args:
    audio_path: Path of the audio file to load.
    img_size: Target size passed to cv2.resize, which reads it as
      (width, height). The default is square, so the order only matters for
      non-square sizes.

  Returns:
    A float32 array with 3 colour channels (viridis colormap, alpha dropped)
    and values scaled to [0, 255], or None if the file could not be processed.
    A recording whose spectrogram is constant (e.g. pure silence) yields a
    uniform image at the lowest colormap value.
  """
  try:
    # Load the audio file. Keep sample rate the same as the one of the audio file.
    y, sr = librosa.load(audio_path, sr=None)

    # Generate mel spectrogram with default settings.
    # We use a mel spectrogram because it emphasizes the lower and mid-range frequencies
    # by aligning the axis values. This suits birds sounds.
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
    # Convert to db scale, to allow both loud and quiet parts to be captured effectively.
    db_spec = librosa.power_to_db(mel_spec, ref=np.max)

    # Normalise the db values to [0,1].
    spec_min = np.min(db_spec)
    spec_range = np.max(db_spec) - spec_min
    if spec_range > 0:
      norm_spec = (db_spec - spec_min) / spec_range
    else:
      # A constant spectrogram would give 0/0 = NaN here. NaN only triggers a
      # warning, not an exception, so it would silently end up in the dataset.
      norm_spec = np.zeros_like(db_spec)
    # Resize to target dimension.
    resized_spec = cv2.resize(norm_spec, img_size)

    # Apply a colormap to convert to RGB.
    cmap = plt.get_cmap('viridis')
    # Drop alpha channel.
    img_colored = cmap(resized_spec)[:, :, :3]
    # Convert to float32 to make it suitable for use by models.
    img = (img_colored * 255).astype(np.float32)

    return img
  except Exception as e:
    # Best effort: report the faulty file and let the caller skip it.
    print(f"Error processing {audio_path}: {e}")
    return None

In [None]:
# For each bird, we generate spectrograms and we store them in a .pt tensor.
# Define a directory to store the spectrograms.
data_dir = "/content/drive/MyDrive/APS360_Team_15/Data"
spec_dir = os.path.join(data_dir, "Xeno_Canto_Spectrograms")
os.makedirs(spec_dir, exist_ok=True)

# Iterate through all the birds manually, for data integrity.
it_bird = 105
# Extract bird class id from the birds dataframe.
bird_id = birds_df["Class ID"][it_bird]
# Define directory of the audio files.
bird_audio_dir = os.path.join(data_dir, "Xeno_Canto", str(bird_id))

# Collect only the mp3 files (any letter case of the extension).
# os.listdir returns entries in arbitrary order, so sort them to make the order
# of the saved tensor reproducible; sorting by (length, name) puts "2.mp3"
# before "10.mp3".
audio_file_names = sorted(
  (name for name in os.listdir(bird_audio_dir) if name.lower().endswith(".mp3")),
  key=lambda name: (len(name), name),
)

# Iterate through all the files in the audio files directory.
# Create a list to store the spectrograms
spec_list = []
for file_name in audio_file_names:
  print(f"Looking at file {file_name}")
  # Define the path to the audio file.
  audio_path = os.path.join(bird_audio_dir, file_name)
  # Convert the audio file to a spectrogram and add it to the list.
  # Files that could not be processed come back as None and are skipped.
  spec = audio_to_spectogram(audio_path)
  if spec is not None:
    spec_list.append(spec)

# Refuse to save an empty tensor: it would look like a valid output file.
if not spec_list:
  raise RuntimeError(f"No spectrograms were generated from {bird_audio_dir}.")

# Convert the list to a tensor.
spec_tensor = torch.tensor(np.array(spec_list))
# Save the tensor to a file.
output_file = os.path.join(spec_dir, f"{bird_id}.pt")
torch.save(spec_tensor, output_file)
print(f"Converted {len(spec_list)} of {len(audio_file_names)} mp3 files.")
print(f"Saved spectogram tensor for bird num {it_bird} with id: {bird_id}.")