# Data Collection

## Import Required Libraries

In [37]:
import os
import requests
from tqdm import tqdm
import shutil

## Xeno-canto API Function

In [None]:
# Save bird calls for a specific species from the xeno-canto API
# Network timeouts (seconds) so a stalled connection cannot hang the notebook
_API_TIMEOUT_SECONDS = 10
_DOWNLOAD_TIMEOUT_SECONDS = 3


def _parse_duration_seconds(length):
    """Convert a "m:ss" or "h:mm:ss" length string to whole seconds.

    Returns None when the value is missing or malformed, so the caller can
    skip the recording instead of guessing its duration.
    """
    try:
        parts = [int(part) for part in length.split(":")]
    except (AttributeError, ValueError):
        return None

    if len(parts) not in (2, 3):
        return None

    total = 0
    for part in parts:
        total = total * 60 + part
    return total


def download_bird_calls(species_name, base_directory, max_results, max_duration_seconds):
    """Download short MP3 recordings of one species from the xeno-canto API.

    Files are written to ``<base_directory>/<species_name, lower-cased with
    spaces replaced by underscores>`` and named ``<folder>_<n>.mp3``, where
    ``n`` counts successfully saved files starting at 1.

    Parameters
    ----------
    species_name : str
        Search term sent to the API (for example "bald eagle").
    base_directory : str
        Directory under which the per-species folder is created.
    max_results : int
        Stop once this many files have been saved.
    max_duration_seconds : int
        Recordings longer than this are skipped.

    Returns
    -------
    None
        Progress and failures are reported with ``print``.

    Notes
    -----
    * Any existing per-species folder is deleted and recreated, but only after
      the API query has succeeded, so a network failure leaves earlier
      downloads untouched.
    * Only the recordings contained in the single API response are considered.
      NOTE(review): if the API paginates, later pages are not fetched - confirm.
    """
    folder_name = species_name.lower().replace(" ", "_")
    save_directory = os.path.join(base_directory, folder_name)

    print(f"Searching for: {species_name}")

    # Query first: nothing on disk is touched until we know the API answered.
    # Passing the term via `params` lets requests URL-encode it properly.
    try:
        response = requests.get(
            "https://xeno-canto.org/api/2/recordings",
            params={"query": species_name},
            timeout=_API_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Failed to query xeno-canto for {species_name}: {e}")
        return

    recordings = data.get("recordings", []) if isinstance(data, dict) else []

    # Start from an empty folder so files from an earlier run never mix in
    if os.path.exists(save_directory):
        shutil.rmtree(save_directory)
    os.makedirs(save_directory)

    if not recordings:
        print("No recordings found.")
        return

    # Number of files written so far; also drives the output file names
    saved = 0

    for recording in tqdm(recordings, desc = "Checking recordings"):

        # Stop as soon as the desired number of recordings has been saved
        if saved >= max_results:
            break

        # Only MP3 uploads are wanted; tolerate a missing or upper-case name
        file_name = recording.get("file-name") or ""
        if not file_name.lower().endswith(".mp3"):
            continue

        # Skip recordings whose length is unreadable or above the limit
        duration = _parse_duration_seconds(recording.get("length", ""))
        if duration is None or duration > max_duration_seconds:
            continue

        url = recording.get("file")
        if not url:
            continue

        # Defensive: a protocol-relative URL ("//host/...") has no scheme
        # and cannot be requested as-is
        if url.startswith("//"):
            url = "https:" + url

        filename = f"{folder_name}_{saved + 1}.mp3"
        file_path = os.path.join(save_directory, filename)

        try:
            audio = requests.get(url, timeout = _DOWNLOAD_TIMEOUT_SECONDS)

            # An HTTP error page must not be stored as if it were audio
            audio.raise_for_status()

            with open(file_path, "wb") as file:
                file.write(audio.content)

        # Best effort: report the failure and move on to the next recording
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download {url}: {e}")
            continue

        saved += 1

    print(f"Downloaded {saved} .mp3 files to {save_directory}\n")

## Collect Bird Call Data

In [40]:
# Root folder for raw audio; each species gets its own sub-folder beneath it
base_directory = "../data/raw/"

# Per-species cap on saved clips, and the longest clip (in seconds) to keep
max_results = 50
max_duration_seconds = 20

# Species to collect, written as the search terms sent to the API
birds = [
    "bald eagle",
    "red-tailed hawk",
    "american robin",
    "northern cardinal",
]

# Fetch each species in turn with the same limits
for bird in birds:
    download_bird_calls(
        species_name=bird,
        base_directory=base_directory,
        max_results=max_results,
        max_duration_seconds=max_duration_seconds,
    )


Searching for: bald eagle


Checking recordings: 100%|██████████| 117/117 [00:51<00:00,  2.27it/s]


Downloaded 39 .mp3 files to ../data/raw/bald_eagle
Searching for: red-tailed hawk


Checking recordings:  96%|█████████▌| 199/207 [01:11<00:02,  2.77it/s]


Downloaded 50 .mp3 files to ../data/raw/red-tailed_hawk
Searching for: american robin


Checking recordings:  59%|█████▉    | 297/500 [01:18<00:53,  3.77it/s]


Downloaded 50 .mp3 files to ../data/raw/american_robin
Searching for: northern cardinal


Checking recordings:  66%|██████▌   | 330/500 [01:26<00:44,  3.81it/s]

Downloaded 50 .mp3 files to ../data/raw/northern_cardinal



