# Data Collection

## Import Required Libraries

In [None]:
import os
import requests
from tqdm import tqdm
import shutil

## Xeno-canto API Function

In [None]:
# Save bird calls for specific species from the xeno-canto API
def download_bird_calls(species_name, base_directory, max_results, max_duration_seconds):
    """Download short MP3 recordings of one species from the xeno-canto API.

    The species folder (``base_directory/<species_name lower-cased, spaces
    replaced by underscores>``) is deleted if it exists and recreated empty
    before anything is downloaded, so previous downloads for the species are
    always discarded. Recordings returned by the search are then examined in
    order, and those that are MP3 files no longer than ``max_duration_seconds``
    are saved as ``<folder>_<n>.mp3`` until ``max_results`` files are stored.

    Only the recordings contained in the single search response are
    considered; no further result pages are requested.

    Args:
        species_name: Species name used as the search query.
        base_directory: Directory that holds one sub-folder per species.
        max_results: Maximum number of files to save.
        max_duration_seconds: Longest accepted recording length in seconds.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Make the save directory (bird species specific folder)
    folder_name = species_name.lower().replace(" ", "_")
    save_directory = os.path.join(base_directory, folder_name)

    # Clear existing folder if it exists, then start from an empty one
    if os.path.exists(save_directory):
        shutil.rmtree(save_directory)
    os.makedirs(save_directory)

    # Print the search query
    print(f"Searching for: {species_name}")

    # Make the API request. Passing the query through `params` lets requests
    # URL-encode it, and the timeout keeps a stalled server from hanging the
    # notebook indefinitely.
    api_url = "https://xeno-canto.org/api/2/recordings"
    try:
        response = requests.get(api_url, params={"query": species_name}, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON
        print(f"Failed to query xeno-canto for {species_name}: {e}")
        return

    recordings = data.get("recordings", [])

    # Check if any recordings were found
    if not recordings:
        print("No recordings found.")
        return

    # Count of files saved so far
    saved = 0

    # Loop through the obtained recordings
    for recording in tqdm(recordings, desc = "Checking recordings"):

        # Stop once the desired number of recordings has been saved
        if saved >= max_results:
            break

        # Only MP3 files are kept (extension compared case-insensitively);
        # entries without a file name are skipped instead of raising KeyError
        original_name = recording.get("file-name") or ""
        if not original_name.lower().endswith(".mp3"):
            continue

        # Convert the "minutes:seconds" length to seconds; recordings whose
        # length is missing or in any other format are skipped
        try:
            minutes, seconds = map(int, recording.get("length", "").split(":"))
        except (ValueError, AttributeError):
            continue
        duration = minutes * 60 + seconds

        # Skip recordings longer than the desired duration
        if duration > max_duration_seconds:
            continue

        # Skip entries that carry no download URL
        url = recording.get("file")
        if not url:
            continue

        # Name the file after the species and its position in the saved set
        filename = f"{folder_name}_{saved + 1}.mp3"
        file_path = os.path.join(save_directory, filename)

        try:
            audio = requests.get(url, timeout = 3)

            # Reject HTTP error responses so an error page is never written
            # to disk as an .mp3 and counted as a successful download
            audio.raise_for_status()

            with open(file_path, "wb") as file:
                file.write(audio.content)

        # If the download or the write fails, report it and move on
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download {url}: {e}")

        else:
            # Increment the saved count only after the file was written
            saved += 1

    # Print the number of files downloaded for the species
    print(f"Downloaded {saved} .mp3 files to {save_directory}\n")

## Collect Bird Call Data

In [None]:
# Reset a directory so that it exists and is empty
def clear_folder_content(base_directory):
    """Remove ``base_directory`` with everything in it, then recreate it empty.

    A path that does not exist yet is simply created.
    """
    already_present = os.path.exists(base_directory)

    # Drop the whole tree first so no old files survive the reset
    if already_present:
        shutil.rmtree(base_directory)

    os.makedirs(base_directory)


# Start from empty raw and processed data directories
for _directory_to_reset in ("../data/raw/", "../data/processed/"):
    clear_folder_content(_directory_to_reset)


# Initialize parameters for downloading bird calls
birds = [
    "red-winged blackbird",
    "northern cardinal",
    "american barn owl",
    "canada goose",
]
max_duration_seconds = 20
max_results = 100
base_directory = "../data/raw/"


# Download bird calls for each species in the list
#for bird in birds:
#    download_bird_calls(bird, base_directory, max_results, max_duration_seconds)


PermissionError: [WinError 5] Access is denied: '../data/raw/american_robin'