In [75]:
import requests
import os
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

In [76]:
def download_file(file_url, file_path, timeout=30):
    """Download a single file and save it to the specified path.

    The response body is streamed into a temporary ``<file_path>.part`` file
    that is moved into place only after every chunk has been written, so an
    interrupted download never leaves a truncated file at ``file_path``.

    Args:
        file_url: URL to fetch.
        file_path: Destination path for the downloaded file.
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds), so
            a stalled server cannot block the calling thread forever.

    Returns:
        True if the file was downloaded and saved, False if any error occurred.
        Errors are printed rather than raised.
    """
    partial_path = f"{file_path}.part"
    try:
        with requests.get(file_url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
        # Publish the file only once it is complete.
        os.replace(partial_path, file_path)
        print(f"Downloaded: {file_path}")
        return True
    except Exception as e:
        # Best-effort cleanup of the incomplete temporary file.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"Error downloading {file_path}: {e}")
        return False

def download_audio_files(data, folder_name, limit=200, max_workers=5):
    """Download the audio file of each recording in ``data`` into a folder.

    Each record's ``'file'`` URL is fetched with ``download_file`` on a thread
    pool and saved as ``<id>.mp3`` inside ``folder_name``.

    Args:
        data: Mapping with a ``'recordings'`` list; every record must provide
            the ``'file'`` and ``'id'`` keys.
        folder_name: Target directory; created if it does not exist.
        limit: Maximum number of recordings to download (``None`` for all).
        max_workers: Number of concurrent download threads.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_name, exist_ok=True)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_file = {}
        for record in data['recordings'][:limit]:
            file_url = record['file']
            file_name = f"{record['id']}.mp3"
            file_path = os.path.join(folder_name, file_name)
            future = executor.submit(download_file, file_url, file_path)
            future_to_file[future] = file_path

        # Collect every result so an exception raised inside a worker is
        # reported instead of being silently discarded with its future.
        for future in as_completed(future_to_file):
            file_path = future_to_file[future]
            try:
                future.result()
            except Exception as e:
                print(f"Error downloading {file_path}: {e}")

In [77]:
# Fold counter, kept as a string because it is concatenated onto the folder
# name; download_metadata() increments it after every call.
i = "1"

In [78]:
def download_metadata(data, folder_name, common_name, limit=200, fold=None):
    """Collect metadata for each recording and save it as ``metadata.csv``.

    Args:
        data: Mapping with a ``'recordings'`` list; every record must provide
            ``'id'`` and may provide ``'cnt'``, ``'lat'`` and ``'lng'``.
        folder_name: Directory that receives ``metadata.csv``; created if it
            does not exist.
        common_name: Value written to the ``name`` column of every row.
        limit: Maximum number of recordings to include (``None`` for all).
        fold: Value for the ``fold`` column. When omitted, the module-level
            counter ``i`` is used and then incremented, as before. When given,
            ``i`` is left untouched so the call is repeatable.
    """
    global i  # Declare i as global to modify its value outside the function
    use_global_fold = fold is None
    fold_value = i if use_global_fold else fold

    columns = ["filename", "country", "name", "lat", "lng", "fold"]
    metadata = [
        {
            "filename": f"{record['id']}.mp3",  # file name only, not the full path
            "country": record.get('cnt'),
            "name": common_name,
            "lat": record.get('lat'),
            "lng": record.get('lng'),
            "fold": fold_value,
        }
        for record in data['recordings'][:limit]
    ]

    # Make sure the target folder exists so to_csv cannot fail on a missing
    # directory when this is called before any audio has been downloaded.
    os.makedirs(folder_name, exist_ok=True)

    # Explicit columns keep the header present even when there are no rows.
    metadata_df = pd.DataFrame(metadata, columns=columns)
    metadata_df.to_csv(os.path.join(folder_name, "metadata.csv"), index=False)
    if use_global_fold:
        i = str(int(i) + 1)
    print(f"Metadata saved to {folder_name}/metadata.csv")


In [115]:
# Search the xeno-canto API for recordings of one species.
species = "Common Pheasant"
records = 200
url = f"https://www.xeno-canto.org/api/2/recordings?query={species}&num={records}"
# A timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail here with a clear HTTP error instead of a confusing JSON/KeyError later.
response.raise_for_status()
data = response.json()

In [116]:
# Common name, taken from the 'en' field of the first recording in the response.
common_name = data["recordings"][0]["en"]
# Folder prefix; the fold counter `i` is appended to it when the helpers are called.
folder_name = "sample"

In [117]:
# Download the audio into the folder named by the prefix plus the current fold counter.
download_audio_files(data, folder_name + i)

Downloaded: sample10\916392.mp3
Downloaded: sample10\916393.mp3
Downloaded: sample10\922701.mp3
Downloaded: sample10\895082.mp3
Downloaded: sample10\890518.mp3
Downloaded: sample10\871904.mp3
Downloaded: sample10\881961.mp3
Downloaded: sample10\840640.mp3
Downloaded: sample10\872784.mp3
Downloaded: sample10\835192.mp3
Downloaded: sample10\808441.mp3
Downloaded: sample10\799489.mp3
Downloaded: sample10\801092.mp3
Downloaded: sample10\796121.mp3
Downloaded: sample10\884861.mp3
Downloaded: sample10\792476.mp3
Downloaded: sample10\929867.mp3
Downloaded: sample10\792839.mp3
Downloaded: sample10\792475.mp3
Downloaded: sample10\791216.mp3
Downloaded: sample10\786448.mp3
Downloaded: sample10\776246.mp3
Downloaded: sample10\766428.mp3
Downloaded: sample10\784775.mp3
Downloaded: sample10\784065.mp3
Downloaded: sample10\749402.mp3
Downloaded: sample10\738441.mp3
Downloaded: sample10\738400.mp3
Downloaded: sample10\738398.mp3
Downloaded: sample10\738324.mp3
Downloaded: sample10\738315.mp3
Download

In [118]:
# Write metadata.csv into the same folder; this call also advances the global fold counter `i`.
download_metadata(data, folder_name + i, common_name)

Metadata saved to sample10/metadata.csv
