In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import requests
import csv
import urllib.request
%matplotlib inline
import re

import os


In [2]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

In [3]:
# Fetch the raw recordings payload from the xeno-canto API (later turned into a csv file)

# specify url
web_url = "https://xeno-canto.org/api/2/recordings?query=sp&field=species/all"

# Read the body inside a context manager so the HTTP connection is always
# closed, and use a timeout so the cell cannot hang on an unresponsive server.
# `html` holds the raw response bytes, which BeautifulSoup accepts as markup.
with urlopen(web_url, timeout=30) as api_response:
    html = api_response.read()

In [4]:
# Run the downloaded payload through Python's built-in HTML parser
soup = BeautifulSoup(markup=html, features='html.parser')
# Display the resulting object's type as the cell output
type(soup)

bs4.BeautifulSoup

In [5]:
# Extract the response body as plain text
text = soup.get_text()

# Print only a short preview: the full payload contains every recording on the
# page and would flood the notebook output. `text` itself is left untouched.
print(text[:1000])

{"numRecordings":"44150","numSpecies":"551","page":1,"numPages":89,"recordings":[{"id":"36065","gen":"Synallaxis","sp":"chinchipensis","ssp":"","group":"birds","en":"Chinchipe Spinetail","rec":"Frank Lambert","cnt":"Peru","loc":"between Jaen and San Ignacio","lat":null,"lng":null,"alt":"?","type":"call, song","sex":"","stage":"","method":"field recording","url":"\/\/xeno-canto.org\/36065","file":"https:\/\/xeno-canto.org\/36065\/download","file-name":"SPChinchipe_FL 3Jul06 between Jaen_SanIgnacio.mp3","sono":{"small":"\/\/xeno-canto.org\/sounds\/uploaded\/YTUXOCTUEM\/ffts\/XC36065-small.png","med":"\/\/xeno-canto.org\/sounds\/uploaded\/YTUXOCTUEM\/ffts\/XC36065-med.png","large":"\/\/xeno-canto.org\/sounds\/uploaded\/YTUXOCTUEM\/ffts\/XC36065-large.png","full":"\/\/xeno-canto.org\/sounds\/uploaded\/YTUXOCTUEM\/ffts\/XC36065-full.png"},"osci":{"small":"\/\/xeno-canto.org\/sounds\/uploaded\/YTUXOCTUEM\/wave\/XC36065-small.png","med":"\/\/xeno-canto.org\/sounds\/uploaded\/YTUXOCTUEM\/wave\

In [6]:
# Decode the JSON text into a Python dictionary
parsed_json = json.loads(text)

# Build the DataFrame from the list stored under the 'recordings' key
# (one row per recording), then preview the first rows.
df = pd.DataFrame(data=parsed_json['recordings'])
df.head()

Unnamed: 0,id,gen,sp,ssp,group,en,rec,cnt,loc,lat,...,rmk,bird-seen,animal-seen,playback-used,temp,regnr,auto,dvc,mic,smp
0,36065,Synallaxis,chinchipensis,,birds,Chinchipe Spinetail,Frank Lambert,Peru,between Jaen and San Ignacio,,...,This taxon is more usually called Chinchipe Sp...,unknown,unknown,unknown,,,no,,,44100
1,692727,Arremon,dorbignii,,birds,Moss-backed Sparrow,Jacob Wijpkema,Bolivia,"Franz Tamayo, La Paz Department",-14.4185,...,"In Bolivia this is now called ""Moss-backed Spa...",yes,yes,no,,,no,,,44100
2,296852,Synallaxis,chinchipensis,,birds,Chinchipe Spinetail,Ross Gallardy,Peru,"San Ignacio, Cajamarca",-5.4981,...,,yes,yes,yes,,,no,,,44100
3,296851,Synallaxis,chinchipensis,,birds,Chinchipe Spinetail,Ross Gallardy,Peru,"San Ignacio, Cajamarca",-5.4981,...,,yes,yes,yes,,,no,,,44100
4,27768,Synallaxis,chinchipensis,,birds,Chinchipe Spinetail,David Edwards,Peru,Chirinos,-5.24667,...,,unknown,unknown,unknown,,,no,,,44100


In [7]:
# Summarise every column (count / unique / top / freq) to sanity-check the parsed recordings
df.describe()

Unnamed: 0,id,gen,sp,ssp,group,en,rec,cnt,loc,lat,...,rmk,bird-seen,animal-seen,playback-used,temp,regnr,auto,dvc,mic,smp
count,500,500,500,500.0,500,500,500,500,500,451.0,...,500.0,500,500,500,500.0,500.0,500,500.0,500.0,500
unique,500,14,16,18.0,1,17,172,32,347,326.0,...,233.0,3,3,3,1.0,1.0,1,6.0,5.0,8
top,36065,Ortalis,guttata,,birds,Speckled Chachalaca,Peter Boesman,Brazil,Wildlife Institute of India Campus--Nature Tra...,30.2864,...,,unknown,unknown,no,,,no,,,44100
freq,1,115,122,405.0,500,115,51,140,6,6.0,...,235.0,206,206,259,500.0,500.0,500,490.0,491.0,331


In [8]:
# Persist the recordings table as a CSV file, leaving out the DataFrame's row index
df.to_csv(path_or_buf='bird_song_dataset.csv', index=False)


In [9]:
# Save the distinct species names returned by the API to a CSV file
url = "https://xeno-canto.org/api/2/recordings?query=sp&field=species/all"
# The timeout keeps the cell from hanging forever if the server stops responding.
response = requests.get(url, timeout=30)

if response.status_code == 200:
    data = response.json()
    if "recordings" in data:
        # De-duplicate with a set, then sort: set iteration order is arbitrary,
        # so sorting makes the CSV identical from one run to the next.
        # NOTE(review): "sp" holds only the specific epithet (e.g. "chinchipensis");
        # combine it with "gen" if full binomial names are wanted.
        species_list = sorted({recording["sp"] for recording in data["recordings"]})

        # Specify the file path for the CSV file
        csv_file_path = "species_list.csv"

        # Write the species_list to the CSV file
        with open(csv_file_path, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["Species"])  # Write header row
            writer.writerows([species] for species in species_list)  # One species per row

        print("Data saved to CSV file successfully.")
    else:
        # Previously this case produced no output at all, hiding the failure.
        print("Error: API response did not contain a 'recordings' entry.")
else:
    print("Error: Unable to fetch data from the API.")

Data saved to CSV file successfully.


In [10]:
import os
import requests
import urllib.request

# Xeno-Canto API endpoint for bird songs.
# The stray trailing "?" was removed: it was sent as part of the `field` value
# ("species/all?") and did not match the endpoint used in the earlier cells.
api_url = "https://xeno-canto.org/api/2/recordings?query=sp&field=species/all"

def download_audio(url, file_path):
    """Download the audio file at ``url`` and save it to ``file_path``.

    The data is first written to a temporary ``<file_path>.part`` file and only
    renamed to ``file_path`` once the transfer has completed. An interrupted
    download therefore never leaves a truncated file at ``file_path`` and never
    overwrites a file that was saved successfully earlier.

    Args:
        url: Location of the audio file to fetch.
        file_path: Destination path for the saved file.

    Returns:
        True if the file was downloaded and saved, False otherwise. Errors are
        reported on stdout rather than raised.
    """
    temp_path = f"{file_path}.part"
    try:
        urllib.request.urlretrieve(url, temp_path)
        # Atomic rename: the destination only ever holds a complete download.
        os.replace(temp_path, file_path)
        print(f"Audio file saved: {file_path}")
        return True
    except Exception as e:
        print(f"Error downloading audio: {e}")
        # Remove whatever was written before the failure.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                print(f"Could not remove partial download {temp_path}: {cleanup_error}")
        return False

def _call_type_folder(call_type):
    """Return the sub-folder name for a recording's free-text ``type`` value."""
    # Whole-word, case-insensitive match so "Alarm call" and "alarm call" are filed together.
    if re.search(r'\balarm\b', call_type, flags=re.IGNORECASE):
        return "alarm_calls"
    if re.search(r'\bduet\b', call_type, flags=re.IGNORECASE):
        return "duet_calls"
    return "other_calls"


def extract_bird_songs(api_url):
    """Download the recordings listed by ``api_url`` into ``bird_sounds/<call type>/``.

    Each recording is saved as ``<id>.mp3`` in one of three sub-folders
    (``alarm_calls``, ``duet_calls`` or ``other_calls``) chosen from its
    ``type`` field. Only the single page returned for ``api_url`` is processed;
    the API response reports further pages (``numPages``) that are not followed.

    Args:
        api_url: Xeno-Canto API URL returning a JSON object with a
            ``recordings`` list.

    Returns:
        None. Progress and errors are reported on stdout; no exception is
        propagated to the caller.
    """
    try:
        # The timeout prevents an unresponsive server from blocking forever.
        response = requests.get(api_url, timeout=30)
        if response.status_code == 200:
            data = response.json()

            # Create main folder to store audio files (no error if it already exists)
            main_folder = "bird_sounds"
            os.makedirs(main_folder, exist_ok=True)

            for recording in data["recordings"]:
                recording_id = recording.get("id")
                audio_url = recording.get("file")

                # Skip incomplete records instead of letting one bad entry
                # abort every remaining download.
                if not recording_id or not audio_url:
                    print(f"Skipping recording without id or audio URL: {recording_id}")
                    continue

                # Pick (and create if needed) the sub-folder for this call type
                folder_name = _call_type_folder(recording.get("type") or "")
                type_folder = os.path.join(main_folder, folder_name)
                os.makedirs(type_folder, exist_ok=True)

                # The recording ID gives each file a unique name
                file_path = os.path.join(type_folder, f"{recording_id}.mp3")

                # Download the audio file
                download_audio(audio_url, file_path)

        else:
            print(f"Error fetching data from Xeno-Canto API. Status code: {response.status_code}")
    except Exception as e:
        print(f"Error extracting bird songs: {e}")

# Start the download when this code is executed directly rather than imported.
# When this cell is run in the notebook the guard passes, so the downloads
# begin as soon as the cell is executed.
if __name__ == "__main__":
    extract_bird_songs(api_url)


Audio file saved: bird_sounds\other_calls\36065.mp3
Audio file saved: bird_sounds\other_calls\692727.mp3
Audio file saved: bird_sounds\other_calls\296852.mp3
Audio file saved: bird_sounds\other_calls\296851.mp3
Audio file saved: bird_sounds\other_calls\27768.mp3
Audio file saved: bird_sounds\other_calls\763551.mp3
Audio file saved: bird_sounds\other_calls\692726.mp3
Audio file saved: bird_sounds\other_calls\494097.mp3
Audio file saved: bird_sounds\other_calls\351193.mp3
Audio file saved: bird_sounds\other_calls\288810.mp3
Audio file saved: bird_sounds\other_calls\230215.mp3
Audio file saved: bird_sounds\other_calls\230214.mp3
Audio file saved: bird_sounds\other_calls\230213.mp3
Audio file saved: bird_sounds\other_calls\230211.mp3
Audio file saved: bird_sounds\other_calls\122189.mp3
Audio file saved: bird_sounds\other_calls\49729.mp3
Audio file saved: bird_sounds\other_calls\36066.mp3
Audio file saved: bird_sounds\other_calls\12113.mp3
Audio file saved: bird_sounds\other_calls\459097.mp