#Import Library

In [None]:
import urllib.request
import json
import sys
import os
import glob
import pandas as pd

#Download Query

In [None]:
# Base directory for everything this notebook writes (JSON pages, per-species
# folders, CSV files). Paths are built by plain string concatenation, so the
# value must keep its trailing "/".
corepath = "/content/drive/MyDrive/Download_Birds_Data/"

In [None]:
def save_json(searchTerms, birdName, country):
    """Download every result page of a Xeno-canto query and save each as JSON.

    One file named ``<birdName>_jsondata_p<page>.json`` is written directly
    under ``corepath`` for each page the API reports.

    Args:
        searchTerms: Query string sent to the API (e.g. "Apus apus").
        birdName: Label used for the sub-directory and the JSON file names;
            any ':' characters are stripped.
        country: Unused; kept so existing callers keep working.

    Returns:
        Path (with trailing "/") of the per-bird sub-directory, which is
        created if it does not exist yet.
    """
    from urllib.parse import quote

    safeName = birdName.replace(':', '')
    # create a path to save json files and recordings
    path = corepath + safeName + "/"
    if not os.path.exists(path):
        print("Creating subdirectory " + path + " for downloaded files...")
        os.makedirs(path, exist_ok=True)

    # Percent-encode the whole query, not only spaces. ':' stays literal
    # because it separates search tags from their values (e.g. "q:A").
    query = quote(searchTerms, safe=':')

    numPages = 1
    page = 1
    totalRecordings = 0
    # download a json file for every page found in a query
    while page <= numPages:
        print("Loading page " + str(page) + "...")
        url = 'https://www.xeno-canto.org/api/2/recordings?query={0}&page={1}'.format(query, page)
        print(url)
        # The context manager closes the HTTP response even if parsing fails.
        with urllib.request.urlopen(url) as jsonPage:
            jsondata = json.loads(jsonPage.read().decode('utf-8'))
        filename = corepath + safeName + "_jsondata_p" + str(page) + ".json"
        with open(filename, 'w') as outfile:
            json.dump(jsondata, outfile)
        # The first response tells us how many pages there really are.
        numPages = jsondata['numPages']
        # Count what was actually received instead of assuming a page size;
        # this also stays correct when the query returns zero pages.
        totalRecordings += len(jsondata['recordings'])
        page = page + 1
    print("Found ", numPages, " pages in total.")
    print("Saved json for ", totalRecordings, " files")
    return path

In [None]:
def read_data(searchTerm, path, birdName):
    """Collect one field from every recording in a bird's saved JSON pages.

    Reads ``<birdName>_jsondata_p<page>.json`` files located directly under
    ``corepath`` (':' stripped from ``birdName``), starting at page 1 and
    continuing up to the ``numPages`` value stored in the files.

    Args:
        searchTerm: Key to extract from each recording, e.g. "id" or "type".
        path: Not used by this function.
        birdName: Label that was used when the JSON pages were saved.

    Returns:
        List with the ``searchTerm`` value of every recording, in page order.
    """
    collected = []
    file_prefix = corepath + birdName.replace(':', '') + "_jsondata_p"
    total_pages = 1
    current = 1
    while current <= total_pages:
        with open(file_prefix + str(current) + ".json", 'r') as handle:
            page_content = json.load(handle)
        # every page carries the overall page count
        total_pages = page_content['numPages']
        collected.extend(entry[searchTerm] for entry in page_content["recordings"])
        current += 1
    return collected

In [None]:
def download(searchTerms, birdName, country):
    """Download every recording matched by a Xeno-canto query.

    Saves the query's JSON pages via ``save_json`` and then stores each
    recording in the per-bird sub-directory returned by it, named
    ``<recording id><birdName>.wav`` (':' stripped from ``birdName``).

    Args:
        searchTerms: Query string passed to ``save_json``.
        birdName: Label used for the sub-directory and the file names.
        country: Passed through to ``save_json``.

    Returns:
        None.
    """
    # NOTE(review): ".wav" is appended unconditionally; the metadata
    # "file-name" values suggest many uploads are .mp3 -- confirm the real
    # audio format before decoding these files.
    path = save_json(searchTerms, birdName, country)
    # recording IDs and download addresses, both read from the saved JSON
    filenamesID = read_data('id', path, birdName)
    fileaddress = read_data('file', path, birdName)
    # Use the same sanitised label as the directory and the JSON file names.
    safeName = birdName.replace(':', '')
    numfiles = len(filenamesID)
    print("A total of ", numfiles, " files will be downloaded")
    for i, (recordingID, address) in enumerate(zip(filenamesID, fileaddress), start=1):
        target = os.path.join(path, recordingID + safeName + ".wav")
        # Report the path that is really written, inside the bird's folder.
        print("Saving file ", i, "/", numfiles, target)
        # Only protocol-relative addresses ("//host/...") need a scheme;
        # an address that already has one must be used as-is.
        if address.startswith("//"):
            address = "https:" + address
        urllib.request.urlretrieve(address, target)
    print("===============Done Download==============")

#Download per Species

In [None]:
# Species to download in this batch (scientific names, used as the query).
birds = [
    'Dryocopus javensis',
    'Caprimulgus macrurus',
    'Pnoepyga pusilla',
    'Malacocincla malaccensis',
    'Anthipes solitaris',
]

In [None]:
for bird in birds:
    birdName = bird.replace(' ', '')
    download(bird, birdName, 'countries')

    # Convert the downloaded JSON of this species into a csv file.
    # save_json writes one JSON file per result page, so walk every page
    # instead of reading only "_jsondata_p1"; otherwise a species with more
    # than one page would lose recordings in its CSV.
    recordings = []
    page = 1
    numPages = 1
    while page <= numPages:
        with open(corepath + birdName + "_jsondata_p" + str(page) + ".json", 'r') as jsonFile:
            values = json.load(jsonFile)
        numPages = values['numPages']
        recordings.extend(values['recordings'])
        page += 1

    # Create a pandas dataframe of records & convert to .csv file
    record_df = pd.DataFrame(recordings)
    record_df.to_csv(corepath + birdName + ".csv", index=False)

Creating subdirectory /content/drive/MyDrive/Download_Birds_Data/Dryocopusjavensis/ for downloaded files...
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Dryocopus%20javensis&page=1
Found  1  pages in total.
Saved json for  128  files
A total of  128  files will be downloaded
Saving file  1 / 128 /content/drive/MyDrive/Download_Birds_Data/665786Dryocopusjavensis.wav
Saving file  2 / 128 /content/drive/MyDrive/Download_Birds_Data/656517Dryocopusjavensis.wav
Saving file  3 / 128 /content/drive/MyDrive/Download_Birds_Data/649669Dryocopusjavensis.wav
Saving file  4 / 128 /content/drive/MyDrive/Download_Birds_Data/629324Dryocopusjavensis.wav
Saving file  5 / 128 /content/drive/MyDrive/Download_Birds_Data/620581Dryocopusjavensis.wav
Saving file  6 / 128 /content/drive/MyDrive/Download_Birds_Data/620578Dryocopusjavensis.wav
Saving file  7 / 128 /content/drive/MyDrive/Download_Birds_Data/620577Dryocopusjavensis.wav
Saving file  8 / 128 /content/drive/MyDrive/Download_Bird

In [None]:
# Species to download in this batch (scientific names, used as the query).
birds = [
    'Orthotomus sericeus',
    'Pycnonotus plumosus',
    'Malacocincla sepiaria',
    'Malacopteron affine',
    'Pachycephala cinerea',
]

In [None]:
for bird in birds:
    birdName = bird.replace(' ', '')
    download(bird, birdName, 'countries')

    # Convert the downloaded JSON of this species into a csv file.
    # save_json writes one JSON file per result page, so walk every page
    # instead of reading only "_jsondata_p1"; otherwise a species with more
    # than one page would lose recordings in its CSV.
    recordings = []
    page = 1
    numPages = 1
    while page <= numPages:
        with open(corepath + birdName + "_jsondata_p" + str(page) + ".json", 'r') as jsonFile:
            values = json.load(jsonFile)
        numPages = values['numPages']
        recordings.extend(values['recordings'])
        page += 1

    # Create a pandas dataframe of records & convert to .csv file
    record_df = pd.DataFrame(recordings)
    record_df.to_csv(corepath + birdName + ".csv", index=False)

Creating subdirectory /content/drive/MyDrive/Download_Birds_Data/Orthotomussericeus/ for downloaded files...
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Orthotomus%20sericeus&page=1
Found  1  pages in total.
Saved json for  122  files
A total of  122  files will be downloaded
Saving file  1 / 122 /content/drive/MyDrive/Download_Birds_Data/665779Orthotomussericeus.wav
Saving file  2 / 122 /content/drive/MyDrive/Download_Birds_Data/663167Orthotomussericeus.wav
Saving file  3 / 122 /content/drive/MyDrive/Download_Birds_Data/657597Orthotomussericeus.wav
Saving file  4 / 122 /content/drive/MyDrive/Download_Birds_Data/646677Orthotomussericeus.wav
Saving file  5 / 122 /content/drive/MyDrive/Download_Birds_Data/646676Orthotomussericeus.wav
Saving file  6 / 122 /content/drive/MyDrive/Download_Birds_Data/578925Orthotomussericeus.wav
Saving file  7 / 122 /content/drive/MyDrive/Download_Birds_Data/578791Orthotomussericeus.wav
Saving file  8 / 122 /content/drive/MyDrive/Down

In [None]:
# Species to download in this batch (scientific names, used as the query).
birds = [
    'Corvus enca',
    'Pycnonotus erythropthalmos',
    'Elanus caeruleus',
    'Anthracoceros albirostris',
]

In [None]:
for bird in birds:
    birdName = bird.replace(' ', '')
    download(bird, birdName, 'countries')

    # Convert the downloaded JSON of this species into a csv file.
    # save_json writes one JSON file per result page, so walk every page
    # instead of reading only "_jsondata_p1"; otherwise a species with more
    # than one page would lose recordings in its CSV.
    recordings = []
    page = 1
    numPages = 1
    while page <= numPages:
        with open(corepath + birdName + "_jsondata_p" + str(page) + ".json", 'r') as jsonFile:
            values = json.load(jsonFile)
        numPages = values['numPages']
        recordings.extend(values['recordings'])
        page += 1

    # Create a pandas dataframe of records & convert to .csv file
    record_df = pd.DataFrame(recordings)
    record_df.to_csv(corepath + birdName + ".csv", index=False)

Creating subdirectory /content/drive/MyDrive/Download_Birds_Data/Corvusenca/ for downloaded files...
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Corvus%20enca&page=1
Found  1  pages in total.
Saved json for  122  files
A total of  122  files will be downloaded
Saving file  1 / 122 /content/drive/MyDrive/Download_Birds_Data/657295Corvusenca.wav
Saving file  2 / 122 /content/drive/MyDrive/Download_Birds_Data/628125Corvusenca.wav
Saving file  3 / 122 /content/drive/MyDrive/Download_Birds_Data/616145Corvusenca.wav
Saving file  4 / 122 /content/drive/MyDrive/Download_Birds_Data/616142Corvusenca.wav
Saving file  5 / 122 /content/drive/MyDrive/Download_Birds_Data/616141Corvusenca.wav
Saving file  6 / 122 /content/drive/MyDrive/Download_Birds_Data/614886Corvusenca.wav
Saving file  7 / 122 /content/drive/MyDrive/Download_Birds_Data/614885Corvusenca.wav
Saving file  8 / 122 /content/drive/MyDrive/Download_Birds_Data/614882Corvusenca.wav
Saving file  9 / 122 /content/drive

In [None]:
# Species to download in this batch (scientific names).
birds = ['Dicrurus paradiseus']

for bird in birds:
    birdName = bird.replace(' ', '')
    # The " q:A" suffix is sent as part of the query for this batch only.
    download(bird + ' q:A', birdName, 'countries')

    # Convert the downloaded JSON of this species into a csv file.
    # save_json writes one JSON file per result page, so walk every page
    # instead of reading only "_jsondata_p1"; otherwise a species with more
    # than one page would lose recordings in its CSV.
    recordings = []
    page = 1
    numPages = 1
    while page <= numPages:
        with open(corepath + birdName + "_jsondata_p" + str(page) + ".json", 'r') as jsonFile:
            values = json.load(jsonFile)
        numPages = values['numPages']
        recordings.extend(values['recordings'])
        page += 1

    # Create a pandas dataframe of records & convert to .csv file
    record_df = pd.DataFrame(recordings)
    record_df.to_csv(corepath + birdName + ".csv", index=False)

Creating subdirectory /content/drive/MyDrive/Download_Birds_Data/Dicrurusparadiseus/ for downloaded files...
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Dicrurus%20paradiseus%20q:A&page=1
Found  1  pages in total.
Saved json for  124  files
A total of  124  files will be downloaded
Saving file  1 / 124 /content/drive/MyDrive/Download_Birds_Data/666146Dicrurusparadiseus.wav
Saving file  2 / 124 /content/drive/MyDrive/Download_Birds_Data/657036Dicrurusparadiseus.wav
Saving file  3 / 124 /content/drive/MyDrive/Download_Birds_Data/654662Dicrurusparadiseus.wav
Saving file  4 / 124 /content/drive/MyDrive/Download_Birds_Data/654538Dicrurusparadiseus.wav
Saving file  5 / 124 /content/drive/MyDrive/Download_Birds_Data/654528Dicrurusparadiseus.wav
Saving file  6 / 124 /content/drive/MyDrive/Download_Birds_Data/651708Dicrurusparadiseus.wav
Saving file  7 / 124 /content/drive/MyDrive/Download_Birds_Data/632042Dicrurusparadiseus.wav
Saving file  8 / 124 /content/drive/MyDriv

#Convert Meta Data

In [None]:
# Merge every per-species CSV found in corepath into one metadata file.
os.chdir(corepath)

extension = 'csv'
output_name = "birds_metadata.csv"
# The combined file itself matches '*.csv'; leave it out so re-running this
# cell does not append the previous output to the new one. Sorting makes the
# row order independent of the order glob happens to return.
all_filenames = sorted(f for f in glob.glob('*.{}'.format(extension)) if f != output_name)

if all_filenames:
    #combine all files in the list
    combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])
    #export to csv
    combined_csv.to_csv(output_name, index=False, encoding='utf-8-sig')
    print("==========Done Convert CSV========")
else:
    # pd.concat raises on an empty list; report the situation instead.
    print("No per-species CSV files found in " + corepath)



In [None]:
# Load the combined metadata from corepath, so this cell follows the same
# location the rest of the notebook writes to instead of a duplicated literal.
df = pd.read_csv(corepath + 'birds_metadata.csv', delimiter=',')

In [None]:
# Preview the first five rows of the combined metadata.
df.head(n=5)

Unnamed: 0,id,gen,sp,ssp,en,rec,cnt,loc,lat,lng,alt,type,url,file,file-name,sono,lic,q,length,time,date,uploaded,also,rmk,bird-seen,playback-used
0,665786,Dryocopus,javensis,,White-bellied Woodpecker,Okamoto Keita Sin,Brunei,"Jalan Badas, Belait",4.559486,114.421341,,Call,//www.xeno-canto.org/665786,//www.xeno-canto.org/665786/download,"XC665786-Woodpecker, White-bellied_2021-05-25_...",{'small': '//www.xeno-canto.org/sounds/uploade...,//creativecommons.org/licenses/by-nc-sa/4.0/,A,0:05,07:25,2021-05-25,2021-07-31,[''],Acknowledgements: I took this sound recording ...,no,no
1,656517,Dryocopus,javensis,javensis,White-bellied Woodpecker,Ding Li Yong,Malaysia,"Panti Bird Sanctuary, Johor",1.8773,103.9181,10.0,"adult, call",//www.xeno-canto.org/656517,//www.xeno-canto.org/656517/download,XC656517-White-bellied Woodpecker_Mersing_DLY.mp3,{'small': '//www.xeno-canto.org/sounds/uploade...,//creativecommons.org/licenses/by-nc-nd/4.0/,A,0:15,10:30,2017-06-29,2021-06-14,[''],,no,no
2,649669,Dryocopus,javensis,parvus,White-bellied Woodpecker,Ding Li Yong,Indonesia,"Teupah Barat, Kabupaten Simeulue, Aceh",2.4634,96.3007,20.0,"adult, song",//www.xeno-canto.org/649669,//www.xeno-canto.org/649669/download,XC649669-Simeulue Woodpecker3_Teupah_DLY.mp3,{'small': '//www.xeno-canto.org/sounds/uploade...,//creativecommons.org/licenses/by-nc-nd/4.0/,A,0:23,08:30,2017-03-23,2021-05-19,[''],,yes,no
3,629324,Dryocopus,javensis,,White-bellied Woodpecker,Vincent,China,"Diqing Tibetan Autonomous Prefecture, Yunnan",27.6836,99.5554,2920.0,"adult, call, male",//www.xeno-canto.org/629324,//www.xeno-canto.org/629324/download,XC629324- - White-bellied Woodpecker - Call - ...,{'small': '//www.xeno-canto.org/sounds/uploade...,//creativecommons.org/licenses/by-nc-sa/4.0/,A,0:58,11:00,2021-03-06,2021-03-16,[''],,yes,no
4,620581,Dryocopus,javensis,confusus,White-bellied Woodpecker,Jelle Scharringa,Philippines,"Pagbilao, Quezon, Calabarzon",13.9844,121.8107,170.0,"call, male",//www.xeno-canto.org/620581,//www.xeno-canto.org/620581/download,XC620581-White-bellied Woodpecker (Dryocopus j...,{'small': '//www.xeno-canto.org/sounds/uploade...,//creativecommons.org/licenses/by-nc-sa/4.0/,A,0:23,12:50,1985-04-13,2021-02-09,[''],One bird 3m up in tree at forest edge along road,yes,no


#Total of .wav files

In [None]:
# Gather the full path of every file ending in ".wav" anywhere below corepath.
blist = []
for folder, _subfolders, names in os.walk(corepath):
    blist.extend(os.path.join(folder, name) for name in names if name.endswith(".wav"))

print('Found ', len(blist),' files.')

Found  1748  files.
