# Birdify2 
### Data loading demo

![](birdify.png)

To run this notebook you need **Python 3** and the following packages:
* **Tensorflow 1.12.0**
* **PyDub 0.23.0**
* **Pillow 4.0.0**
* ~~**Basemap 1.2.0** *(optional)*~~

You also need **ffmpeg** or **avconv** installed; PyDub relies on one of them to read compressed audio formats.

In [None]:
# Fetch the raw bird-recording metadata from the xeno-canto API.

import preprocessing.sounds.download_birdify_data as bd

# Query URL used for this demo run.
_URL_IF_EMPTY = "http://www.xeno-canto.org/api/2/recordings?query=q:a"
url = _URL_IF_EMPTY

# NOTE(review): judging by the method names, the first call reports how many
# result pages the query spans and the second collects the records from those
# pages -- confirm against download_birdify_data.
fetcher = bd.FetchData()
nr_of_pgs = fetcher.get_nr_of_pages(url)
recordings = fetcher.get_records_by_area(nr_of_pgs, url)

print("Metadata loaded successfully!")

In [None]:
# Print the basic statistics and the content of one randomly chosen record.

# Import the module under its full name rather than a one-letter alias such as
# `r`: short aliases are easily rebound by a loop variable in another cell,
# after which re-running this cell would fail.
import random

print("Number of pages: ", nr_of_pgs)
print("The number of records loaded: ", len(recordings))

# Guard against an empty result set so the cell reports it instead of raising.
if recordings:
    print("A random record content:\n", random.choice(recordings))
else:
    print("No records were loaded, so there is no sample record to show.")


In [None]:
# Draw the geographical distribution of the loaded recordings.
import matplotlib.pyplot as plt
import numpy as np

# Country names seen among the plotted records. Kept as a dict (values unused)
# so that code iterating over `countries.items()` keeps working.
countries = {}

longitudes = []
latitudes = []

for record in recordings:
    # Skip records that carry no coordinates at all.
    if "lat" not in record or "lng" not in record:
        continue
    if record["lat"] is None or record["lng"] is None:
        continue

    # Skip records whose coordinates cannot be parsed as numbers.
    try:
        lat = float(record["lat"])
        lng = float(record["lng"])
    except (TypeError, ValueError):
        continue

    # Valid latitudes lie in [-90, 90] and valid longitudes in [-180, 180].
    # Written as a positive range test so that NaN values are rejected too.
    if not (-90 <= lat <= 90 and -180 <= lng <= 180):
        continue

    # Remember the country of every record that ends up on the plot.
    countries[record["cnt"]] = ""

    longitudes.append(lng)
    latitudes.append(lat)

# Draw all points with a single scatter call; one call per record creates one
# artist per point and becomes very slow for large result sets.
fig, ax = plt.subplots(figsize=(20, 20))
ax.scatter(longitudes, latitudes)
ax.set_title("Recording locations")
ax.set_xlabel("Longitude (degrees)")
ax.set_ylabel("Latitude (degrees)")

plt.show()

In [None]:
# Show the names of the countries that were collected while plotting.
print("Countries from where the samples are: ")
for country in countries:
    print(country)

In [None]:
# Pick four entries of `recordings` at random (without repetition) and hand
# them to create_spectro_for_records.
# NOTE(review): the sample is drawn over recordings, not over species, so two
# picks may belong to the same species -- confirm that this is intended.
from preprocessing.download_and_convert_birdify import create_spectro_for_records

number_of_selected_species = 4

idxs = np.random.choice(
    len(recordings),
    size=number_of_selected_species,
    replace=False,
)

selected_recordings = []
for position in idxs:
    selected_recordings.append(recordings[position])

create_spectro_for_records(selected_recordings)


In [None]:
# Show up to `number_of_spectrograms_to_show` spectrogram images for each of
# the selected recordings, one recording per row.
from PIL import Image
from os import listdir
from os.path import isdir, isfile, join

number_of_spectrograms_to_show = 3

image_size = 224

# Sparse label tables indexed by pixel position: only the positions that hold a
# value get a tick. The labels themselves are taken over unchanged.
# NOTE(review): the x labels imply that an image spans 3 seconds and the y
# labels that it spans 0-44100 Hz from bottom to top -- confirm against the
# spectrogram generator.
xtickvalues = [None] * image_size
xtickvalues[0] = 0
xtickvalues[image_size // 3] = 1
xtickvalues[image_size // 3 * 2] = 2
xtickvalues[image_size - 1] = 3

ytickvalues = [None] * image_size
ytickvalues[0] = 44100
ytickvalues[image_size // 2] = 22050
ytickvalues[image_size - 1] = 0

# Place ticks only where a label exists instead of one tick per pixel.
x_tick_positions = [pos for pos, label in enumerate(xtickvalues) if label is not None]
x_tick_labels = [xtickvalues[pos] for pos in x_tick_positions]
y_tick_positions = [pos for pos, label in enumerate(ytickvalues) if label is not None]
y_tick_labels = [ytickvalues[pos] for pos in y_tick_positions]

# squeeze=False keeps `axarr` two-dimensional even for a single row or column,
# so `axarr[i, j]` is always valid.
fig, axarr = plt.subplots(
    number_of_selected_species,
    number_of_spectrograms_to_show,
    figsize=(20, 20),
    squeeze=False,
)

for i in range(number_of_selected_species):
    genus = selected_recordings[i]["gen"]
    species = selected_recordings[i]["sp"]

    # Build the path with os.path.join so it works on every platform, not only
    # where "\\" is the separator.
    directory_name = join("spectrograms", genus + "_" + species)

    # A missing directory is treated as "no spectrograms" rather than an error.
    # Sorting makes the choice of displayed files reproducible, because
    # listdir returns names in arbitrary order.
    if isdir(directory_name):
        spectrogram_files = sorted(
            name for name in listdir(directory_name)
            if isfile(join(directory_name, name))
        )
    else:
        spectrogram_files = []

    for j in range(number_of_spectrograms_to_show):
        ax = axarr[i, j]

        # Drop the axes for which there is no image to show.
        if j >= len(spectrogram_files):
            ax.remove()
            continue

        # Close the file as soon as the pixels are copied. Converting to RGBA
        # and letting NumPy read the image yields a (height, width, 4) array
        # regardless of the stored mode or aspect ratio.
        with Image.open(join(directory_name, spectrogram_files[j])) as im_frame:
            np_frame = np.array(im_frame.convert("RGBA"))

        ax.set_title(genus + " - " + species + " (" + spectrogram_files[j] + ")")
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("Frequency (Hz)")
        ax.imshow(np_frame)

        ax.set_xticks(x_tick_positions)
        ax.set_xticklabels(x_tick_labels)
        ax.set_yticks(y_tick_positions)
        ax.set_yticklabels(y_tick_labels)

plt.show()