# birdnet analysis concat

This notebook will convert the results of running birdnet analyze on all of birclef-2022 training audio into a single dataframe that should be easier to work with.

In [4]:
import pandas as pd
from pathlib import Path

root = Path("../data/processed/birdnet/analysis")
paths = sorted(root.glob("**/*.csv"))
len(paths)

14824

In [9]:
metadata_df = pd.read_csv("../data/raw/birdclef-2022/train_metadata.csv")
metadata_df.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,time,url,filename
0,afrsil1,[],"['call', 'flight call']",12.391,-1.493,Euodice cantans,African Silverbill,Bram Piot,Creative Commons Attribution-NonCommercial-Sha...,2.5,08:00,https://www.xeno-canto.org/125458,afrsil1/XC125458.ogg
1,afrsil1,"['houspa', 'redava', 'zebdov']",['call'],19.8801,-155.7254,Euodice cantans,African Silverbill,Dan Lane,Creative Commons Attribution-NonCommercial-Sha...,3.5,08:30,https://www.xeno-canto.org/175522,afrsil1/XC175522.ogg
2,afrsil1,[],"['call', 'song']",16.2901,-16.0321,Euodice cantans,African Silverbill,Bram Piot,Creative Commons Attribution-NonCommercial-Sha...,4.0,11:30,https://www.xeno-canto.org/177993,afrsil1/XC177993.ogg
3,afrsil1,[],"['alarm call', 'call']",17.0922,54.2958,Euodice cantans,African Silverbill,Oscar Campbell,Creative Commons Attribution-NonCommercial-Sha...,4.0,11:00,https://www.xeno-canto.org/205893,afrsil1/XC205893.ogg
4,afrsil1,[],['flight call'],21.4581,-157.7252,Euodice cantans,African Silverbill,Ross Gallardy,Creative Commons Attribution-NonCommercial-Sha...,3.0,16:30,https://www.xeno-canto.org/207431,afrsil1/XC207431.ogg


In [18]:
taxonomy_df = pd.read_csv("../data/raw/birdclef-2022/eBird_Taxonomy_v2021.csv")
taxonomy_df.head()

Unnamed: 0,TAXON_ORDER,CATEGORY,SPECIES_CODE,PRIMARY_COM_NAME,SCI_NAME,ORDER1,FAMILY,SPECIES_GROUP,REPORT_AS
0,1,species,ostric2,Common Ostrich,Struthio camelus,Struthioniformes,Struthionidae (Ostriches),Ostriches,
1,6,species,ostric3,Somali Ostrich,Struthio molybdophanes,Struthioniformes,Struthionidae (Ostriches),,
2,7,slash,y00934,Common/Somali Ostrich,Struthio camelus/molybdophanes,Struthioniformes,Struthionidae (Ostriches),,
3,8,species,grerhe1,Greater Rhea,Rhea americana,Rheiformes,Rheidae (Rheas),Rheas,
4,14,species,lesrhe2,Lesser Rhea,Rhea pennata,Rheiformes,Rheidae (Rheas),,


In [23]:
common_to_code = dict(
    list(zip(taxonomy_df["PRIMARY_COM_NAME"], taxonomy_df["SPECIES_CODE"]))
)

In [24]:
path = paths[0]
df = pd.read_csv(path)
name = path.name.split(".")[0]
df["filename"] = f"{path.parent.name}/{name}.ogg"
df["birdnet_label"] = df["Common name"].apply(common_to_code.get)
df

Unnamed: 0,Start (s),End (s),Scientific name,Common name,Confidence,filename,birdnet_label
0,0.0,3.0,Euodice cantans,African Silverbill,0.903,afrsil1/XC125458.ogg,afrsil1
1,0.0,3.0,Euodice malabarica,Indian Silverbill,0.472,afrsil1/XC125458.ogg,indsil
2,3.0,6.0,Euodice cantans,African Silverbill,0.7311,afrsil1/XC125458.ogg,afrsil1
3,6.0,9.0,Euodice cantans,African Silverbill,0.5888,afrsil1/XC125458.ogg,afrsil1


In [26]:
def read_path(path, lookup=common_to_code.get):
    df = pd.read_csv(path)
    name = path.name.split(".")[0]
    df["filename"] = f"{path.parent.name}/{name}.ogg"
    df["birdnet_label"] = df["Common name"].apply(lookup)
    return df


res = []
for path in paths:
    df = read_path(path, lookup=common_to_code.get)
    res.append(df)

df = pd.concat(res)
df

Unnamed: 0,Start (s),End (s),Scientific name,Common name,Confidence,filename,birdnet_label
0,0.0,3.0,Euodice cantans,African Silverbill,0.9030,afrsil1/XC125458.ogg,afrsil1
1,0.0,3.0,Euodice malabarica,Indian Silverbill,0.4720,afrsil1/XC125458.ogg,indsil
2,3.0,6.0,Euodice cantans,African Silverbill,0.7311,afrsil1/XC125458.ogg,afrsil1
3,6.0,9.0,Euodice cantans,African Silverbill,0.5888,afrsil1/XC125458.ogg,afrsil1
0,0.0,3.0,Sarothrura pulchra,White-spotted Flufftail,0.7585,afrsil1/XC175522.ogg,whsflu1
...,...,...,...,...,...,...,...
19,33.0,36.0,Orthotomus ruficeps,Ashy Tailorbird,0.3202,zebdov/XC666195.ogg,ashtai1
20,33.0,36.0,Geopelia striata,Zebra Dove,0.2766,zebdov/XC666195.ogg,zebdov
21,33.0,36.0,Pomatorhinus montanus,Chestnut-backed Scimitar-Babbler,0.2317,zebdov/XC666195.ogg,cbsbab1
22,36.0,39.0,Orthotomus ruficeps,Ashy Tailorbird,0.9385,zebdov/XC666195.ogg,ashtai1


In [29]:
df.rename(
    columns={"Start (s)": "start_sec", "End (s)": "end_sec", "Confidence": "confidence"}
)[["start_sec", "end_sec", "confidence", "filename", "birdnet_label"]]

Unnamed: 0,start_sec,end_sec,confidence,filename,birdnet_label
0,0.0,3.0,0.9030,afrsil1/XC125458.ogg,afrsil1
1,0.0,3.0,0.4720,afrsil1/XC125458.ogg,indsil
2,3.0,6.0,0.7311,afrsil1/XC125458.ogg,afrsil1
3,6.0,9.0,0.5888,afrsil1/XC125458.ogg,afrsil1
0,0.0,3.0,0.7585,afrsil1/XC175522.ogg,whsflu1
...,...,...,...,...,...
19,33.0,36.0,0.3202,zebdov/XC666195.ogg,ashtai1
20,33.0,36.0,0.2766,zebdov/XC666195.ogg,zebdov
21,33.0,36.0,0.2317,zebdov/XC666195.ogg,cbsbab1
22,36.0,39.0,0.9385,zebdov/XC666195.ogg,ashtai1
