# Embeddings Work
##### By Pranjal Chatterjee
Exploratory notebook for working on birdnet embeddings

### Imports and Setup

In [1]:
import pandas as pd
import os
from annotation_post_processing import *

In [2]:
embeddingColumns = [str(i) for i in range(420)]
columnNames = ["START", "END"] + embeddingColumns
path = './input/XC_embeddings/'

In [3]:
def generate_embeddings_from_file(path, filename):
    with open(path + filename, 'r') as f:
        data = f.read()
    with open(path + filename, 'w') as f:
        f.write(",".join(data.split("\t")))
    file_df = pd.read_csv(path + filename, names = columnNames)
    file_df["IN FILE"] = filename[:filename.index(".birdnet")] + ".wav"
    return file_df

def generate_embeddings(path):
    df = pd.DataFrame()
    for filename in os.listdir(path):
        df = pd.concat([df, generate_embeddings_from_file(path, filename)], ignore_index = True)
    df["PATH"] = path
    columns = df.columns.tolist()
    columns = columns[-2:] + columns[:-2]
    df = df[columns]
    df = df.sort_values("IN FILE", ascending = True)
    df = df.reset_index(drop = True)
    return df

In [4]:
embeddings_df = generate_embeddings(path)
embeddings_df

Unnamed: 0,IN FILE,PATH,START,END,0,1,2,3,4,5,...,410,411,412,413,414,415,416,417,418,419
0,Accipiter-collaris-260336.wav,./input/XC_embeddings/,0.0,3.0,0.309174,0.446021,0.281863,1.577357,0.373982,1.399313,...,0.158719,1.222846,0.322246,0.106338,1.150655,0.071494,0.212481,0.575966,0.919139,0.243595
1,Accipiter-collaris-260336.wav,./input/XC_embeddings/,3.0,6.0,0.888699,0.378584,0.808768,1.361410,1.220782,0.914684,...,1.057637,2.407450,0.700646,0.619101,1.727198,0.746244,0.915833,1.531033,1.508690,0.454181
2,Accipiter-collaris-260336.wav,./input/XC_embeddings/,6.0,9.0,0.459180,0.318852,0.201550,1.143557,0.749312,1.322122,...,0.174462,0.966907,0.453690,0.383807,2.245309,0.238596,0.133288,0.366940,1.135604,1.010370
3,Accipiter-collaris-260336.wav,./input/XC_embeddings/,9.0,12.0,0.802918,0.458812,0.832160,1.008306,1.113528,0.493458,...,0.774734,2.153570,0.820461,0.469946,1.541887,0.500640,0.868078,1.275221,1.243478,0.302507
4,Accipiter-collaris-260336.wav,./input/XC_embeddings/,12.0,15.0,0.485405,0.223866,0.760072,1.112765,0.896280,0.427797,...,0.635621,1.647322,0.863135,0.449863,1.383385,0.285739,0.400612,0.884621,1.003742,0.380692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1210,Xenopipo-atronitens-504798.wav,./input/XC_embeddings/,3.0,6.0,0.634414,0.267838,0.256490,1.193340,1.013062,0.070845,...,0.377230,0.257027,1.540528,1.250743,0.307226,0.360740,0.609170,1.440667,0.455522,1.108260
1211,Xenopipo-atronitens-504798.wav,./input/XC_embeddings/,0.0,3.0,0.331101,0.735028,0.293684,0.966468,0.474102,0.234102,...,0.738214,1.121028,1.635089,1.508728,0.720707,0.607238,0.913978,2.380363,0.714863,0.901613
1212,Xenopipo-atronitens-504798.wav,./input/XC_embeddings/,15.0,18.0,1.131276,0.352781,0.349888,0.971194,0.801534,0.001061,...,0.630672,0.409479,1.247414,1.086553,0.047821,0.824271,1.134258,1.857706,0.959904,0.880479
1213,Xiphorhynchus-obsoletus-437341.wav,./input/XC_embeddings/,0.0,3.0,0.944446,0.984069,0.626557,0.458525,1.379129,0.964276,...,0.857175,1.268004,0.320324,2.043832,1.004998,0.775938,0.987647,1.956105,0.792508,1.047974


In [5]:
annotations_df = pd.read_csv("xc_annotations.csv")
annotations_df

Unnamed: 0,FOLDER,IN FILE,CLIP LENGTH,CHANNEL,OFFSET,DURATION,SAMPLE RATE,MANUAL ID,CONFIDENCE
0,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,0.0,3.0,48000,Sandwich Tern,0.3737
1,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,0.0,3.0,48000,Great Tit,0.1229
2,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,0.0,3.0,48000,Laughing Gull,0.1151
3,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,6.0,3.0,48000,Thrush Nightingale,0.8343
4,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,6.0,3.0,48000,Great Crested Tern,0.2927
...,...,...,...,...,...,...,...,...,...
1244,../passive-acoustic-biodiversity/BirdnetEmbedd...,Xenopipo-atronitens-504798.wav,56.239683,1,9.0,3.0,48000,Reddish Hermit,0.3146
1245,../passive-acoustic-biodiversity/BirdnetEmbedd...,Xenopipo-atronitens-504798.wav,56.239683,1,15.0,3.0,48000,Reddish Hermit,0.3242
1246,../passive-acoustic-biodiversity/BirdnetEmbedd...,Xenopipo-atronitens-504798.wav,56.239683,1,18.0,3.0,48000,Reddish Hermit,0.1272
1247,../passive-acoustic-biodiversity/BirdnetEmbedd...,Xenopipo-atronitens-504798.wav,56.239683,1,21.0,3.0,48000,Black-capped Antwren,0.4082


In [6]:
manual_df = pd.read_csv("mixed_bird_sample.csv")
manual_df = annotation_chunker(manual_df, 3)
manual_df

Unnamed: 0,IN FILE,CLIP LENGTH,CHANNEL,OFFSET,DURATION,SAMPLE RATE,MANUAL ID
0,Accipiter-collaris-260336.wav,26.93220,0,0.0,3.0,44100,Accipiter collaris Semicollared Hawk
1,Accipiter-collaris-260336.wav,26.93220,0,6.0,3.0,44100,Accipiter collaris Semicollared Hawk
2,Accipiter-collaris-260336.wav,26.93220,0,18.0,3.0,44100,Accipiter collaris Semicollared Hawk
3,Accipiter-collaris-260336.wav,26.93220,0,21.0,3.0,44100,Accipiter collaris Semicollared Hawk
4,Accipiter-poliogaster-121768.wav,121.75200,0,0.0,3.0,48000,Accipiter poliogaster Grey-bellied Hawk
...,...,...,...,...,...,...,...
837,Xenopipo-atronitens-504798.wav,25.83510,0,0.0,3.0,44100,Xenopipo atronitens Black Manakin
838,Xenopipo-atronitens-504798.wav,25.83510,0,3.0,3.0,44100,Xenopipo atronitens Black Manakin
839,Xenopipo-atronitens-504798.wav,25.83510,0,21.0,3.0,44100,Xenopipo atronitens Black Manakin
840,Xiphorhynchus-obsoletus-437341.wav,6.11265,0,0.0,3.0,44100,Xiphorhynchus obsoletus Striped Woodcreeper


In [7]:
# Data cleaning
intersection_files = list(set(embeddings_df["IN FILE"].unique()).intersection(set(annotations_df["IN FILE"])))
embeddings_df = embeddings_df[embeddings_df["IN FILE"].isin(intersection_files)]
annotations_df = annotations_df[annotations_df["IN FILE"].isin(intersection_files)]
manual_df = manual_df[manual_df["IN FILE"].isin(intersection_files)]

In [8]:
embeddings_df

Unnamed: 0,IN FILE,PATH,START,END,0,1,2,3,4,5,...,410,411,412,413,414,415,416,417,418,419
0,Accipiter-collaris-260336.wav,./input/XC_embeddings/,0.0,3.0,0.309174,0.446021,0.281863,1.577357,0.373982,1.399313,...,0.158719,1.222846,0.322246,0.106338,1.150655,0.071494,0.212481,0.575966,0.919139,0.243595
1,Accipiter-collaris-260336.wav,./input/XC_embeddings/,3.0,6.0,0.888699,0.378584,0.808768,1.361410,1.220782,0.914684,...,1.057637,2.407450,0.700646,0.619101,1.727198,0.746244,0.915833,1.531033,1.508690,0.454181
2,Accipiter-collaris-260336.wav,./input/XC_embeddings/,6.0,9.0,0.459180,0.318852,0.201550,1.143557,0.749312,1.322122,...,0.174462,0.966907,0.453690,0.383807,2.245309,0.238596,0.133288,0.366940,1.135604,1.010370
3,Accipiter-collaris-260336.wav,./input/XC_embeddings/,9.0,12.0,0.802918,0.458812,0.832160,1.008306,1.113528,0.493458,...,0.774734,2.153570,0.820461,0.469946,1.541887,0.500640,0.868078,1.275221,1.243478,0.302507
4,Accipiter-collaris-260336.wav,./input/XC_embeddings/,12.0,15.0,0.485405,0.223866,0.760072,1.112765,0.896280,0.427797,...,0.635621,1.647322,0.863135,0.449863,1.383385,0.285739,0.400612,0.884621,1.003742,0.380692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1208,Xenopipo-atronitens-504798.wav,./input/XC_embeddings/,18.0,21.0,0.936170,0.440020,0.140369,1.143199,0.810080,0.000000,...,0.941157,0.540462,1.123100,1.395664,0.144218,0.912376,0.538296,1.880466,0.549861,0.524407
1209,Xenopipo-atronitens-504798.wav,./input/XC_embeddings/,21.0,24.0,0.231170,0.528941,0.635815,1.085044,0.720186,0.034490,...,0.431068,0.247088,0.888073,0.889762,0.626793,0.382264,0.326128,1.382699,0.684648,0.712799
1210,Xenopipo-atronitens-504798.wav,./input/XC_embeddings/,3.0,6.0,0.634414,0.267838,0.256490,1.193340,1.013062,0.070845,...,0.377230,0.257027,1.540528,1.250743,0.307226,0.360740,0.609170,1.440667,0.455522,1.108260
1211,Xenopipo-atronitens-504798.wav,./input/XC_embeddings/,0.0,3.0,0.331101,0.735028,0.293684,0.966468,0.474102,0.234102,...,0.738214,1.121028,1.635089,1.508728,0.720707,0.607238,0.913978,2.380363,0.714863,0.901613


In [9]:
annotations_df

Unnamed: 0,FOLDER,IN FILE,CLIP LENGTH,CHANNEL,OFFSET,DURATION,SAMPLE RATE,MANUAL ID,CONFIDENCE
0,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,0.0,3.0,48000,Sandwich Tern,0.3737
1,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,0.0,3.0,48000,Great Tit,0.1229
2,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,0.0,3.0,48000,Laughing Gull,0.1151
3,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,6.0,3.0,48000,Thrush Nightingale,0.8343
4,../passive-acoustic-biodiversity/BirdnetEmbedd...,Accipiter-collaris-260336.wav,58.628027,1,6.0,3.0,48000,Great Crested Tern,0.2927
...,...,...,...,...,...,...,...,...,...
1244,../passive-acoustic-biodiversity/BirdnetEmbedd...,Xenopipo-atronitens-504798.wav,56.239683,1,9.0,3.0,48000,Reddish Hermit,0.3146
1245,../passive-acoustic-biodiversity/BirdnetEmbedd...,Xenopipo-atronitens-504798.wav,56.239683,1,15.0,3.0,48000,Reddish Hermit,0.3242
1246,../passive-acoustic-biodiversity/BirdnetEmbedd...,Xenopipo-atronitens-504798.wav,56.239683,1,18.0,3.0,48000,Reddish Hermit,0.1272
1247,../passive-acoustic-biodiversity/BirdnetEmbedd...,Xenopipo-atronitens-504798.wav,56.239683,1,21.0,3.0,48000,Black-capped Antwren,0.4082


In [10]:
manual_df

Unnamed: 0,IN FILE,CLIP LENGTH,CHANNEL,OFFSET,DURATION,SAMPLE RATE,MANUAL ID
0,Accipiter-collaris-260336.wav,26.9322,0,0.0,3.0,44100,Accipiter collaris Semicollared Hawk
1,Accipiter-collaris-260336.wav,26.9322,0,6.0,3.0,44100,Accipiter collaris Semicollared Hawk
2,Accipiter-collaris-260336.wav,26.9322,0,18.0,3.0,44100,Accipiter collaris Semicollared Hawk
3,Accipiter-collaris-260336.wav,26.9322,0,21.0,3.0,44100,Accipiter collaris Semicollared Hawk
4,Accipiter-poliogaster-121768.wav,121.7520,0,0.0,3.0,48000,Accipiter poliogaster Grey-bellied Hawk
...,...,...,...,...,...,...,...
835,Vireo-leucophrys-3158.wav,23.0139,0,15.0,3.0,44100,Vireo leucophrys Brown-capped Vireo
836,Vireo-leucophrys-3158.wav,23.0139,0,18.0,3.0,44100,Vireo leucophrys Brown-capped Vireo
837,Xenopipo-atronitens-504798.wav,25.8351,0,0.0,3.0,44100,Xenopipo atronitens Black Manakin
838,Xenopipo-atronitens-504798.wav,25.8351,0,3.0,3.0,44100,Xenopipo atronitens Black Manakin


In [11]:
# Checking to see if same number of files are in both embeddings_df and annotations_df
print(len(embeddings_df["IN FILE"].unique()), len(annotations_df["IN FILE"].unique()))

96 96


### Centroid Exploration