In [1]:
from PyHa.statistics import *
from PyHa.IsoAutio import *
from PyHa.visualizations import *
from PyHa.annotation_post_processing import *
import pandas as pd
import os
import time

In [2]:
# helper functions
def get_species_name(filename):
    split_filename = filename.split("-")

# Processing Manually Labeled Madre de Dios XC Bird Labels

In [3]:
data_path = "./Mixed_Bird/"
label_path = "MDD_Xeno_Canto_DSC180_Labels.csv"
manual_labels = pd.read_csv(label_path)
manual_labels["FOLDER"] = [data_path] * len(manual_labels)
print(len(manual_labels))

31365


In [4]:
# filtering the high priority species
species_count_dict = {}
for filename in os.listdir(data_path):
    #print(filename)
    split_filename = filename.split("-")
    species_name = split_filename[0] + '-' + split_filename[1]
    if species_name not in species_count_dict.keys():
        species_count_dict[species_name] = 1
    else:
        species_count_dict[species_name] += 1
high_priority_species = set()
for species in species_count_dict.keys():
    if species_count_dict[species] >= 3:
        high_priority_species.add(species)

# Performing the BirdNET DSP Segmentation on the dataset

In [5]:
isolation_parameters_birdnet = {
    "model" : "fg_bg_dsp_sep",
    "technique" : "simple",
    "threshold_type" : "pure",
    "threshold_const" : 0.5,
    "verbose" : True,
    "kernel_size" : 4,
    "power_threshold" : 3.0,
    "threshold_min" : 0.0
}

In [6]:
#start_time = time.time()
#automated_labels_fg_bg = generate_automated_labels( data_path, isolation_parameters_birdnet)
#end_time = time.time()
#print(end_time - start_time)

# Performing Microfaune Segmentation on the dataset

In [7]:
data_path = "./Mixed_Bird/"
# parameters for Microfaune
isolation_parameters_microfaune = {
     "model" :          "microfaune",
     "technique" :       "steinberg",
     "threshold_type" :  "median",
     "threshold_const" : 3.2,
     "threshold_min" :   0.12,
     "window_size" :     1.5,
     "verbose"     :     True
}

In [8]:
#start_time = time.time()
#automated_labels_microfaune = generate_automated_labels( data_path, isolation_parameters_microfaune)
#end_time = time.time()
#print(end_time - start_time)

# Performing Tweetynet Segmentation on the dataset

In [9]:
data_path = "./Mixed_Bird/"
# Example parameters for TweetyNET
isolation_parameters_tweetynet = {
     "model" : "tweetynet",
     "tweety_output": True,
     "verbose" : True
}

In [10]:
#start_time = time.time()
#automated_labels_tweetynet = generate_automated_labels( data_path, isolation_parameters_tweetynet)
#end_time = time.time()
#print(end_time - start_time)

# Label Post-processing
## Converting all labels to 3s
## Keeping only the "High Priority" Species
## Making sure that only labels used from the same clips 

In [11]:
#automated_labels_fg_bg.to_csv("fg_bg_mdd.csv", index=False)

In [12]:
#automated_labels_tweetynet.to_csv("tweetynet_mdd.csv", index=False)

In [13]:
#automated_labels_microfaune.to_csv("microfaune_mdd.csv", index=False)

In [14]:
automated_labels_fg_bg = pd.read_csv("fg_bg_mdd.csv")
automated_labels_tweetynet = pd.read_csv("tweetynet_mdd.csv")
automated_labels_microfaune = pd.read_csv("microfaune_mdd.csv")

In [15]:
#species_column = []
#for row in automated_labels_microfaune.index:
#    split_filename = automated_labels_microfaune["IN FILE"][row].split("-")
#    species_name = split_filename[0] + '-' + split_filename[1]
#    species_column.append(species_name)
#automated_labels_microfaune["MANUAL ID"] = species_column

In [16]:
#species_column = []
#for row in automated_labels_fg_bg.index:
#    split_filename = automated_labels_fg_bg["IN FILE"][row].split("-")
#    species_name = split_filename[0] + '-' + split_filename[1]
#    species_column.append(species_name)
#automated_labels_fg_bg["MANUAL ID"] = species_column

In [17]:
#species_column = []
#for row in automated_labels_tweetynet.index:
#    split_filename = automated_labels_tweetynet["IN FILE"][row].split("-")
#    species_name = split_filename[0] + '-' + split_filename[1]
#    species_column.append(species_name)
#automated_labels_tweetynet["MANUAL ID"] = species_column

In [18]:
#species_column = []
#for row in manual_labels.index:
#    split_filename = manual_labels["IN FILE"][row].split("-")
#    species_name = split_filename[0] + '-' + split_filename[1]
#    species_column.append(species_name)
#manual_labels["MANUAL ID"] = species_column

In [19]:
#automated_labels_fg_bg = automated_labels_fg_bg[automated_labels_fg_bg["MANUAL ID"].isin(high_priority_species)]
#automated_labels_microfaune = automated_labels_microfaune[automated_labels_microfaune["MANUAL ID"].isin(high_priority_species)]
#automated_labels_tweetynet = automated_labels_tweetynet[automated_labels_tweetynet["MANUAL ID"].isin(high_priority_species)]
#manual_df = manual_labels[manual_labels["MANUAL ID"].isin(high_priority_species)]

In [20]:
#clip_set = set(manual_df["IN FILE"].unique())
#automated_labels_fg_bg = automated_labels_fg_bg[automated_labels_fg_bg["IN FILE"].isin(clip_set)]
#automated_labels_microfaune = automated_labels_microfaune[automated_labels_microfaune["IN FILE"].isin(clip_set)]
#automated_labels_tweetynet = automated_labels_tweetynet[automated_labels_tweetynet["IN FILE"].isin(clip_set)]

In [21]:
#automated_labels_fg_bg.to_csv("fg_bg_pre.csv", index=False)
#automated_labels_microfaune.to_csv("microfaune_pre.csv", index=False)
#automated_labels_tweetynet.to_csv("tweetynet_pre.csv", index=False)
#manual_df.to_csv("manual_pre.csv", index=False)
#print(len(automated_labels_fg_bg))
#print(len(automated_labels_microfaune))
#print(len(automated_labels_tweetynet))

In [22]:
# converting the labels into 3s segments
automated_labels_fg_bg_3s = annotation_chunker(automated_labels_fg_bg, 3)

In [23]:
automated_labels_microfaune_3s = annotation_chunker(automated_labels_microfaune, 3)

In [24]:
automated_labels_tweetynet_3s = annotation_chunker(automated_labels_tweetynet, 3)

In [25]:
manual_labels = annotation_chunker(manual_labels, 3)

In [26]:
print(len(automated_labels_fg_bg_3s))
print(len(automated_labels_microfaune_3s))
print(len(automated_labels_tweetynet_3s))

34339
20938
29245


In [27]:
species_column = []
for row in automated_labels_microfaune_3s.index:
    split_filename = automated_labels_microfaune_3s["IN FILE"][row].split("-")
    species_name = split_filename[0] + '-' + split_filename[1]
    species_column.append(species_name)
automated_labels_microfaune_3s["MANUAL ID"] = species_column

In [28]:
species_column = []
for row in automated_labels_fg_bg_3s.index:
    split_filename = automated_labels_fg_bg_3s["IN FILE"][row].split("-")
    species_name = split_filename[0] + '-' + split_filename[1]
    species_column.append(species_name)
automated_labels_fg_bg_3s["MANUAL ID"] = species_column

In [29]:
species_column = []
for row in automated_labels_tweetynet_3s.index:
    split_filename = automated_labels_tweetynet_3s["IN FILE"][row].split("-")
    species_name = split_filename[0] + '-' + split_filename[1]
    species_column.append(species_name)
automated_labels_tweetynet_3s["MANUAL ID"] = species_column

In [30]:
species_column = []
for row in manual_labels.index:
    split_filename = manual_labels["IN FILE"][row].split("-")
    species_name = split_filename[0] + '-' + split_filename[1]
    species_column.append(species_name)
manual_labels["MANUAL ID"] = species_column

In [31]:
fg_bg_automated_df = automated_labels_fg_bg_3s[automated_labels_fg_bg_3s["MANUAL ID"].isin(high_priority_species)]
microfaune_automated_df = automated_labels_microfaune_3s[automated_labels_microfaune_3s["MANUAL ID"].isin(high_priority_species)]
tweetynet_automated_df = automated_labels_tweetynet_3s[automated_labels_tweetynet_3s["MANUAL ID"].isin(high_priority_species)]
manual_df = manual_labels[manual_labels["MANUAL ID"].isin(high_priority_species)]

In [32]:
print(len(fg_bg_automated_df))
#print(len(fg_bg_automated_df["IN FILE"].unique()))
print(len(microfaune_automated_df))
#print(len(microfaune_automated_df["IN FILE"].unique()))
print(len(tweetynet_automated_df))
#print(len(tweetynet_automated_df["IN FILE"].unique()))
print(len(manual_df))
#print(len(manual_df["IN FILE"].unique()))

26689
16208
22739
17125


In [33]:
clip_set = set(manual_df["IN FILE"].unique())

In [34]:
fg_bg_automated_df = fg_bg_automated_df[fg_bg_automated_df["IN FILE"].isin(clip_set)]
microfaune_automated_df = microfaune_automated_df[microfaune_automated_df["IN FILE"].isin(clip_set)]
tweetynet_automated_df = tweetynet_automated_df[tweetynet_automated_df["IN FILE"].isin(clip_set)]

In [35]:
print("Number of Human Annotations: ", len(manual_df))
print("Number of Clips", len(manual_df["IN FILE"].unique()))
print("Number of FG-BG Separation Annotations", len(fg_bg_automated_df))
print("Number of Clips where FG-BG identified birds", len(fg_bg_automated_df["IN FILE"].unique()))
print("Number of Microfaune Automated Annotations", len(microfaune_automated_df))
print("Number of Clips where Microfaune identified birds", len(microfaune_automated_df["IN FILE"].unique()))
print("Number of Tweetynet Automated Annotations", len(tweetynet_automated_df))
print("Number of Clips where Tweetynet identified birds", len(tweetynet_automated_df["IN FILE"].unique()))
fg_bg_automated_df.to_csv("fg_bg_automated_labels.csv", index=False)
microfaune_automated_df.to_csv("microfaune_automated_df.csv", index=False)
fg_bg_automated_df.to_csv("tweetynet_automated_df.csv", index=False)
manual_df.to_csv( "manual_labels_processed.csv", index=False)

Number of Human Annotations:  17125
Number of Clips 1892
Number of FG-BG Separation Annotations 21567
Number of Clips where FG-BG identified birds 1891
Number of Microfaune Automated Annotations 13187
Number of Clips where Microfaune identified birds 1494
Number of Tweetynet Automated Annotations 18359
Number of Clips where Tweetynet identified birds 1887


# Calculating Precision and Recall

## Clip Metrics

In [36]:
fg_bg_automated_df["FOLDER"] = [data_path] * len(fg_bg_automated_df)
microfaune_automated_df["FOLDER"] = [data_path] * len(microfaune_automated_df)
tweetynet_automated_df["FOLDER"] = [data_path] * len(tweetynet_automated_df)

In [37]:
clip_statistics_df_fg_bg = clip_statistics(fg_bg_automated_df,manual_df,stats_type = "general");
clip_statistics_df_fg_bg.to_csv("fg_bg_clip_stats.csv", index=False)

In [38]:
clip_statistics_df_microfaune = clip_statistics(microfaune_automated_df,manual_df,stats_type = "general");
clip_statistics_df_microfaune.to_csv("microfaune_clip_stats.csv", index=False)

Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calc

In [39]:
clip_statistics_df_tweetynet = clip_statistics(tweetynet_automated_df,manual_df,stats_type = "general");
clip_statistics_df_tweetynet.to_csv("tweetynet_clip_stats.csv", index=False)

## Class Metrics

In [40]:
class_statistics_df_fg_bg = class_statistics(clip_statistics_df_fg_bg)
class_statistics_df_fg_bg.to_csv("fg_bg_class_stats.csv", index=False)

In [41]:
class_statistics_df_microfaune = class_statistics(clip_statistics_df_microfaune)
class_statistics_df_microfaune.to_csv("microfaune_class_stats.csv", index=False)

  f1 = 2 * (precision * recall) / (precision + recall)


In [42]:
class_statistics_df_tweetynet = class_statistics(clip_statistics_df_tweetynet)
class_statistics_df_tweetynet.to_csv("tweetynet_class_stats.csv", index=False)

## Global Metrics

In [43]:
fg_bg_automated_df["MANUAL ID"] = ["bird"] * len(fg_bg_automated_df)
fg_bg_automated_df["FOLDER"] = [data_path] * len(fg_bg_automated_df)
microfaune_automated_df["MANUAL ID"] = ["bird"] * len(microfaune_automated_df)
microfaune_automated_df["FOLDER"] = [data_path] * len(microfaune_automated_df)
tweetynet_automated_df["MANUAL ID"] = ["bird"] * len(tweetynet_automated_df)
tweetynet_automated_df["FOLDER"] = [data_path] * len(tweetynet_automated_df)
manual_df["MANUAL ID"] = ["bird"] * len(manual_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  manual_df["MANUAL ID"] = ["bird"] * len(manual_df)


In [44]:
statistics_df_fg_bg = automated_labeling_statistics(fg_bg_automated_df,manual_df,stats_type = "general");

Processed 50 clips in 1.8 seconds
Processed 100 clips in 1.6 seconds
Processed 150 clips in 1.7 seconds
Processed 200 clips in 1.7 seconds
Processed 250 clips in 1.8 seconds
Processed 300 clips in 1.9 seconds
Processed 350 clips in 1.5 seconds
Processed 400 clips in 1.8 seconds
Processed 450 clips in 1.8 seconds
Processed 500 clips in 1.6 seconds
Processed 550 clips in 1.7 seconds
Processed 600 clips in 1.5 seconds
Processed 650 clips in 1.4 seconds
Processed 700 clips in 1.5 seconds
Processed 750 clips in 1.7 seconds
Processed 800 clips in 1.4 seconds
Processed 850 clips in 1.5 seconds
Processed 900 clips in 1.8 seconds
Processed 950 clips in 1.4 seconds
Processed 1000 clips in 1.7 seconds
Processed 1050 clips in 1.6 seconds
Processed 1100 clips in 1.3 seconds
Processed 1150 clips in 2.2 seconds
Processed 1200 clips in 1.7 seconds
Processed 1250 clips in 1.7 seconds
Processed 1300 clips in 1.7 seconds
Processed 1350 clips in 1.8 seconds
Processed 1400 clips in 1.7 seconds
Processed 14

In [45]:
statistics_df_microfaune = automated_labeling_statistics(microfaune_automated_df,manual_df,stats_type = "general");

Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Processed 50 clips in 1.8 seconds
Error calculating statistics, likely due
        to zero division, setting values to zero
Processed 100 clips in 1.6 seconds
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Processed 150 clips in 1.9 seconds
Error calculating statistics, likely due
        to zero division, setting values to zero
Processed 200 clips in 1.7 seconds
Processed 250 clips in 2.0 seconds
Error calculating statistics, likely due
        to zero division, setting values to zero
Error calculating statistics, likely due
        to zero division, setting values to zero
Processed 300 cl

In [46]:
statistics_df_tweetynet = automated_labeling_statistics(tweetynet_automated_df,manual_df,stats_type = "general");

Processed 50 clips in 1.8 seconds
Processed 100 clips in 1.5 seconds
Processed 150 clips in 1.5 seconds
Processed 200 clips in 1.6 seconds
Processed 250 clips in 1.8 seconds
Processed 300 clips in 1.8 seconds
Processed 350 clips in 1.5 seconds
Processed 400 clips in 1.7 seconds
Processed 450 clips in 1.7 seconds
Processed 500 clips in 1.5 seconds
Processed 550 clips in 1.7 seconds
Processed 600 clips in 1.4 seconds
Processed 650 clips in 1.4 seconds
Processed 700 clips in 1.4 seconds
Processed 750 clips in 1.7 seconds
Processed 800 clips in 1.4 seconds
Processed 850 clips in 1.4 seconds
Processed 900 clips in 1.7 seconds
Processed 950 clips in 1.4 seconds
Processed 1000 clips in 1.6 seconds
Processed 1050 clips in 1.6 seconds
Processed 1100 clips in 1.3 seconds
Processed 1150 clips in 2.0 seconds
Processed 1200 clips in 1.7 seconds
Processed 1250 clips in 1.7 seconds
Processed 1300 clips in 1.6 seconds
Processed 1350 clips in 1.8 seconds
Processed 1400 clips in 1.7 seconds
Processed 14

In [47]:
print("Global Metrics for Foreground-Background Separation approach")
global_statistics_df_fg_bg = global_statistics(statistics_df_fg_bg)
global_statistics_df_fg_bg

Global Metrics for Foreground-Background Separation approach


Unnamed: 0,MANUAL ID,TRUE POSITIVE,FALSE NEGATIVE,FALSE POSITIVE,PRECISION,RECALL,F1
0,,50445.0,866.173175,14256.0,0.7797,0.9831,0.8697


In [48]:
print("Global Metrics for Microfaune Separation approach")
global_statistics_df_microfaune = global_statistics(statistics_df_microfaune)
global_statistics_df_microfaune

Global Metrics for Microfaune Separation approach


Unnamed: 0,MANUAL ID,TRUE POSITIVE,FALSE NEGATIVE,FALSE POSITIVE,PRECISION,RECALL,F1
0,,30729.0,12786.0,8832.0,0.7767,0.7062,0.7398


In [49]:
print("Global Metrics for Tweetynet Separation approach")
global_statistics_df_tweetynet = global_statistics(statistics_df_tweetynet)
global_statistics_df_tweetynet

Global Metrics for Tweetynet Separation approach


Unnamed: 0,MANUAL ID,TRUE POSITIVE,FALSE NEGATIVE,FALSE POSITIVE,PRECISION,RECALL,F1
0,,49620.0,1514.173175,5457.0,0.9009,0.9704,0.9344
