In [1]:
import pandas as pd
import matplotlib.pyplot as plot
import numpy as np

In [2]:
# had to make some quick edits with excel for the sake of time. I removed the No Birds label and 
# changed Glaucus winged gull to western gull, that was a bad ID on my part
# I also made sure that the dates didn't start off with a 0 as in 08/16/2021, because excel is stupid
automated_df = pd.read_csv("birdnet-lite_day-level_reduced.csv")
manual_df = pd.read_csv("volunteer_day-level.csv")

In [3]:
# Getting a unique list of all of the species with respect to the automated labels and with
# respect to the manual labels
automated_species_list = automated_df["SPECIES"].to_list()
automated_species_list = list(dict.fromkeys(automated_species_list))
manual_species_list = manual_df["SPECIES"].to_list()
manual_species_list = list(dict.fromkeys(manual_species_list))
# Creating a list of all of the species
union_species_list = list(set().union(manual_species_list,automated_species_list))

In [4]:
# creating a list of the days in which there were both automated and manual classifications
automated_days_list = automated_df["DATE"].to_list()
automated_days_list = list(dict.fromkeys(automated_days_list))
manual_days_list = manual_df["DATE"].to_list()
manual_days_list = list(dict.fromkeys(manual_days_list))
union_days_list = list(set().union(automated_days_list,manual_days_list))

In [5]:
# initializing the dictionary that will be used to create the final dataframe
output_dict = {
    "SPECIES" : union_species_list,
    "TRUE POSITIVES" : [],
    "FALSE POSITIVES" : [],
    "FALSE NEGATIVES" : [],
    "TRUE NEGATIVES" : []
}
# initializing the list that will contain all of the relevant metrics
# initializing all of the values to zero
tp_list = [0]*len(union_species_list)
fp_list = [0]*len(union_species_list)
fn_list = [0]*len(union_species_list)
tn_list = [0]*len(union_species_list)

In [6]:
# The idea is that I am going to create a 2-layer nested for loop where the outer loop goes through the 
# species list while the inner for loop goes through each relevant deployment day, deciding whether each
# species was IDed as a TP, FP, FN, TN with respect to each day

# Setting threshold by which we consider the confidence of an automated label to be good enough
threshold = 0.5

# looping through each bird species
for ndx in range(len(union_species_list)):
    # collecting current species of interest
    cur_species = union_species_list[ndx]
    # looping through the relevant days
    for day in union_days_list:
        # creating a temporary dataframe that contains the manual annotations from the relevant day
        tmp_manual_df = manual_df[manual_df["DATE"] == day]
        # creating a temporary dataframe that contains the automated annotations from the relevant day
        tmp_automated_df = automated_df[automated_df["DATE"] == day]
        
        # comparing the automated and manual annotations. Breaking down into TP, FP, FN, TN scores
        manual_species_df = tmp_manual_df[tmp_manual_df["SPECIES"] == cur_species]
        automated_species_df = tmp_automated_df[tmp_automated_df["SPECIES"] == cur_species]

        #  resetting the indices to make further calculations easier
        if (not manual_species_df.empty):
            manual_species_df.reset_index(drop=True,inplace=True)
        # 
        if (not automated_species_df.empty):
            automated_species_df.reset_index(drop=True,inplace=True)
        
        # cases for true negative (TN)
        if (automated_species_df.empty and manual_species_df.empty):
            tn_list[ndx] += 1
            
        elif (not automated_species_df.empty) and (automated_species_df["CONFIDENCE"][0] < threshold and manual_species_df.empty):
            tn_list[ndx] += 1
            
        # case for false positive (FP)
        elif (manual_species_df.empty and (automated_species_df["CONFIDENCE"][0] > threshold)):
            fp_list[ndx] += 1
            
        # cases for false negative (FN)
        elif ((not manual_species_df.empty) and (not automated_species_df.empty) and (automated_species_df["CONFIDENCE"][0] < threshold)) :
            fn_list[ndx] += 1
            
        elif ((not manual_species_df.empty) and automated_species_df.empty) :
            fn_list[ndx] += 1
            
        # case for true positive (TP)
        elif((not manual_species_df.empty) and (automated_species_df["CONFIDENCE"][0] > threshold)):
            tp_list[ndx] += 1

In [9]:
output_dict["TRUE POSITIVES"] = tp_list
output_dict["FALSE POSITIVES"] = fp_list
output_dict["FALSE NEGATIVES"] = fn_list
output_dict["TRUE NEGATIVES"] = tn_list
output_df = pd.DataFrame.from_dict(output_dict)

In [10]:
output_df

Unnamed: 0,SPECIES,TRUE POSITIVES,FALSE POSITIVES,FALSE NEGATIVES,TRUE NEGATIVES
0,Savanna Nightjar,0,0,0,7
1,Bewick's Wren,0,1,0,6
2,Greater Sand-Plover,0,0,0,7
3,Curve-billed Thrasher,0,0,0,7
4,Brown-crested Flycatcher,0,0,0,7
5,Orange-crowned Warbler,0,0,1,6
6,Arabian Scops-Owl,0,0,0,7
7,Dwarf Tinamou,0,0,0,7
8,Song Thrush,0,0,0,7
9,California Scrub-Jay,0,0,1,6


In [12]:
# precision = tp/(tp+fp)
precision_list = []
# recall = tp/(tp+fn)
recall_list = []
# f1 = 2*precision*recall/(precision+recall)
f1_list = []
# accuracy = tp/(tp+tn)
accuracy_list = []
for row in output_df.index:
    # getting in confusion matrix scores
    tp = output_df["TRUE POSITIVES"][row]
    fp = output_df["FALSE POSITIVES"][row]
    fn = output_df["FALSE NEGATIVES"][row]
    tn = output_df["TRUE NEGATIVES"][row]
    precision = 0.0
    recall = 0.0
    f1 = 0.0
    accuracy = 0.0
    # calculating precision
    if ((tp + fp) == 0):
        precision_list.append(precision)
    else:
        precision = tp/(tp+fp)
        precision_list.append(precision)
        
    # calculating recall
    if ((tp+fn) == 0):
        recall_list.append(recall)
    else:
        recall = tp/(tp+fn)
        recall_list.append(recall)
    
    # calculating f1
    if ( (precision == 0.0 ) and (recall == 0.0 ) ):
        f1_list.append(f1)
    else:
        f1 = 2*precision*recall/(precision+recall)
        f1_list.append(f1)
        
    # calculating accuracy
    if ((tp+tn) == 0):
        accuracy_list.append(accuracy)
    else:
        accuracy = tp/(tp+tn)
        accuracy_list.append(accuracy)
output_df["PRECISION"] = precision_list
output_df["RECALL"] = recall_list
output_df["F1"] = f1_list
output_df["ACCURACY"] = accuracy_list
output_df["F1"] = output_df["F1"].fillna(0.0)

In [13]:
output_df

Unnamed: 0,SPECIES,TRUE POSITIVES,FALSE POSITIVES,FALSE NEGATIVES,TRUE NEGATIVES,PRECISION,RECALL,F1,ACCURACY
0,Savanna Nightjar,0,0,0,7,0.0,0.0,0.0,0.0
1,Bewick's Wren,0,1,0,6,0.0,0.0,0.0,0.0
2,Greater Sand-Plover,0,0,0,7,0.0,0.0,0.0,0.0
3,Curve-billed Thrasher,0,0,0,7,0.0,0.0,0.0,0.0
4,Brown-crested Flycatcher,0,0,0,7,0.0,0.0,0.0,0.0
5,Orange-crowned Warbler,0,0,1,6,0.0,0.0,0.0,0.0
6,Arabian Scops-Owl,0,0,0,7,0.0,0.0,0.0,0.0
7,Dwarf Tinamou,0,0,0,7,0.0,0.0,0.0,0.0
8,Song Thrush,0,0,0,7,0.0,0.0,0.0,0.0
9,California Scrub-Jay,0,0,1,6,0.0,0.0,0.0,0.0
