In [None]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc
from natsort import natsorted

In [None]:
# Setting parameters
recordings = 26  # number of recordings the per-recording loops below iterate over

# Project root. Defaults to the original local checkout; set the BIRD_COUNTS_DIR
# environment variable to run the notebook from another location without editing
# this cell. Joining with '' guarantees a trailing separator, so code that builds
# paths by string concatenation (dir + 'file') keeps working.
# NOTE(review): `dir` shadows the Python builtin; the name is kept because other
# cells may reference it.
dir = os.path.join(
    os.environ.get('BIRD_COUNTS_DIR', '/Users/lillianwang/Documents/bird-counts-25/'),
    '',
)
birdnet_dir = os.path.join(dir, 'data', 'birdnet', '')
ebird_dir = os.path.join(dir, 'data', 'ebird', '')

In [None]:
def load_data(dir):
    """Read every CSV file in a directory into a list of DataFrames.

    Files are processed in natural sort order of their names (so that, e.g.,
    '2.csv' comes before '10.csv'). Each frame gets a 'Recording' column holding
    the 1-based position of its file in that order.

    Parameters
    ----------
    dir : str
        Directory containing the CSV files. A trailing path separator is
        optional.

    Returns
    -------
    list of pandas.DataFrame
        One frame per '.csv' file; empty list if the directory has none.
    """
    files = natsorted([f for f in os.listdir(dir) if f.endswith('.csv')])
    dfs = []
    for recording, file in enumerate(files, start=1):
        # os.path.join instead of `dir + file`: the concatenation silently built
        # a wrong path whenever `dir` lacked a trailing separator.
        df = pd.read_csv(os.path.join(dir, file), encoding='latin1')
        df['Recording'] = recording
        dfs.append(df)
    return dfs

In [None]:
# Load the per-recording tables: BirdNET first, then eBird
birdnet_dfs, ebird_dfs = (load_data(folder) for folder in (birdnet_dir, ebird_dir))

# Strip durations to digits
def extract_numbers(text):
    """Return only the digit characters of `text`, concatenated in order.

    Missing values (None or a float NaN, which is what pandas produces for a
    blank CSV cell) yield an empty string instead of raising TypeError.

    Note that all digits are joined together, so a value containing several
    separate numbers collapses into one digit string.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    return ''.join(filter(str.isdigit, text))
    
# Clean each recording's eBird durations and copy the duration onto the
# BirdNET frame of the same recording (frames are paired by list position).
for ebird_df, birdnet_df in zip(ebird_dfs, birdnet_dfs):
    if not ebird_df.empty:
        durations = ebird_df['Duration'].apply(extract_numbers)
        ebird_df['Duration'] = durations
        # Assign a scalar, not the Series: Series assignment aligns on the row
        # index, which left NaN in every BirdNET row past the eBird row count
        # and paired the remaining rows with unrelated eBird rows.
        # NOTE(review): assumes all rows of one eBird file share the same
        # duration, so the first row is representative — confirm against the data.
        birdnet_df['Duration'] = durations.iloc[0]

# Stack the per-recording frames into one table per source.
combined_birdnet = pd.concat(birdnet_dfs)
combined_ebird = pd.concat(ebird_dfs)

In [None]:
# Collapse eBird names that carry a parenthesised qualifier onto the plain
# common name, so they can be compared with BirdNET's 'Common name' values.
name_corrections = dict([
    ('Eastern Towhee (Red-eyed)', 'Eastern Towhee'),
    ('Song Sparrow (melodia/atlantica)', 'Song Sparrow'),
    ('Yellow Warbler (Northern)', 'Yellow Warbler'),
    ('Northern House Wren (Northern)', 'House Wren'),
    ('Red-winged Blackbird (Red-winged)', 'Red-winged Blackbird'),
    ('Common Grackle (Bronzed)', 'Common Grackle'),
    ('Hairy Woodpecker (Eastern)', 'Hairy Woodpecker'),
    ('Eastern Meadowlark (Eastern)', 'Eastern Meadowlark'),
    ('Downy Woodpecker (Eastern)', 'Downy Woodpecker'),
    ('Northern Cardinal (Northern)', 'Northern Cardinal'),
    ('White-breasted Nuthatch (Eastern)', 'White-breasted Nuthatch'),
    ('Northern Flicker (Yellow-shafted)', 'Northern Flicker'),
])


def _standardize_names(species_column):
    """Return `species_column` with each key of `name_corrections` replaced by its value."""
    return species_column.replace(name_corrections)


# Per-recording frames first ...
for df in ebird_dfs:
    df['Species'] = _standardize_names(df['Species'])

# ... then the combined table, which is a separate frame and needs its own pass.
combined_ebird['Species'] = _standardize_names(combined_ebird['Species'])

In [None]:
# Boxplots of confidence score by species: one panel per recording.
# Boxes are blue when eBird also lists the species for that recording, red otherwise.
# squeeze=False makes plt.subplots always return a 2-D array; with the default,
# recordings == 1 yields a bare Axes object and axes[i] would fail.
fig, axes = plt.subplots(nrows=recordings, ncols=1, figsize=(8, recordings*5), squeeze=False)
axes = axes[:, 0]  # single column -> keep `axes` a 1-D array of Axes

for i in range(recordings):
    ax = axes[i]

    # Only species with at least 3 BirdNET detections in this recording get a box.
    filtered_df = birdnet_dfs[i].groupby('Common name').filter(lambda x: len(x) >= 3)

    # Named `observed_species` rather than `true_pos` so it does not collide with
    # the integer counter of that name used in the counts cell.
    observed_species = set(ebird_dfs[i]['Species'])
    birdnet_species = set(filtered_df['Common name'])
    color_map = {sp: ('b' if sp in observed_species else 'r') for sp in birdnet_species}

    sns.boxplot(data=filtered_df, ax=ax, x='Common name', y='Confidence', hue='Common name', palette=color_map)
    ax.tick_params(rotation=45, labelsize=7.5)
    ax.grid(True)

    ax.set_title(f'Confidence score distribution by species ({i+1})')

plt.tight_layout()
plt.savefig('confidence-boxplots.png', dpi=200)
plt.show()

In [None]:
# Distribution of BirdNET confidence scores pooled over every recording
# (20 bins spanning 0.25-1, with a KDE overlay).
hist_ax = sns.histplot(
    combined_birdnet['Confidence'],
    bins=20,
    binrange=(.25, 1),
    kde=True,
    color='b',
)
hist_ax.grid(True)

hist_ax.set_title('Confidence score distribution')
hist_ax.set_xlabel('Confidence')
hist_ax.set_ylabel('Occurrences')

hist_ax.figure.savefig('confidence-histogram.png', dpi=200)
plt.show()

In [None]:
# ROC curve across all recordings
# One sample per (recording, species) pair, for every species named by either
# source in that recording:
#   label = 1 if eBird lists the species for the recording, else 0
#   score = highest BirdNET confidence for the species in that recording,
#           or 0.0 if BirdNET never reported it
y_true = []
y_scores = []

for i in range(recordings):
    detections = birdnet_dfs[i]
    ebird_species = set(ebird_dfs[i]['Species'])
    birdnet_species = set(detections['Common name'])
    total_species = ebird_species.union(birdnet_species)

    for species in total_species:
        # Skip non-string entries (e.g. NaN from blank cells).
        if not isinstance(species, str):
            continue

        # The label depends on eBird only. The previous expression,
        # `species in ebird_species and birdnet_species`, parsed as
        # `(species in ebird_species) and birdnet_species`, so its second operand
        # merely tested that the BirdNET set was non-empty.
        y_true.append(1 if species in ebird_species else 0)

        # Highest BirdNET confidence score for that species
        if species in birdnet_species:
            species_rows = detections[detections['Common name'] == species]
            confidence = species_rows['Confidence'].max()
        else:
            confidence = 0.0
        y_scores.append(confidence)

# Plotting recipe adapted from StackExchange
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.plot([0, 1], [0, 1],'r--')  # chance diagonal for reference
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.legend(loc = 'lower right')
plt.grid(True)

plt.title('ROC curve')
plt.ylabel('True positive rate')
plt.xlabel('False positive rate')


plt.savefig('roc-curve.png', dpi=200)
plt.show()

In [None]:
# True pos, false pos, and false neg counts across all recordings
# Counted per (recording, species) pair:
#   true positive  - species listed by both eBird and BirdNET
#   false negative - species listed by eBird only
#   false positive - species listed by BirdNET only
true_pos = 0
false_pos = 0
false_neg = 0

# Loop through recordings
for i in range(recordings):
    # Keep string names only, matching the ROC cell: non-string entries such as
    # NaN from blank cells were previously counted as false positives/negatives.
    ebird_species = {s for s in ebird_dfs[i]['Species'] if isinstance(s, str)}
    birdnet_species = {s for s in birdnet_dfs[i]['Common name'] if isinstance(s, str)}

    # Set algebra replaces the per-species if/elif chain.
    true_pos += len(ebird_species & birdnet_species)
    false_neg += len(ebird_species - birdnet_species)
    false_pos += len(birdnet_species - ebird_species)

plt.bar(['True positives', 'False positives', 'False negatives'], [true_pos, false_pos, false_neg], color=['g', 'b', 'r'])
plt.grid(True)

plt.title('True pos, false pos, and false neg')

plt.savefig('confusion-bar.png', dpi=200)
plt.show()