### Test set clip count

In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Root directory for this notebook: the CSV files handled in the cells below are
# read from and written to this folder. Replace it with your actual root directory.
root_dir = Path("/mnt/d/retraining_BirdNET_2025/model_test/input_ready")

In [3]:
# Read the ARU validation table (one-hot encoded, per the file name) from the root directory.
aru_onehot_path = root_dir / 'one-hot-encoded_validation1-ARUs.csv'
df_aru = pd.read_csv(aru_onehot_path)

In [4]:
# Keep only the columns from position 3 onwards, i.e. drop the first three columns (0, 1, 2).
# .iloc selects by integer position and takes [rows, columns]:
#   ":"  -> all rows (no row filtering here)
#   "3:" -> columns from index 3 up to the end
# NOTE: this reassigns df_aru, so re-running this cell without reloading the CSV
# drops three more columns each time.
n_leading_cols_to_drop = 3

df_aru = df_aru.iloc[:, n_leading_cols_to_drop:]

In [5]:
# Sum each column (column-wise) and keep the result as a new Series whose index
# holds the column names of df_aru (the class names) and whose values are the totals.
# numeric_only=True is passed explicitly: since pandas 2.0, DataFrame.sum() no longer
# skips non-numeric columns by default, so any leftover text column would otherwise
# be concatenated or raise instead of being ignored.
aru_clip_sums = df_aru.sum(numeric_only=True)
aru_clip_sums

Barn Owl                     7
Rusty-barred Owl_song       37
Rusty-barred Owl_call       10
Rusty-barred Owl_call1      10
Ferruginous Pygmy-Owl       49
Tropical Screech-Owl        56
Black-capped Screech-Owl    19
Long-tufted Screech-Owl     38
Buff-fronted Owl             5
Rufous Nightjar              0
Silky-tailed Nightjar       30
Stygian Owl_song             0
Stygian Owl_call             0
Striped Owl_song             0
Striped Owl_call            19
Burrowing Owl_call          28
Burrowing Owl_song           8
Rufous-capped Motmot         0
Black-banded Owl             0
Mottled Owl_song            35
Mottled Owl_call             0
Brown Tinamou                9
Ocellated Poorwill          14
Long-tailed Potoo            4
Common Potoo                25
Common Pauraque             33
Short-tailed Nighthawk      17
Spot-winged Wood-Quail       0
Tawny-browed Owl            10
Little Nightjar              1
dtype: int64

In [6]:
# Convert the Series of per-class sums into a two-column DataFrame:
# the former index becomes the 'class' column and the summed values become 'count'.
aru_clips = aru_clip_sums.reset_index()
aru_clips.columns = ['class', 'count']

# Save the table without the row index so the file contains only 'class' and 'count'.
aru_clips.to_csv(root_dir / 'aru_all_clip-count.csv', index=False)

Next, we perform some operations on the DataFrames: we merge the full and ARU clip counts per class, then compute the difference between them.

In [7]:
# Load both clip-count tables from the root directory:
# the full count and the ARU count written by the earlier cell.
full_count_path = root_dir / 'all_clip-count.csv'
aru_count_path = root_dir / 'aru_all_clip-count.csv'

full_count = pd.read_csv(full_count_path)
aru_count = pd.read_csv(aru_count_path)

In [8]:
# Join the two count tables on their shared 'class' column.
# how='outer' keeps every class found in either table (use 'inner' to keep only
# the classes present in both); a class missing from one table gets NaN there.
# Both tables carry a 'count' column, so the suffixes turn them into
# 'count_full' and 'count_aru'.
merged_df = pd.merge(left=full_count, right=aru_count, how='outer', on='class', suffixes=('_full', '_aru'))

print(merged_df)

                       class  count_full  count_aru
0                   Barn Owl          23          7
1      Rusty-barred Owl_song          37         37
2      Rusty-barred Owl_call          26         10
3     Rusty-barred Owl_call1          15         10
4      Ferruginous Pygmy-Owl          54         49
5       Tropical Screech-Owl          56         56
6   Black-capped Screech-Owl          19         19
7    Long-tufted Screech-Owl          38         38
8           Buff-fronted Owl          14          5
9            Rufous Nightjar          10          0
10     Silky-tailed Nightjar          30         30
11          Stygian Owl_song          32          0
12          Stygian Owl_call          24          0
13          Striped Owl_song          36          0
14          Striped Owl_call          19         19
15        Burrowing Owl_call          28         28
16        Burrowing Owl_song          41          8
17      Rufous-capped Motmot          57          0
18          

In [9]:
# Per-class difference between the full count and the ARU count, stored as 'count_XC'.
xc_counts = merged_df["count_full"] - merged_df["count_aru"]
merged_df = merged_df.assign(count_XC=xc_counts)

# Save the combined table (class, count_full, count_aru, count_XC) without the row index.
merged_df.to_csv(root_dir / 'test_all-classes_clip-count.csv', index=False)