## Filter selections by Species and create new .txt files

This notebook helps handle the large BirdNET_GlobalSelectionTable.txt, which contains the predictions for all classes across all sites and survey nights, by filtering it step by step. First, we filter by species, starting from one common species/class, and keep only the predictions for that species/class. Then, using the column 'Begin Path', we extract the Site ID and the survey night for each prediction (each row of the .txt) and add them as separate columns. Finally, we save the filtered DataFrame as a separate .txt file.

In [1]:
import os
import pandas as pd
from pathlib import Path

In [2]:
# Turn off pandas' chained-assignment check (SettingWithCopyWarning) for this
# session; assignments to filtered frames will not emit that warning.
pd.set_option("mode.chained_assignment", None)

### Step 1: filter selections by species & save a separate .txt file

In [3]:
# Root folder of the analysis; the species folders are created under it
main_dir = "/mnt/d/night_recordings_analysis/"

# Full path of the global selection table with every prediction
selections = os.path.join(
    main_dir,
    "BirdNET_GlobalSelectionTable.txt",
)

In [4]:
# Read the whole tab-separated selection table into memory
df_selections = pd.read_csv(selections, sep="\t")

# Preview the first rows
df_selections.head()

Unnamed: 0,Selection,View,Channel,Begin Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Common Name,Species Code,Confidence,Begin Path,File Offset (s)
0,1,Spectrogram 1,1.0,0.0,3.0,0.0,15000.0,Brown Tinamou,brotin1,0.003,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,0.0
1,2,Spectrogram 1,1.0,3.0,6.0,0.0,15000.0,Rufous-capped Motmot,rucmot2,0.0016,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,3.0
2,3,Spectrogram 1,1.0,6.0,9.0,0.0,15000.0,Brown Tinamou,brotin1,0.0016,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,6.0
3,4,Spectrogram 1,1.0,6.0,9.0,0.0,15000.0,Red Junglefowl,redjun,0.001,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,6.0
4,5,Spectrogram 1,1.0,12.0,15.0,0.0,15000.0,Brown Tinamou,brotin1,0.0103,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,12.0


In [5]:
# Show every distinct value of 'Species Code' to choose the class to filter
df_selections["Species Code"].unique()

array(['brotin1', 'rucmot2', 'redjun', 'Ciccaba virgata_Mottled Owl_song',
       'fepowl', 'Asio clamator_Striped Owl_call', 'bkbowl1', 'compau',
       'tabowl1', 'bufowl1', 'compot1',
       'Strix hylophila_Rusty-barred Owl_song',
       'Athene cunicularia_Burrowing Owl_call',
       'Strix hylophila_Rusty-barred Owl_call1', 'brnowl',
       'Ciccaba virgata_Mottled Owl_call', 'bkcsco1', 'trsowl',
       'Athene cunicularia_Burrowing Owl_song',
       'Strix hylophila_Rusty-barred Owl_call', 'swwqua1',
       'Asio clamator_Striped Owl_song', 'shtnig1', 'sitnig1', 'lotpot1',
       'ocepoo1', 'rufnig1', 'litnig1', 'Asio stygius_Stygian Owl_call',
       'Asio stygius_Stygian Owl_song', 'lotsco1', nan, 'nocall'],
      dtype=object)

In [6]:
# 'Species Code' value of the class to extract; also used in the output file name
species_code = "fepowl"

In [7]:
# Keep only the rows predicted as the chosen species.
# The explicit .copy() makes df_filtered an independent DataFrame, so the
# column assignments made on it in later cells modify it directly rather
# than a slice of df_selections (the case SettingWithCopyWarning flags).
df_filtered = df_selections[df_selections['Species Code'] == species_code].copy()
df_filtered.tail()

Unnamed: 0,Selection,View,Channel,Begin Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Common Name,Species Code,Confidence,Begin Path,File Offset (s)
8110422,8110423,Spectrogram 1,1.0,9.0,12.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.0012,/mnt/d/Disco4_Backup/night_recordings/P14-ARU6...,9.0
8115455,8115456,Spectrogram 1,1.0,69.0,72.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.0015,/mnt/d/Disco4_Backup/night_recordings/P14-ARU6...,69.0
8115741,8115742,Spectrogram 1,1.0,15.0,18.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.0012,/mnt/d/Disco4_Backup/night_recordings/P14-ARU6...,15.0
8116411,8116412,Spectrogram 1,1.0,66.0,69.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.0045,/mnt/d/Disco4_Backup/night_recordings/P14-ARU6...,66.0
8138427,8138428,Spectrogram 1,1.0,114.0,117.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.001,/mnt/d/Disco4_Backup/night_recordings/P14-ARU6...,114.0


In [8]:
# Renumber 'Selection' so the filtered table counts 1..N without gaps
df_filtered = df_filtered.assign(Selection=range(1, len(df_filtered) + 1))
df_filtered.head()

Unnamed: 0,Selection,View,Channel,Begin Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Common Name,Species Code,Confidence,Begin Path,File Offset (s)
6,1,Spectrogram 1,1.0,12.0,15.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.0014,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,12.0
9,2,Spectrogram 1,1.0,15.0,18.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.0021,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,15.0
21,3,Spectrogram 1,1.0,42.0,45.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.002,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,42.0
23,4,Spectrogram 1,1.0,45.0,48.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.0018,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,45.0
31,5,Spectrogram 1,1.0,60.0,63.0,0.0,15000.0,Ferruginous Pygmy-Owl,fepowl,0.0014,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,60.0


### Step 2: using the column 'Begin Path' and the names of .wav files create Site ID & SN columns

In [9]:
# Work on an independent copy: the Site_ID / SN columns added in this step
# must not also appear in df_filtered, which earlier cells already displayed.
df = df_filtered.copy()
def extract_site_sn_from_path(path_str) -> pd.Series:
    """Derive the site ID and survey-night number from a recording path.

    The last three path components are read as
    ``<site folder>/<SN folder>/<file>``, for example
    ``.../CH25-ARU22-1709-0510/SN_05/<file>``:

    * site ID: the text before the first ``-`` of the site folder (``CH25``);
    * survey night: the SN folder name with ``SN_`` removed and left-padded
      with zeros to two characters (``05``).

    Parameters
    ----------
    path_str : str or os.PathLike
        One value of the 'Begin Path' column.

    Returns
    -------
    pd.Series
        Two elements, ``[site_id, sn]``. When the value cannot be parsed
        (too few path components, or not a path at all such as NaN/None),
        a warning is printed and ``[None, None]`` is returned.
    """
    try:
        # Path() is built inside the try so that a non-path value (e.g. NaN
        # from a missing 'Begin Path') is reported like any other unparsable
        # row instead of raising TypeError and aborting the whole .apply().
        parts = Path(path_str).parts

        site_folder = parts[-3]          # e.g., CH25-ARU22-1709-0510
        sn_folder = parts[-2]            # e.g., SN_05
        site_id = site_folder.split('-')[0]  # CH25
        sn = sn_folder.replace('SN_', '').zfill(2)  # 05
        return pd.Series([site_id, sn])
    except Exception as e:
        # Deliberately broad: parsing is best-effort per row, failures are
        # reported and the row gets empty values.
        print(f"Warning: Failed to parse path: {path_str} | Error: {e}")
        return pd.Series([None, None])

# Parse every 'Begin Path' and attach the results as two new columns.
# The parsed rows are collected into a DataFrame with fixed column names, so
# the assignment also works when df has no rows (Series.apply would return an
# empty Series there and the two-column assignment would raise ValueError).
site_sn = pd.DataFrame(
    [tuple(extract_site_sn_from_path(p)) for p in df['Begin Path']],
    index=df.index,
    columns=['Site_ID', 'SN'],
)
df[['Site_ID', 'SN']] = site_sn

print("✔ Extracted Site_ID and SN from 'Begin Path' folder structure.")


✔ Extracted Site_ID and SN from 'Begin Path' folder structure.


In [10]:
# Exclude some columns from the final df.
# Names that are not present in df are skipped instead of raising.
df_filtered1 = df.drop(
    columns=[
        'View',
        'Channel',
        'File Name',
        'Begin Time (s)',
        'End Time (s)',
        'Low Freq (Hz)',
        'High Freq (Hz)',
    ],
    errors='ignore',
)
df_filtered1.head()

Unnamed: 0,Selection,Common Name,Species Code,Confidence,Begin Path,File Offset (s),Site_ID,SN
6,1,Ferruginous Pygmy-Owl,fepowl,0.0014,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,12.0,M4,1
9,2,Ferruginous Pygmy-Owl,fepowl,0.0021,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,15.0,M4,1
21,3,Ferruginous Pygmy-Owl,fepowl,0.002,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,42.0,M4,1
23,4,Ferruginous Pygmy-Owl,fepowl,0.0018,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,45.0,M4,1
31,5,Ferruginous Pygmy-Owl,fepowl,0.0014,/mnt/d/Disco1_Backup/night_recordings/M4-ARU26...,60.0,M4,1


In [11]:
# Common name of the filtered species, taken from the first row
# (raises IndexError if the filtered table is empty)
common_name = df_filtered1['Common Name'].iloc[0]
common_name

In [13]:
# The species gets its own folder under main_dir, named after its common name
species_dir = os.path.join(main_dir, common_name)
os.makedirs(species_dir, exist_ok=True)  # no error if the folder already exists

# Output file: <species_code>_SelectionTable.txt inside the species folder
filtered1 = os.path.join(
    species_dir,
    f"{species_code}_SelectionTable.txt",
)

# Write the table tab-separated, without the DataFrame index
df_filtered1.to_csv(filtered1, sep="\t", index=False)

print(f"Filtered file saved to: {filtered1}")

Filtered file saved to: /mnt/d/night_recordings_analysis/Ferruginous Pygmy-Owl/fepowl_SelectionTable.txt
