Code adapted for BirdCLEF 2024 from [Stefan Kahl](https://www.kaggle.com/stefankahl)'s notebook [How to submit to BirdCLEF 2023](https://www.kaggle.com/code/stefankahl/how-to-submit-to-birdclef-2023).

In [1]:
import os
import numpy as np
import pandas as pd

def extract_numbers(row_id):
    """Return the second underscore-separated field of ``row_id``.

    For a row id such as ``'soundscape_1446779_5'`` the result is the
    string ``'1446779'``. An ``IndexError`` is raised when ``row_id``
    contains no underscore at all.
    """
    return row_id.split('_')[1]

# Folder holding the test soundscapes, and the sample submission whose
# row ids tell us which soundscapes need predictions (the folder itself
# is not listed here).
test_audio_dir = '../input/birdclef-2024/test_soundscapes/'
test_df = pd.read_csv('/kaggle/input/birdclef-2024/sample_submission.csv')

# Store each row's soundscape ID in a 'numbers' column, then collect the
# distinct IDs in order of first appearance.
test_df['numbers'] = test_df['row_id'].map(extract_numbers)
file_list = test_df['numbers'].drop_duplicates().tolist()
print('Number of test soundscapes:', len(file_list))

Number of test soundscapes: 1


In [2]:
# Length in seconds of one prediction window; row ids end with the window's
# end time (5, 10, 15, ...).
CHUNK_SECONDS = 5

# This is where we will store our results: one list of row ids plus one
# list of scores per species. Species codes are the sorted entries of the
# training audio directory, which fixes the column order.
pred = {'row_id': []}
train_audio_dir = '../input/birdclef-2024/train_audio/'
species_list = sorted(os.listdir(train_audio_dir))
for species_code in species_list:
    pred[species_code] = []

# Process audio files and make predictions
for afile in file_list:

    # Complete file path. The prefix is 'soundscape_' (singular) so that it
    # agrees with the row_id prefix built below.
    path = os.path.join(test_audio_dir, 'soundscape_' + afile + '.ogg')

    # Open file with librosa and split signal into 5-second chunks
    # sig, rate = librosa.load(path, sr=32000)
    # ...

    # Number of chunks = number of sample-submission rows for this soundscape.
    # Compare the extracted ID for exact equality: a substring/regex match on
    # row_id would also count rows of any other soundscape whose ID merely
    # contains this one.
    num_chunks = int((test_df['numbers'] == afile).sum())

    # Make prediction for each chunk
    # Each bird gets a random value in our case
    # since we don't actually have a model
    for i in range(num_chunks):
        chunk_end_time = (i + 1) * CHUNK_SECONDS

        # Assign the row_id which we need to do for each chunk
        row_id = 'soundscape_' + afile + '_' + str(chunk_end_time)
        pred['row_id'].append(row_id)

        for bird in species_list:

            # This is our random prediction score for this bird
            score = np.random.uniform()

            # Put the result into our prediction dict
            pred[bird].append(score)

In [3]:
# Assemble the predictions into a table: row ids first, then one column
# per species in species_list order.
submission_columns = ['row_id', *species_list]
results = pd.DataFrame(pred, columns=submission_columns)

# Print the first five rows as a quick sanity check
print(results.head(n=5))

# Write the submission file, leaving out the DataFrame index
results.to_csv('submission.csv', index=False)

                  row_id    asbfly   ashdro1   ashpri1   ashwoo2   asikoe2  \
0   soundscape_1446779_5  0.428441  0.552646  0.027222  0.916861  0.906613   
1  soundscape_1446779_10  0.413254  0.328315  0.311110  0.107907  0.810310   
2  soundscape_1446779_15  0.772658  0.194165  0.248085  0.576682  0.825783   

    asiope1   aspfly1   aspswi1   barfly1  ...   whbwoo2   whcbar1   whiter2  \
0  0.062372  0.084937  0.241112  0.849557  ...  0.154182  0.754416  0.204405   
1  0.423391  0.286027  0.393873  0.955669  ...  0.194327  0.460120  0.080039   
2  0.946094  0.725439  0.604485  0.416688  ...  0.237160  0.435801  0.636235   

     whrmun   whtkin2    woosan   wynlau1   yebbab1   yebbul3   zitcis1  
0  0.641561  0.467058  0.975736  0.665595  0.869848  0.099262  0.699513  
1  0.429280  0.949276  0.588427  0.200515  0.388003  0.682840  0.964211  
2  0.214108  0.762483  0.781339  0.041413  0.495233  0.957449  0.445814  

[3 rows x 183 columns]
