In [2]:
import os
import librosa
import numpy as np
import pandas as pd
import json
import torch
import sys
# sys.path.append('/kaggle/input/bird-clef-utils')
from utils import EffNetB0Classifier, process_spectrogram
torch.manual_seed(42)

device = "cpu"

label_map = json.load(open('label_map.json'))
# Class labels from train audio
class_labels = sorted(list(label_map.keys()))

# List of test soundscapes (only visible during submission)
# test_soundscape_path = '/kaggle/input/birdclef-2025/train_soundscapes'
test_soundscape_path = 'train_soundscapes'
test_soundscapes = [os.path.join(test_soundscape_path, afile) for afile in sorted(os.listdir(test_soundscape_path)) if afile.endswith('.ogg')]

# Open each soundscape and make predictions for 5-second segments
# Use pandas df with 'row_id' plus class labels as columns
predictions = pd.DataFrame(columns=['row_id'] + class_labels)



# Example usage
model = EffNetB0Classifier(num_classes=206)
model.load_state_dict(torch.load("my_model.pth", map_location=torch.device('cpu'), weights_only=True))
model.eval()  # Set the model to evaluation mode

for soundscape in test_soundscapes[:3]:
    # Load audio
    sig, rate = librosa.load(path=soundscape, sr=None)

    # Split into 5-second chunks
    chunks = []
    for i in range(0, len(sig), rate * 5):
        chunk = sig[i:i + rate * 5]
        if len(chunk) < rate * 5:  # Pad the chunk if it's less than 5 seconds
            chunk = np.pad(chunk, (0, rate * 5 - len(chunk)), mode='constant')
        chunks.append(chunk)

    # Make predictions for each chunk
    for i, chunk in enumerate(chunks):
        # Compute the spectrogram
        spectrogram = librosa.feature.melspectrogram(y=chunk, sr=rate, n_mels=128)
        spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
        spectrogram_tensor = process_spectrogram(spectrogram_db)
        
        # Get row id (soundscape id + end time of 5s chunk)
        row_id = os.path.basename(soundscape).split('.')[0] + f'_{i * 5 + 5}'

        # Make prediction
        with torch.no_grad():
            outputs = model(spectrogram_tensor.unsqueeze(0))
            scores = torch.softmax(outputs, dim=1).squeeze().cpu().numpy()

        # Append to predictions as new row
        new_row = pd.DataFrame([[row_id] + list(scores)], columns=['row_id'] + class_labels)
        predictions = pd.concat([predictions, new_row], axis=0, ignore_index=True)

# Save prediction as csv
predictions.to_csv('submission.csv', index=False)
print(predictions)

  predictions = pd.concat([predictions, new_row], axis=0, ignore_index=True)


                    row_id       1139490       1192948       1194042  \
0    H02_20230420_074000_5  2.438436e-07  1.224529e-07  1.939583e-05   
1   H02_20230420_074000_10  1.171384e-07  6.264505e-08  1.262106e-05   
2   H02_20230420_074000_15  1.978547e-06  8.670925e-07  4.050570e-05   
3   H02_20230420_074000_20  3.323807e-07  2.009233e-07  1.763334e-05   
4   H02_20230420_074000_25  4.686690e-07  1.804626e-07  3.466604e-05   
5   H02_20230420_074000_30  1.535506e-07  8.311643e-08  1.507958e-05   
6   H02_20230420_074000_35  1.053030e-07  5.812122e-08  1.234698e-05   
7   H02_20230420_074000_40  6.615792e-08  4.936289e-08  1.102239e-05   
8   H02_20230420_074000_45  5.401632e-08  3.255416e-08  8.801645e-06   
9   H02_20230420_074000_50  1.031015e-07  6.489057e-08  1.174515e-05   
10  H02_20230420_074000_55  1.367797e-07  7.211742e-08  1.356830e-05   
11  H02_20230420_074000_60  9.989711e-08  5.658960e-08  1.051624e-05   
12   H02_20230420_112000_5  3.774606e-08  7.500735e-07  1.248645