In [None]:
import sys
import os
# When running on Kaggle (detected by the presence of /kaggle/input/), make the
# helper directory importable — presumably where the `utils` module imported by
# the cells below lives. Elsewhere nothing is changed.
if os.path.exists('/kaggle/input/'):
    kaggle_utils_dir = '/kaggle/input/bird-clef-utils'
    # Guard against duplicates so re-running this cell does not keep growing sys.path.
    if kaggle_utils_dir not in sys.path:
        sys.path.append(kaggle_utils_dir)

# Dataset processing

In [None]:
import pandas as pd

from utils import contains_human_voice, load_all_training_chunks

segment_length = 5  # seconds
stride = 5  # unused in this cell; presumably the hop between consecutive chunks — TODO confirm


def _chunk_has_voice(chunk):
    """Return the `contains_human_voice` verdict for one chunk row.

    The window checked runs from the row's `audio_start` to
    `audio_start + segment_length`.
    """
    window_start = chunk['audio_start']
    window_end = window_start + segment_length
    return contains_human_voice(chunk['filename'], window_start, window_end)


# Load every training chunk, flag the ones with human voice, and keep only the rest.
training_chunks = load_all_training_chunks()
training_chunks['contains_human_voice'] = training_chunks.apply(_chunk_has_voice, axis=1)
voice_free = ~training_chunks['contains_human_voice']
training_chunks = training_chunks[voice_free]


# Define model

In [None]:
import torch
from utils import EffNetB0Classifier

# Build the classifier and restore trained weights for CPU inference.
# NOTE(review): num_classes presumably has to match the checkpoint's output layer — confirm.
model = EffNetB0Classifier(num_classes=206)

# TODO: set this to the checkpoint file; the path is empty in this notebook.
weights_path = ''

# `load_state_dict` expects an already-loaded state dict, so read the file with
# `torch.load` first. `map_location` and `weights_only` are `torch.load`
# arguments: the former remaps stored tensors onto the CPU, the latter restricts
# unpickling to tensors and plain containers rather than arbitrary objects.
state_dict = torch.load(weights_path, map_location=torch.device('cpu'), weights_only=True)
model.load_state_dict(state_dict)
model.eval()  # Set the model to evaluation mode

In [None]:
from utils import AudioDataset


# Wrap the filtered chunk table in a dataset and report how many items it holds.
my_ds = AudioDataset(training_chunks)
dataset_size = len(my_ds)
print(f"dataset size: {dataset_size}")

In [None]:
import torch
import pandas as pd
from tqdm import tqdm
from torch.utils.data import DataLoader
# Iterate over the dataset in its stored order, 32 samples per batch.
dataloader = DataLoader(my_ds, batch_size=32, shuffle=False)

# Inference only: switch the model to evaluation mode.
model.eval()

# Accumulators: one predicted class index and one true label per sample.
predictions = []
labels = []

# Gradients are not needed for inference, so disable their tracking.
with torch.no_grad():
    for inputs, targets in tqdm(dataloader, desc="Processing dataset"):
        batch_output = model(inputs)
        # Predicted class = index of the largest output value along dim 1.
        predictions.extend(batch_output.argmax(dim=1).tolist())
        labels.extend(targets.tolist())

# One row per sample: the true label next to the model's prediction.
results_df = pd.DataFrame({
    'True_Label': labels,
    'Predicted_Label': predictions,
})

In [None]:
from sklearn.metrics import classification_report

# Build the classification report as a dict, transpose it so that each report
# entry becomes a row, and save the table as CSV.
report_dict = classification_report(
    results_df['True_Label'],
    results_df['Predicted_Label'],
    output_dict=True,
)
report_df = pd.DataFrame(report_dict).T
report_df.to_csv('classification_report.csv')