In [117]:
import sys
import numpy as np
import pandas as pd
import pickle
import json
import torch
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from birdclef.datasets import soundscape
from birdclef.models.classifier import datasets
from birdclef.models.classifier import model as classifier_model
from birdclef.models.classifier_nn.datasets import ClassifierDataModule, ToEmbedSpace
from birdclef.models.classifier_nn.model import ClassifierNet
from birdclef.utils import chunks, transform_input

In [74]:
def generate_test_df(species):
    audio_root = "../data/raw/birdclef-2022/train_audio"
    filter_set = "../data/raw/birdclef-2022/scored_birds.json"
    scored_birds = json.loads(Path(filter_set).read_text())
    paths = list((Path(audio_root) / species).glob("*"))
    frames = []
    for path in paths:
        df = soundscape.parse_soundscape(path)
        frames.append(df[df.end_time != 0][["file_id", "end_time"]])
    test_df = pd.concat(frames).merge(
        pd.DataFrame(scored_birds, columns=["bird"]), how="cross"
    )
    test_df["row_id"] = (
        test_df["file_id"] + "_" + test_df["bird"] + "_" + test_df["end_time"].map(str)
    )
    return test_df

In [75]:
def test_df_with_true_label(species):
    test_df = generate_test_df(species)
    test_df["label"] = test_df["bird"] == species
    return test_df

### Test for original classifier

In [76]:
def predict(species, classifier_source):
    audio_root = "../data/raw/birdclef-2022/train_audio"

    classifier_source = Path(classifier_source)
    with open(classifier_source / "submit_classifier.pkl", "rb") as fp:
        cls_model = pickle.load(fp)

    metadata = json.loads((classifier_source / "metadata.json").read_text())
    print(metadata)
    model, device = datasets.load_embedding_model(
        classifier_source / "embedding.ckpt", metadata["embedding_dim"]
    )

    use_ref_motif = metadata.get("use_ref_motif", False)
    if use_ref_motif:
        ref_motif_df = datasets.load_ref_motif(
            classifier_source / "reference_motifs", cens_sr=metadata["cens_sr"]
        )

    test_df = test_df_with_true_label(species)

    res = []
    paths = list((Path(audio_root) / species).glob("*"))[:10]
    # we only use the first 10 audios of each species for testing
    for path in paths:
        df = soundscape.parse_soundscape(path)
        if not df.empty:
            X_raw = np.stack(df.x.values)
            X = np.hstack(
                [transform_input(model, device, X_raw)]
                + (
                    datasets.transform_input_motif(
                        ref_motif_df,
                        X_raw,
                        cens_sr=metadata["cens_sr"],
                        mp_window=metadata["mp_window"],
                    )
                    if use_ref_motif
                    else []
                )
            )
            y_pred = cls_model.classifier.predict(X)
            res_inner = []
            for row, pred in zip(df.itertuples(), y_pred):
                labels = cls_model.label_encoder.inverse_transform(np.flatnonzero(pred))
                for label in labels:
                    res_inner.append(
                        {
                            "file_id": row.file_id,
                            "bird": label,
                            "end_time": row.end_time,
                            "target": True,
                        }
                    )
            res.append(pd.DataFrame(res_inner))
    res_df = pd.concat(res)
    submission_df = test_df.merge(
        res_df[res_df.bird != "other"], on=["file_id", "bird", "end_time"], how="left"
    ).fillna(False)
    print(submission_df.head())
    return submission_df[["row_id", "bird", "label", "target"]]

In [77]:
filter_set = "../data/raw/birdclef-2022/scored_birds.json"
scored_birds = json.loads(Path(filter_set).read_text())
classifier_source = "../data/processed/model/2022-05-17-v7"
results = []
for species in scored_birds:
    results.append(predict(species, classifier_source))

{'embedding_source': 'data/intermediate/embedding/tile2vec-v5/version_10/checkpoints/epoch=2-step=5635.ckpt', 'embedding_dim': 512, 'created': '2022-05-17T22:00:49.874911', 'cens_sr': 10, 'mp_window': 20, 'use_ref_motif': False}
STFT kernels created, time used = 1.1888 seconds
STFT filter created, time used = 0.0458 seconds
Mel filter created, time used = 0.0458 seconds
    file_id  end_time    bird             row_id  label  target
0  XC122399         5  akiapo  XC122399_akiapo_5   True   False
1  XC122399         5  aniani  XC122399_aniani_5  False   False
2  XC122399         5  apapan  XC122399_apapan_5  False   False
3  XC122399         5  barpet  XC122399_barpet_5  False   False
4  XC122399         5  crehon  XC122399_crehon_5  False   False
{'embedding_source': 'data/intermediate/embedding/tile2vec-v5/version_10/checkpoints/epoch=2-step=5635.ckpt', 'embedding_dim': 512, 'created': '2022-05-17T22:00:49.874911', 'cens_sr': 10, 'mp_window': 20, 'use_ref_motif': False}
STFT kernels c

{'embedding_source': 'data/intermediate/embedding/tile2vec-v5/version_10/checkpoints/epoch=2-step=5635.ckpt', 'embedding_dim': 512, 'created': '2022-05-17T22:00:49.874911', 'cens_sr': 10, 'mp_window': 20, 'use_ref_motif': False}
STFT kernels created, time used = 1.1350 seconds
STFT filter created, time used = 0.0781 seconds
Mel filter created, time used = 0.0786 seconds
    file_id  end_time    bird             row_id  label  target
0  XC328230         5  akiapo  XC328230_akiapo_5  False   False
1  XC328230         5  aniani  XC328230_aniani_5  False   False
2  XC328230         5  apapan  XC328230_apapan_5  False   False
3  XC328230         5  barpet  XC328230_barpet_5  False   False
4  XC328230         5  crehon  XC328230_crehon_5  False   False
{'embedding_source': 'data/intermediate/embedding/tile2vec-v5/version_10/checkpoints/epoch=2-step=5635.ckpt', 'embedding_dim': 512, 'created': '2022-05-17T22:00:49.874911', 'cens_sr': 10, 'mp_window': 20, 'use_ref_motif': False}
STFT kernels c

In [78]:
result = pd.concat(results)

In [81]:
result

Unnamed: 0,row_id,bird,label,target
0,XC122399_akiapo_5,akiapo,True,False
1,XC122399_aniani_5,aniani,False,False
2,XC122399_apapan_5,apapan,False,False
3,XC122399_barpet_5,barpet,False,False
4,XC122399_crehon_5,crehon,False,False
...,...,...,...,...
8962,XC667142_omao_30,omao,False,False
8963,XC667142_puaioh_30,puaioh,False,False
8964,XC667142_skylar_30,skylar,False,False
8965,XC667142_warwhe1_30,warwhe1,False,False


In [119]:
f1_scores = []
for species in scored_birds:
    print("species:", species)
    if (
        result[
            (result["bird"] == species)
            & (result["label"] == True)
            & (result["target"] == True)
        ].shape[0]
        == 0
    ):
        precision = recall = f1_score = 0
    else:
        precision = result[
            (result["bird"] == species)
            & (result["label"] == True)
            & (result["target"] == True)
        ].shape[0] / (
            result[
                (result["bird"] == species)
                & (result["label"] == True)
                & (result["target"] == True)
            ].shape[0]
            + result[
                (result["bird"] == species)
                & (result["label"] == False)
                & (result["target"] == True)
            ].shape[0]
        )
        recall = result[
            (result["bird"] == species)
            & (result["label"] == True)
            & (result["target"] == True)
        ].shape[0] / (
            result[
                (result["bird"] == species)
                & (result["label"] == True)
                & (result["target"] == True)
            ].shape[0]
            + result[
                (result["bird"] == species)
                & (result["label"] == True)
                & (result["target"] == False)
            ].shape[0]
        )
        f1_score = 2 * (precision * recall) / (precision + recall)
    print("precision:", precision)
    print("recall:", recall)
    f1_scores.append(f1_score)

species: akiapo
precision: 0
recall: 0
species: aniani
precision: 0
recall: 0
species: apapan
precision: 0
recall: 0
species: barpet
precision: 0
recall: 0
species: crehon
precision: 0.058823529411764705
recall: 0.043478260869565216
species: elepai
precision: 0.07142857142857142
recall: 0.007633587786259542
species: ercfra
precision: 0.07317073170731707
recall: 0.2727272727272727
species: hawama
precision: 0
recall: 0
species: hawcre
precision: 0
recall: 0
species: hawgoo
precision: 0
recall: 0
species: hawhaw
precision: 0
recall: 0
species: hawpet1
precision: 0.05263157894736842
recall: 0.125
species: houfin
precision: 0
recall: 0
species: iiwi
precision: 0
recall: 0
species: jabwar
precision: 0
recall: 0
species: maupar
precision: 0
recall: 0
species: omao
precision: 0
recall: 0
species: puaioh
precision: 0.1
recall: 0.14285714285714285
species: skylar
precision: 0
recall: 0
species: warwhe1
precision: 0
recall: 0
species: yefcan
precision: 0
recall: 0


In [120]:
print("The macro f1-score of the classifier is:", np.mean(f1_scores))

The macro f1-score of the classifier is: 0.017661850082404514


### Test for neural network classifier

In [108]:
def predict(species, classifier_source, method):
    audio_root = "../data/raw/birdclef-2022/train_audio"
    classifier_source = Path(classifier_source)

    filter_set = json.loads(
        Path("../data/raw/birdclef-2022/scored_birds.json").read_text()
    )
    label_encoder = LabelEncoder()
    label_encoder.fit(["noise"] + filter_set)

    metadata = json.loads((classifier_source / "metadata.json").read_text())

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    to_embed = ToEmbedSpace(
        classifier_source / "embedding.ckpt", z_dim=metadata["embedding_dim"]
    )
    model = ClassifierNet.load_from_checkpoint(
        classifier_source / "classify.ckpt",
        z_dim=metadata["embedding_dim"],
        n_classes=len(label_encoder.classes_),
    )

    test_df = test_df_with_true_label(species)

    res = []
    paths = list((Path(audio_root) / species).glob("*"))[:10]
    # we only use the first 10 audios of each species for testing
    for path in paths:
        df = soundscape.parse_soundscape(path)
        if not df.empty:
            X_raw = torch.from_numpy(np.stack(df.x.values)).float().to(device)
            X, _ = to_embed((X_raw, None))
            y_pred = model.to(device)(X).cpu().detach().numpy()
            # now convert the prediction to something that we can use
            res_inner = []
            for row, pred in zip(df.itertuples(), y_pred):
                labels = []
                sorted_indices = np.argsort(pred)[::-1]
                if method == "top":
                    labels = label_encoder.inverse_transform(sorted_indices[:1])
                elif method == "top-not-noise":
                    for label in label_encoder.inverse_transform(sorted_indices):
                        if label == "noise":
                            break
                        labels.append(label)
                for label in labels:
                    res_inner.append(
                        {
                            "file_id": row.file_id,
                            "bird": label,
                            "end_time": row.end_time,
                            "target": True,
                        }
                    )
            res.append(pd.DataFrame(res_inner))
    res_df = pd.concat(res)
    submission_df = test_df.merge(
        res_df[res_df.bird != "noise"], on=["file_id", "bird", "end_time"], how="left"
    ).fillna(False)
    print(submission_df.head())
    return submission_df[["row_id", "bird", "label", "target"]]

In [118]:
classifier_source = "../data/processed/classify-nn/0.13.0-202205222347"
results_nn = []
for species in scored_birds:
    results_nn.append(predict(species, classifier_source, method="top"))

STFT kernels created, time used = 1.3795 seconds
STFT filter created, time used = 0.0768 seconds
Mel filter created, time used = 0.0768 seconds
    file_id  end_time    bird             row_id  label  target
0  XC122399         5  akiapo  XC122399_akiapo_5   True   False
1  XC122399         5  aniani  XC122399_aniani_5  False   False
2  XC122399         5  apapan  XC122399_apapan_5  False   False
3  XC122399         5  barpet  XC122399_barpet_5  False   False
4  XC122399         5  crehon  XC122399_crehon_5  False   False
STFT kernels created, time used = 1.4959 seconds
STFT filter created, time used = 0.0898 seconds
Mel filter created, time used = 0.0904 seconds
    file_id  end_time    bird             row_id  label  target
0  XC174949         5  akiapo  XC174949_akiapo_5  False   False
1  XC174949         5  aniani  XC174949_aniani_5   True   False
2  XC174949         5  apapan  XC174949_apapan_5  False   False
3  XC174949         5  barpet  XC174949_barpet_5  False    True
4  XC174

STFT kernels created, time used = 1.4199 seconds
STFT filter created, time used = 0.0282 seconds
Mel filter created, time used = 0.0282 seconds
    file_id  end_time    bird             row_id  label  target
0  XC175493         5  akiapo  XC175493_akiapo_5  False   False
1  XC175493         5  aniani  XC175493_aniani_5  False   False
2  XC175493         5  apapan  XC175493_apapan_5  False   False
3  XC175493         5  barpet  XC175493_barpet_5  False   False
4  XC175493         5  crehon  XC175493_crehon_5  False   False
STFT kernels created, time used = 1.2356 seconds
STFT filter created, time used = 0.0793 seconds
Mel filter created, time used = 0.0797 seconds
    file_id  end_time    bird             row_id  label  target
0  XC144892         5  akiapo  XC144892_akiapo_5  False   False
1  XC144892         5  aniani  XC144892_aniani_5  False   False
2  XC144892         5  apapan  XC144892_apapan_5  False   False
3  XC144892         5  barpet  XC144892_barpet_5  False   False
4  XC144

In [121]:
result_nn = pd.concat(results_nn)

In [122]:
f1_scores_nn = []
for species in scored_birds:
    print("species:", species)
    if (
        result_nn[
            (result_nn["bird"] == species)
            & (result_nn["label"] == True)
            & (result_nn["target"] == True)
        ].shape[0]
        == 0
    ):
        precision = recall = f1_score = 0
    else:
        precision = result_nn[
            (result_nn["bird"] == species)
            & (result_nn["label"] == True)
            & (result_nn["target"] == True)
        ].shape[0] / (
            result_nn[
                (result_nn["bird"] == species)
                & (result_nn["label"] == True)
                & (result_nn["target"] == True)
            ].shape[0]
            + result_nn[
                (result_nn["bird"] == species)
                & (result_nn["label"] == False)
                & (result_nn["target"] == True)
            ].shape[0]
        )
        recall = result_nn[
            (result_nn["bird"] == species)
            & (result_nn["label"] == True)
            & (result_nn["target"] == True)
        ].shape[0] / (
            result_nn[
                (result_nn["bird"] == species)
                & (result_nn["label"] == True)
                & (result_nn["target"] == True)
            ].shape[0]
            + result_nn[
                (result_nn["bird"] == species)
                & (result_nn["label"] == True)
                & (result_nn["target"] == False)
            ].shape[0]
        )
        f1_score = 2 * (precision * recall) / (precision + recall)
    print("precision:", precision)
    print("recall:", recall)
    f1_scores_nn.append(f1_score)

species: akiapo
precision: 0.038461538461538464
recall: 0.006896551724137931
species: aniani
precision: 0
recall: 0
species: apapan
precision: 0.05263157894736842
recall: 0.004056795131845842
species: barpet
precision: 0.05128205128205128
recall: 0.03076923076923077
species: crehon
precision: 0
recall: 0
species: elepai
precision: 0.09090909090909091
recall: 0.007633587786259542
species: ercfra
precision: 0
recall: 0
species: hawama
precision: 0.1111111111111111
recall: 0.007142857142857143
species: hawcre
precision: 0.23214285714285715
recall: 0.026859504132231406
species: hawgoo
precision: 0.09090909090909091
recall: 0.024390243902439025
species: hawhaw
precision: 0.016129032258064516
recall: 0.038461538461538464
species: hawpet1
precision: 0
recall: 0
species: houfin
precision: 0.029411764705882353
recall: 0.00030674846625766873
species: iiwi
precision: 0
recall: 0
species: jabwar
precision: 0.07692307692307693
recall: 0.0012453300124533001
species: maupar
precision: 0.0510752688172

In [123]:
print("The macro f1-score of the classifier is:", np.mean(f1_scores_nn))

The macro f1-score of the classifier is: 0.015075987634262983
