In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the lab_black extension (presumably auto-formats cells with Black -- confirm).
%load_ext lab_black

In [7]:
import re
from pathlib import Path

model_path = Path("../data/processed/anikethRuns")
all_checkpoints = list(model_path.glob("checkpoints*/**/*ckpt"))


def latest_checkpoint(paths, dim):
    """Return the furthest-trained checkpoint for the run with embedding size ``dim``.

    Parameters
    ----------
    paths : iterable of Path
        Checkpoint files located somewhere below ``model_path``.
    dim : int
        Embedding dimension; it must appear as a whole number in the name of
        the run directory directly under ``model_path``
        (e.g. ``checkpointsDim128``).

    Returns
    -------
    Path
        The candidate with the highest ``(epoch, step)`` parsed from its
        ``epoch=E-step=S`` file name.

    Raises
    ------
    FileNotFoundError
        If no checkpoint belongs to a run directory for ``dim``.
    """

    def run_dims(path):
        # Only look at the run directory name: matching against the whole path
        # would also hit epoch/step numbers in the file name (e.g. "step=12800"
        # contains "128").
        run_dir = path.relative_to(model_path).parts[0]
        return {int(number) for number in re.findall(r"\d+", run_dir)}

    def progress(path):
        found = re.search(r"epoch=(\d+)-step=(\d+)", path.name)
        epoch_step = (int(found.group(1)), int(found.group(2))) if found else (-1, -1)
        # The path string breaks ties so the choice never depends on glob order.
        return (*epoch_step, path.as_posix())

    candidates = [path for path in paths if dim in run_dims(path)]
    if not candidates:
        raise FileNotFoundError(
            f"no checkpoint for dim={dim} found under {model_path.as_posix()}"
        )
    # Glob results come back in no guaranteed order, so pick the latest
    # checkpoint explicitly instead of relying on the last list element.
    return max(candidates, key=progress)


# (embedding dimension, checkpoint path) pairs consumed by the evaluation cells.
checkpoints = [(dim, latest_checkpoint(all_checkpoints, dim)) for dim in [128, 256, 512]]
checkpoints

[(128,
  WindowsPath('../data/processed/anikethRuns/checkpointsDim128/epoch=2-step=5018.ckpt')),
 (256,
  WindowsPath('../data/processed/anikethRuns/checkpointsDim256/checkpoints/epoch=6-step=12365.ckpt')),
 (512,
  WindowsPath('../data/processed/anikethRuns/checkpointsDim512/epoch=4-step=9869.ckpt'))]

In [29]:
from pathlib import Path
from birdclef.workflows.evaluation import model_logistic_regression
from birdclef.models.embedding.tilenet import TileNet
import pandas as pd

# Inputs: the raw BirdCLEF 2022 directory and the consolidated motif table.
root = Path("../data/raw/birdclef-2022")
df = pd.read_parquet("../data/processed/2022-04-03-motif-consolidated.parquet")


def _species_of(source_name):
    """Return the second "/"-separated component of a ``source_name`` value."""
    return source_name.split("/")[1]


df["species"] = df.source_name.apply(_species_of)

# Species and the `k` argument forwarded to model_logistic_regression.
species = ["brnowl", "skylar", "houfin"]
k = 250

# Score every checkpoint 10 times, keeping one record per evaluation.
scores = []
for dim, checkpoint in checkpoints:
    print(checkpoint)
    model = TileNet.load_from_checkpoint(checkpoint, z_dim=dim)
    for _ in range(10):
        score = model_logistic_regression(root, df, model, species, k)
        print(f"dim: {dim} score: {score}")
        scores.append({"dim": dim, "score": score, "k": k})

score_df = pd.DataFrame(scores)
score_df

..\data\processed\anikethRuns\checkpointsDim128\epoch=2-step=5018.ckpt
STFT kernels created, time used = 0.1870 seconds
STFT filter created, time used = 0.0050 seconds
Mel filter created, time used = 0.0050 seconds
dim: 128 score: (0.4959677419354839,)
dim: 128 score: (0.5564516129032258,)
dim: 128 score: (0.49193548387096775,)
dim: 128 score: (0.5,)
dim: 128 score: (0.5,)
dim: 128 score: (0.5120967741935484,)
dim: 128 score: (0.5120967741935484,)
dim: 128 score: (0.5,)
dim: 128 score: (0.4798387096774194,)
dim: 128 score: (0.5080645161290323,)
..\data\processed\anikethRuns\checkpointsDim256\checkpoints\epoch=6-step=12365.ckpt
STFT kernels created, time used = 0.2493 seconds
STFT filter created, time used = 0.0040 seconds
Mel filter created, time used = 0.0040 seconds
dim: 256 score: (0.5362903225806451,)
dim: 256 score: (0.5040322580645161,)
dim: 256 score: (0.5120967741935484,)
dim: 256 score: (0.4879032258064516,)
dim: 256 score: (0.5,)
dim: 256 score: (0.5524193548387096,)
dim: 256

Unnamed: 0,dim,score,k
0,128,"(0.4959677419354839,)",250
1,128,"(0.5564516129032258,)",250
2,128,"(0.49193548387096775,)",250
3,128,"(0.5,)",250
4,128,"(0.5,)",250
5,128,"(0.5120967741935484,)",250
6,128,"(0.5120967741935484,)",250
7,128,"(0.5,)",250
8,128,"(0.4798387096774194,)",250
9,128,"(0.5080645161290323,)",250


In [30]:
# model_logistic_regression returned every score wrapped in a 1-tuple (visible
# in the printed output of the evaluation cell), so unwrap it to a plain number.
# The isinstance guard keeps this cell idempotent: re-running it once the column
# is already numeric is a no-op instead of raising TypeError on `float[0]`.
score_df["score"] = score_df.score.apply(
    lambda x: x[0] if isinstance(x, (tuple, list)) else x
)

In [34]:
# Per-dimension summary statistics (count, mean, std, quartiles) of the results.
by_dim = score_df.groupby("dim")
by_dim.describe()

Unnamed: 0_level_0,score,score,score,score,score,score,score,score,k,k,k,k,k,k,k,k
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
dim,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
128,10.0,0.505645,0.020313,0.479839,0.496976,0.5,0.511089,0.556452,10.0,250.0,0.0,250.0,250.0,250.0,250.0,250.0
256,10.0,0.507661,0.024152,0.483871,0.487903,0.502016,0.524194,0.552419,10.0,250.0,0.0,250.0,250.0,250.0,250.0,250.0
512,10.0,0.529435,0.039874,0.475806,0.509073,0.516129,0.556452,0.592742,10.0,250.0,0.0,250.0,250.0,250.0,250.0,250.0


In [None]:
# Repeat the evaluation with `components=None`. The "nopca" naming suggests this
# turns off a PCA step inside model_logistic_regression -- TODO confirm.
scores_nopca = []
for dim, checkpoint in checkpoints:
    print(checkpoint)
    model = TileNet.load_from_checkpoint(checkpoint, z_dim=dim)
    for _ in range(10):
        score = model_logistic_regression(root, df, model, species, k, components=None)
        print(f"dim: {dim} score: {score}")
        # The earlier run printed scores as 1-tuples such as `(0.4959...,)`.
        # Store the bare number so that describe() below reports numeric
        # statistics (mean/std/quartiles) rather than treating the column as
        # objects. Assumes the same return shape with components=None; the
        # guard makes a plain number pass through unchanged.
        score_value = score[0] if isinstance(score, (tuple, list)) else score
        scores_nopca.append(dict(dim=dim, score=score_value, k=k))
scores_nopca_df = pd.DataFrame(scores_nopca)
scores_nopca_df.groupby("dim").describe()