## 必要な作業

- 一回Run Allで実行する
- 最後あたりのセルの出力として `best threshold` が生成される
- 生成された閾値を次のセルの`BEST_THRESHOLD`に書いて`Save Version`を押す
- submission.csvが生成されるので，提出する

In [None]:
# 📌NOTE: run the notebook once with "Run All", then update the values below.
# Threshold used for F1 optimization (tabular-competition style); stored as
# Config.threshold.
BEST_THRESHOLD = 0.100428
# Threshold used when inserting nocall; stored as Config.nocall_threshold.
BEST_NOCALL_THRESHOLD = 0.169353

In [None]:
# !pip install -q --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn==1.0.dev0
# !pip install -q resnest    

In [None]:
!pip install ../input/scikit-learn-10dev0/scikit_learn-1.0.dev0-cp37-cp37m-manylinux2010_x86_64.whl
# !pip install -q ../input/resnest-v0-0-5/resnest-0.0.5-py3-none-any.whl
!pip install -q "../input/resnest50-fast-package/resnest-0.0.6b20200701/resnest"

In [None]:
import warnings
# Install an "ignore" filter with no message/category restriction, so every
# warning raised after this point is silenced.
warnings.filterwarnings('ignore')

In [None]:
import os
import sys
import numpy as np
import pandas as pd
from pathlib import Path

sys.path.append("../input/birdclef-toolkit-v0530-1930/lib")
import bird_recognition

In [None]:
# Show the TARGET_PATH value exposed by the toolkit's evaluation module
# (presumably the input location it will read — confirm in the toolkit).
print(bird_recognition.evaluation.TARGET_PATH)

In [None]:
# 10-frame version.
# Probability CSVs, one per fold. Only the uncommented entries are loaded;
# the commented ones are kept as ready-to-enable alternatives.
filepath_list = [
    # "../input/metadata-probability-v0525-2100/birdclef_resnest50_fold0_epoch_33_f1_val_03859_20210524151554.csv",

    # 👑 fold: 1
    "../input/metadata-probability-v0525-2100/birdclef_resnest50_fold1_epoch_34_f1_val_04757_20210524185455.csv",

    # "../input/metadata-probability-v0525-2100/birdclef_resnest50_fold2_epoch_34_f1_val_05027_20210524223209.csv",
    # "../input/metadata-probability-v0525-2100/birdclef_resnest50_fold3_epoch_20_f1_val_04299_20210525010703.csv",
    # "../input/metadata-probability-v0525-2100/birdclef_resnest50_fold4_epoch_34_f1_val_05140_20210525074929.csv"
]
# Read every selected CSV and stack the rows into one frame (the original
# per-file row indices are kept as-is).
prob_df = pd.concat([pd.read_csv(csv_path) for csv_path in filepath_list])

In [None]:
# "../input/bird-probabilities/train_bird_call_prob_{0,1,2}.csv" # CV: 0.7764
class TrainingConfig:
    """Settings for the training side of the pipeline.

    An instance is passed to ``bird_recognition.evaluation.run`` and is also
    the source of several values copied into ``Config``.

    Args:
        debug: Stored as ``self.debug``. Elsewhere in the notebook a true
            value causes ``prob_df`` to be cut down to its first 1000 rows.
    """

    def __init__(self, debug: bool):
        # NOTE: this is NOT the nocall-insertion threshold held by Config.
        self.nocall_threshold: float = 0.5

        self.debug = debug
        self.num_kfolds: int = 5
        self.num_spieces: int = 397
        self.num_candidates: int = 5
        self.max_distance: int = 15  # 20
        self.weight_rate: float = 1.0
        self.sampling_strategy: float = None  # 1.0
        self.random_state: int = 777
        self.num_prob: int = 6
        self.min_rating = None  # choose from [1, 2, 3, 4, 5, None]
        self.use_to_birds = True
        self.use_add_secondlabel = False  # True

        # Parameter sets for the two gradient-boosting libraries; both are
        # configured for GPU execution.
        self.xgb_params = dict(
            objective="binary:logistic",
            tree_method="gpu_hist",
            n_estimators=1000,
        )
        self.lgb_params = dict(
            objective="binary",
            metric="binary_logloss",
            device="gpu",
        )

        # One pickle path per fold, keyed by model family. Only "lgbm" is
        # enabled; "xgb" / "cat" would follow the same "./<name>_<fold>.pkl"
        # naming scheme.
        self.weights_filepath_dict = {
            # 'xgb': [f"./xgb_{fold}.pkl" for fold in range(self.num_kfolds)],
            "lgbm": [f"./lgbm_{fold}.pkl" for fold in range(self.num_kfolds)],
            # 'cat': [f"./cat_{fold}.pkl" for fold in range(self.num_kfolds)],
        }
        
# Build the training settings with debug mode switched off. If debug is
# turned on, only the first 1000 rows of prob_df are kept.
training_config = TrainingConfig(debug=False)
if training_config.debug:
    prob_df = prob_df.head(1000)

In [None]:
class Config:
    """Inference-side settings passed to ``bird_recognition.evaluation.run``.

    Takes no arguments. Shared values are copied from the module-level
    ``training_config``; the two decision thresholds come from the
    module-level constants ``BEST_THRESHOLD`` and ``BEST_NOCALL_THRESHOLD``.
    """

    def __init__(self):
        self.num_kfolds: int = training_config.num_kfolds
        self.num_spieces: int = training_config.num_spieces
        self.num_candidates: int = training_config.num_candidates
        self.max_distance: int = training_config.max_distance
        # Threshold for inserting nocall. Deliberately taken from
        # BEST_NOCALL_THRESHOLD and not from training_config.nocall_threshold,
        # which is a different threshold (see the note in TrainingConfig).
        self.nocall_threshold: float = BEST_NOCALL_THRESHOLD
        self.num_prob: int = training_config.num_prob
        # Whether to also check the score obtained by simply cutting at the
        # optimal threshold (baseline).
        self.check_baseline: bool = True
        # File paths of the models used to judge whether a candidate bird
        # should be accepted.
        self.weights_filepath_dict = training_config.weights_filepath_dict
        # Weights of the models that predict, per frame, the probability that
        # each bird is calling.
        self.checkpoint_paths = [
            Path("../input/clefmodel/birdclef_resnest50_fold0_epoch_27_f1_val_05179_20210520120053.pth"),  # id36
            Path("../input/clefmodel/birdclef_resnest50_fold0_epoch_13_f1_val_03502_20210522050604.pth"),  # id51
            Path("../input/birdclef-groupby-author-05221040-728258/birdclef_resnest50_fold0_epoch_33_f1_val_03859_20210524151554.pth"),  # id58
            Path("../input/birdclef-groupby-author-05221040-728258/birdclef_resnest50_fold1_epoch_34_f1_val_04757_20210524185455.pth"),  # id59
            Path("../input/birdclef-groupby-author-05221040-728258/birdclef_resnest50_fold2_epoch_34_f1_val_05027_20210524223209.pth"),  # id60
            Path("../input/birdclef-groupby-author-05221040-728258/birdclef_resnest50_fold3_epoch_20_f1_val_04299_20210525010703.pth"),  # id61
            Path("../input/birdclef-groupby-author-05221040-728258/birdclef_resnest50_fold4_epoch_34_f1_val_05140_20210525074929.pth"),  # id62
            Path("../input/clefmodel/resnest50_sr32000_d7_miixup-5.0_2ndlw-0.6_grouped-by-auther/birdclef_resnest50_fold0_epoch_78_f1_val_03658_20210528221629.pth"),  # id97
            Path("../input/clefmodel/resnest50_sr32000_d7_miixup-5.0_2ndlw-0.6_grouped-by-auther/birdclef_resnest50_fold0_epoch_84_f1_val_03689_20210528225810.pth"),  # id97
            Path("../input/clefmodel/resnest50_sr32000_d7_miixup-5.0_2ndlw-0.6_grouped-by-auther/birdclef_resnest50_fold1_epoch_27_f1_val_03942_20210529062427.pth"),  # id98
        ]
        # Per-sample call probabilities of each bird, used for candidate
        # extraction (cache files): one file name per checkpoint above.
        self.pred_filepath_list = [
            self.get_prob_filepath_from_checkpoint(path) for path in self.checkpoint_paths
        ]
        # Optimal threshold for judging whether a candidate bird is accepted.
        self.threshold = BEST_THRESHOLD

    def get_prob_filepath_from_checkpoint(self, checkpoint_path: Path) -> str:
        """Return the probability-cache CSV file name for a checkpoint.

        Args:
            checkpoint_path: Path of a model checkpoint; only its stem (final
                component without the last suffix) is used.

        Returns:
            A bare file name (no directory) of the form
            ``train_soundscape_labels_probabilitiy_<stem>.csv``.
        """
        # NOTE: the "probabilitiy" spelling is part of the file name and is
        # left untouched; presumably existing cache files use it — verify
        # before renaming.
        return f"train_soundscape_labels_probabilitiy_{checkpoint_path.stem}.csv"

# Build the inference settings; Config.__init__ reads training_config and the
# BEST_THRESHOLD / BEST_NOCALL_THRESHOLD constants defined earlier.
config = Config()

In [None]:
%%time
# Call the toolkit's evaluation entry point with both settings objects, the
# loaded probabilities and the per-model weight paths. Its return value is
# what gets written to submission.csv afterwards.
submission_df = bird_recognition.evaluation.run(
    training_config,
    config,
    prob_df,
    model_dict=config.weights_filepath_dict,
)

In [None]:
# Save the result as submission.csv in the working directory, without
# writing the index as a column.
submission_df.to_csv("submission.csv", index=False)