In [1]:
# This file is a modified version of the original:
# https://github.com/LIHANG-HONG/birdclef2023-2nd-place-solution/blob/main/train.py

In [2]:
# %load_ext autoreload
# %autoreload 2

In [3]:
import argparse
import importlib
from modules.preprocess import preprocess,prepare_cfg
from modules.dataset import get_train_dataloader
from modules.model import load_model
import pytorch_lightning as pl
from pytorch_lightning.loggers import CSVLogger, WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, BackboneFinetuning, EarlyStopping, TQDMProgressBar
import torch
import os
import gc
import json

In [4]:
# import graphviz
# graphviz.set_jupyter_format('png')
# from torchview import draw_graph

In [5]:
def make_parser():
    """Build the command-line parser for a training run.

    Options:
        --stage: which training stage to run. A ``_ce`` suffix denotes
            cross-entropy and ``_bce`` denotes binary cross-entropy.
            Defaults to ``pretrain_ce``.
        --model_name: which model configuration to use. The ``sed`` prefix
            stands for sound event detection. Defaults to ``sed_v2s``.
        --use_pseudo: boolean flag; ``False`` unless given on the command line.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    stage_choices = [
        "pretrain_ce",
        "pretrain_bce",
        "train_ce",
        "train_bce",
        "finetune",
    ]
    model_choices = [
        "sed_v2s",
        'sed_b3ns',
        'sed_seresnext26t',
        'cnn_v2s',
        'cnn_resnet34d',
        'cnn_b3ns',
        'cnn_b0ns',
    ]

    cli = argparse.ArgumentParser(description='BirdCLEF2023')
    cli.add_argument(
        '--stage',
        choices=stage_choices,
        default="pretrain_ce",
    )
    cli.add_argument(
        '--model_name',
        choices=model_choices,
        default="sed_v2s",
    )
    cli.add_argument(
        '--use_pseudo',
        action='store_true',
    )
    return cli

In [6]:
def _merge_hand_labels(pseudo, hand_label):
    """Overwrite pseudo-label values in place with hand-made labels.

    For every ``version -> filename -> label -> second`` entry found under
    ``hand_label['pred']``, each element of ``pseudo['subset1']['pseudo']``
    that already contains that ``second`` key has its value replaced by the
    hand label. Seconds missing from a pseudo entry are left untouched.

    Args:
        pseudo: parsed pseudo-label JSON (mutated in place).
        hand_label: parsed hand-label JSON.

    Returns:
        The same ``pseudo`` object, for convenience.

    Raises:
        KeyError: if a version, filename or label that has hand-labelled
            seconds is absent from one of the pseudo entries.
    """
    for version, files in hand_label['pred'].items():
        for filename, labels in files.items():
            for label, seconds in labels.items():
                for second, value in seconds.items():
                    for model_pseudo in pseudo['subset1']['pseudo']:
                        # Look the target dict up once instead of twice per step.
                        target = model_pseudo['pred'][version][filename][label]
                        if second in target:
                            target[second] = value
    return pseudo


def main(argv=None):
    """Train one model for one stage and save its last checkpoint.

    Steps: parse the CLI options, load and prepare the per-model config,
    seed everything, preprocess the data, optionally load pseudo labels
    (patched with hand labels), build the dataloaders, then fit the model
    with a PyTorch Lightning ``Trainer``.

    Args:
        argv: optional list of command-line strings, e.g.
            ``['--stage', 'train_ce', '--use_pseudo']``. ``None`` (default)
            keeps the previous behaviour of reading ``sys.argv``. Passing an
            explicit list lets a notebook choose the options, since inside a
            kernel ``sys.argv`` holds the kernel's own launch arguments.

    Returns:
        None.
    """
    # parse_known_args tolerates unrelated arguments (such as those a
    # Jupyter kernel is started with) instead of exiting on them.
    args, _ = make_parser().parse_known_args(argv)
    stage = args.stage
    model_name = args.model_name

    cfg = importlib.import_module(f'configs.{model_name}').basic_cfg
    cfg = prepare_cfg(cfg, stage)
    # Exported even though the WandbLogger below is currently disabled.
    os.environ['WANDB_API_KEY'] = cfg.WANDB_API_KEY

    pl.seed_everything(cfg.seed[stage], workers=True)

    # Author's note from the recorded run: df_valid and df_label_valid were
    # empty dataframes.
    df_train, df_valid, df_label_train, df_label_valid, sample_weight, transforms = preprocess(cfg, stage)

    pseudo = None
    # Author's note from the recorded run: use_pseudo was False.
    if args.use_pseudo:
        # NOTE(review): absolute paths, presumably from a Colab checkout;
        # adjust them when running from a different location.
        with open('/content/birdclef2023-2nd-place-solution/inputs/pseudo_label/pseudo.json', encoding='utf-8') as f:
            pseudo = json.load(f)

        with open('/content/birdclef2023-2nd-place-solution/inputs/hand_label/hand_label.json', encoding='utf-8') as f:
            hand_label = json.load(f)

        _merge_hand_labels(pseudo, hand_label)

    dl_train, dl_val, ds_train, ds_val = get_train_dataloader(
        df_train,
        df_valid,
        df_label_train,
        df_label_valid,
        sample_weight,
        cfg,
        pseudo,
        transforms
    )

    # Weights & Biases logging is disabled; metrics go to CSV files under ./logs.
    # logger = WandbLogger(project='BirdClef-2023', name=f'{model_name}_{stage}')
    logger = CSVLogger("logs", name=f'{model_name}_{stage}')

    # No metric is monitored and save_top_k=0, so only the `save_last`
    # checkpoint (weights only) is written to the stage's output directory.
    checkpoint_callback = ModelCheckpoint(
        monitor=None,
        dirpath=cfg.output_path[stage],
        save_top_k=0,
        save_last=True,
        save_weights_only=True,
        verbose=True,
        every_n_epochs=1,
        mode='min'
    )
    callbacks_to_use = [checkpoint_callback]
    model = load_model(cfg, stage, train=True)

    trainer = pl.Trainer(
        devices=1,
        val_check_interval=1.0,
        deterministic=None,
        max_epochs=cfg.epochs[stage],
        logger=logger,
        callbacks=callbacks_to_use,
        precision=cfg.PRECISION,
        accelerator="auto",
    )

    print("Running trainer.fit")

    trainer.fit(model, train_dataloaders=dl_train, val_dataloaders=dl_val)

    # Release Python garbage and cached CUDA memory once training is done,
    # which matters when the kernel stays alive after this call.
    gc.collect()
    torch.cuda.empty_cache()

In [7]:
# Entry point: launch training when this cell (or the exported script) is
# executed directly rather than imported.
if __name__ == "__main__":
    main()

Global seed set to 20231121




GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..
You are using a CUDA device ('NVIDIA GeForce RTX 3050') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                | Type             | Params
----------------------------------------------------------
0  | loss_function       | CrossEntropyLoss | 0     
1  | mixup               | Mixup            | 0     
2  | mixup2              | Mixup2           | 0     
3  | audio_transforms    | Compose          | 0     
4  | time_

Running trainer.fit


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

epoch 0 train loss 17.417347993966306
epoch 1 train loss 16.946361193695083


In [9]:
# model.train()
# for batch in dl_train:
    
#     # batch[0].shape, batch[1].shape, batch[2].shape => torch.Size([16, 1, 320000]) torch.Size([16, 834]) torch.Size([16])
#     draw_graph(model, input_data = [batch], expand_nested=True, save_graph=True).visual_graph
#     break
     

### Convert to ONNX and OpenVINO IR

In [20]:
# Prerequisite (install once, outside this notebook): pip3 install openvino-dev
!python3 convert.py --model_name sed_v2s

loading model from checkpoint
Exported graph: graph(%x : Float(120, 3, 128, 768, strides=[294912, 98304, 768, 1], requires_grad=0, device=cpu),
      %tta_delta : Long(requires_grad=0, device=cpu),
      %bn0.weight : Float(128, strides=[1], requires_grad=1, device=cpu),
      %bn0.bias : Float(128, strides=[1], requires_grad=1, device=cpu),
      %bn0.running_mean : Float(128, strides=[1], requires_grad=0, device=cpu),
      %bn0.running_var : Float(128, strides=[1], requires_grad=0, device=cpu),
      %encoder.2.3.0.se.conv_reduce.weight : Float(16, 256, 1, 1, strides=[256, 1, 1, 1], requires_grad=1, device=cpu),
      %encoder.2.3.0.se.conv_reduce.bias : Float(16, strides=[1], requires_grad=1, device=cpu),
      %encoder.2.3.0.se.conv_expand.weight : Float(256, 16, 1, 1, strides=[16, 1, 1, 1], requires_grad=1, device=cpu),
      %encoder.2.3.0.se.conv_expand.bias : Float(256, strides=[1], requires_grad=1, device=cpu),
      %encoder.2.3.1.se.conv_reduce.weight : Float(32, 512, 1, 1,

  %/encoder/encoder.2/encoder.2.5/encoder.2.5.0/conv_dw/Shape_2_output_0 : Long(1, strides=[1], device=cpu) = onnx::Shape[onnx_name="/encoder/encoder.2/encoder.2.5/encoder.2.5.0/conv_dw/Shape_2"](%/encoder/encoder.2/encoder.2.5/encoder.2.5.0/conv_dw/Concat_output_0), scope: modules.model.BirdClefInferModelSED::/torch.nn.modules.container.Sequential::encoder/torch.nn.modules.container.Sequential::encoder.2/torch.nn.modules.container.Sequential::encoder.2.5/timm.models._efficientnet_blocks.InvertedResidual::encoder.2.5.0/timm.layers.conv2d_same.Conv2dSame::conv_dw # /home/na/miniconda3/envs/base_2/lib/python3.7/site-packages/timm/layers/padding.py:55:0
  %/encoder/encoder.2/encoder.2.5/encoder.2.5.0/conv_dw/Gather_2_output_0 : Long(1, strides=[1], device=cpu) = onnx::Gather[axis=0, onnx_name="/encoder/encoder.2/encoder.2.5/encoder.2.5.0/conv_dw/Gather_2"](%/encoder/encoder.2/encoder.2.5/encoder.2.5.0/conv_dw/Shape_2_output_0, %/encoder/encoder.2/encoder.2.5/encoder.2.5.0/conv_dw/Constan

Exporting ONNX model to IR... This may take a few minutes.
