In [1]:
import ast
import os
import warnings

import albumentations as alb
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.multiprocessing as mp
import torchvision.io
import torchvision.transforms as transforms
from PIL import Image
from sklearn import model_selection
from torch.utils.data import Dataset, DataLoader

warnings.filterwarnings('ignore')

In [2]:
from pytorch_lightning.callbacks import ModelCheckpoint, BackboneFinetuning, EarlyStopping #回调函数


In [3]:
!pip install -q torchtoolbox timm


[0m

In [4]:
class Config:
    """Hyperparameters, file locations and audio constants shared across the notebook."""

    # ---- reproducibility and hardware ----
    seed = 2023  # seed handed to the random number generators
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # CUDA when available, otherwise CPU
    PRECISION = 16  # numeric precision requested from the trainer

    # ---- model ----
    model = "tf_efficientnet_b0_ns"  # name of the architecture to build
    pretrained = True  # start from pretrained weights
    num_classes = 360  # number of output classes

    # ---- optimisation ----
    batch_size = 64  # samples per batch
    epochs = 5  # number of training epochs
    LR = 5e-4  # initial learning rate
    weight_decay = 1e-3  # weight-decay coefficient given to the optimizer
    PATIENCE = 8  # early-stopping patience (validation checks without improvement)

    # ---- augmentation ----
    use_aug = False  # whether to apply data augmentation
    use_mixup = True  # whether to train with mixup
    mixup_alpha = 0.2  # alpha hyperparameter of mixup

    # ---- data locations ----
    data_root = "/kaggle/input/birdclef-2023/"  # root directory of the competition data
    train_images = "/kaggle/input/split-creating-melspecs-stage-1/specs/train/"  # directory of training spectrogram arrays
    valid_images = "/kaggle/input/split-creating-melspecs-stage-1/specs/valid/"  # directory of validation spectrogram arrays
    train_path = "/kaggle/input/bc2023-train-val-df/train.csv"  # training metadata CSV
    valid_path = "/kaggle/input/bc2023-train-val-df/valid.csv"  # validation metadata CSV

    # ---- audio ----
    SR = 32000  # audio sample rate
    DURATION = 5  # clip duration
    MAX_READ_SAMPLES = 5  # upper bound on how many leading entries of each saved .npy array are loaded

In [5]:
# Seed the random number generators through PyTorch Lightning using Config.seed.
# workers=True is passed so that DataLoader worker processes are seeded as well
# (per Lightning's seed_everything documentation).
pl.seed_everything(Config.seed, workers=True)

2023

In [6]:
def config_to_dict(cfg):
    """Collect the attributes of ``cfg`` into a plain ``{name: value}`` dict.

    Every name reported by ``dir(cfg)`` is included except those starting with
    a double underscore.
    """
    exported = {}
    for attr_name in dir(cfg):
        if attr_name.startswith('__'):
            continue
        exported[attr_name] = getattr(cfg, attr_name)
    return exported

In [7]:
# Load the training and validation metadata tables from the CSV paths in Config,
# then preview the first rows of the training frame.
df_train = pd.read_csv(Config.train_path)
df_valid = pd.read_csv(Config.valid_path)
df_train.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename,len_sec_labels,path,frames,sr,duration
0,yebapa1,[],['song'],-3.3923,36.7049,Apalis flavida,Yellow-breasted Apalis,isaac kilusu,Creative Commons Attribution-NonCommercial-Sha...,3.0,https://www.xeno-canto.org/422175,yebapa1/XC422175.ogg,0,/kaggle/input/birdclef-2023/train_audio/yebapa...,405504,32000,12.672
1,yebapa1,[],['song'],-0.6143,34.0906,Apalis flavida,Yellow-breasted Apalis,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.0,https://www.xeno-canto.org/289562,yebapa1/XC289562.ogg,0,/kaggle/input/birdclef-2023/train_audio/yebapa...,796630,32000,24.894687
2,combuz1,[],['call'],51.8585,-8.2699,Buteo buteo,Common Buzzard,Irish Wildlife Sounds,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/626969,combuz1/XC626969.ogg,0,/kaggle/input/birdclef-2023/train_audio/combuz...,254112,32000,7.941
3,chibat1,['laudov1'],"['adult', 'sex uncertain', 'song']",-33.1465,26.4001,Batis molitor,Chinspot Batis,Lynette Rudman,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/664196,chibat1/XC664196.ogg,1,/kaggle/input/birdclef-2023/train_audio/chibat...,1040704,32000,32.522
4,carcha1,[],['song'],-34.011,18.8078,Cossypha caffra,Cape Robin-Chat,Shannon Ronaldson,Creative Commons Attribution-NonCommercial-Sha...,1.0,https://www.xeno-canto.org/322333,carcha1/XC322333.ogg,0,/kaggle/input/birdclef-2023/train_audio/carcha...,40124,32000,1.253875


In [8]:
# Vocabulary of recording-type strings, in order of first appearance in the
# training frame. A string's position in `typelist` is its class index.
#
# Each cell of the `type` column holds the repr of a Python list (e.g. "['song']").
# It is parsed with ast.literal_eval instead of eval: literal_eval accepts only
# literals, so text coming from the CSV can never be executed as code.
#
# dict.fromkeys de-duplicates while keeping insertion order, and the generator
# keeps its loop variables local (the previous explicit loop left a global named
# `type` behind, shadowing the builtin for the rest of the notebook).
typelist = list(dict.fromkeys(
    type_name
    for raw_types in df_train['type']
    for type_name in ast.literal_eval(raw_types)
))
# NOTE(review): the original author's note says this yields 360 categories —
# confirm that len(typelist) matches Config.num_classes.

In [11]:
# All-zero template for the target vector, one slot per class. Sized from
# Config.num_classes (instead of a repeated literal 360) so it cannot drift from
# the width of the model's output layer.
multi_label = torch.zeros(Config.num_classes, dtype=torch.float)

In [12]:
# Append one indicator column per distinct primary_label value to each frame.
# NOTE(review): both names are rebound to the widened frame, so re-running this
# cell appends the indicator columns a second time.
df_train = pd.concat([df_train, pd.get_dummies(df_train['primary_label'])], axis=1)
df_valid = pd.concat([df_valid, pd.get_dummies(df_valid['primary_label'])], axis=1)

## Create zero-filled indicator columns for birds that have no samples in the validation set

In [13]:
# Distinct primary labels found in the training frame.
birds = list(df_train.primary_label.unique())

In [14]:
# Labels that occur in the training frame but never in the validation frame.
# The list comes from a set, so its element order is not meaningful.
missing_birds = list(set(list(df_train.primary_label.unique())).difference(list(df_valid.primary_label.unique())))

In [15]:
# Training labels that are not in missing_birds, i.e. labels present in both frames.
non_missing_birds = list(set(list(df_train.primary_label.unique())).difference(missing_birds))

In [16]:
len(non_missing_birds)

254

In [17]:
# Give the validation frame an all-zero indicator column for every label it lacks,
# then select columns in df_train's order so both frames share the same layout.
df_valid[missing_birds] = 0
df_valid = df_valid[df_train.columns]  # reorder columns to match df_train

In [19]:
import albumentations as A
def get_train_transform():
    """Build the training augmentation pipeline.

    The pipeline is a horizontal flip applied with probability 0.5, followed by a
    ``OneOf`` group (also with probability 0.5) choosing between ``Cutout`` and
    ``CoarseDropout``.
    """
    occlusion = A.OneOf(
        [
            A.Cutout(max_h_size=5, max_w_size=16),
            A.CoarseDropout(max_holes=4),
        ],
        p=0.5,
    )
    flip = A.HorizontalFlip(p=0.5)
    return A.Compose([flip, occlusion])

In [21]:
class BirdDataset(torch.utils.data.Dataset):
    """Dataset of pre-computed spectrogram arrays with multi-hot recording-type targets.

    Each item is read from ``<img_dir>/<filename>.npy``, where ``img_dir`` is
    ``Config.train_images`` or ``Config.valid_images`` depending on ``train``.
    The target is a copy of ``multi_label`` with a 1 at the ``typelist`` index of
    every recording type listed in the row's ``type`` column.

    Args:
        df: metadata frame; must provide the ``filename`` and ``type`` columns.
        multi_label: all-zero template tensor that is cloned for every target.
        sr: sample rate (stored, not used by this class).
        duration: clip duration (stored, not used by this class).
        augmentations: optional callable applied to the image tensor.
        train: selects the training image directory and random segment sampling.
    """

    def __init__(self, df, multi_label=multi_label, sr = Config.SR, duration = Config.DURATION, augmentations = None, train = True):
        self.df = df
        self.sr = sr
        self.train = train
        self.duration = duration
        self.augmentations = augmentations
        self.labels = multi_label

        # Directory holding the .npy arrays for this split.
        self.img_dir = Config.train_images if train else Config.valid_images

        # name -> class index, built once so lookups in __getitem__ are O(1).
        # setdefault keeps the first position of a name, matching list.index().
        self._type_to_index = {}
        for position, type_name in enumerate(typelist):
            self._type_to_index.setdefault(type_name, position)

    def __len__(self):
        """Number of rows in the metadata frame."""
        return len(self.df)

    @staticmethod
    def normalize(image):
        """Divide pixel values by 255."""
        return image / 255.0

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        ``image`` is a float tensor whose first dimension stacks the same
        spectrogram three times, scaled by 1/255. ``target`` is a float tensor
        shaped like the ``multi_label`` template.
        """
        row = self.df.iloc[idx]
        impath = self.img_dir + f"{row.filename}.npy"

        # Keep at most MAX_READ_SAMPLES leading entries of the stored array.
        segments = np.load(str(impath))[:Config.MAX_READ_SAMPLES]

        # Training draws a random entry; evaluation always uses the first one.
        if self.train:
            image = segments[np.random.choice(len(segments))]
        else:
            image = segments[0]

        image = torch.tensor(image).float()

        if self.augmentations:
            # The callable receives a tensor with an added leading dimension.
            # NOTE(review): an albumentations Compose (see get_train_transform)
            # expects keyword arguments and numpy input — confirm before wiring it in.
            image = self.augmentations(image.unsqueeze(0)).squeeze()

        # Local target so no per-item state is kept on the dataset object.
        target = self.labels.clone()

        # Read the column by name: positional access (row[2]) depends on the CSV's
        # column layout and is not guaranteed to hit the `type` column.
        # literal_eval parses the stored list repr without executing it.
        for type_name in ast.literal_eval(row["type"]):
            class_index = self._type_to_index.get(type_name)
            # Types never seen when typelist was built are skipped; previously
            # they were marked as class 0, which mislabelled the sample.
            if class_index is not None:
                target[class_index] = 1.0

        # Repeat the single channel three times for backbones expecting 3 channels.
        image = torch.stack([image, image, image])
        image = self.normalize(image)

        return image, target

In [23]:
def get_fold_dls(df_train, df_valid):
    """Create datasets and data loaders for the training and validation frames.

    Both datasets are built without augmentations. Only the training loader
    shuffles. Returns ``(dl_train, dl_val, ds_train, ds_val)``.
    """
    dataset_kwargs = dict(sr=Config.SR, duration=Config.DURATION, augmentations=None)
    ds_train = BirdDataset(df_train, train=True, **dataset_kwargs)
    ds_val = BirdDataset(df_valid, train=False, **dataset_kwargs)

    loader_kwargs = dict(batch_size=Config.batch_size, num_workers=2)
    dl_train = DataLoader(ds_train, shuffle=True, **loader_kwargs)
    dl_val = DataLoader(ds_val, **loader_kwargs)

    return dl_train, dl_val, ds_train, ds_val

In [25]:
# Build notebook-level loaders and datasets.
# NOTE(review): run_training() calls get_fold_dls itself, so the objects created
# here are not the ones used by the training run.
dl_train, dl_val, ds_train, ds_val = get_fold_dls(df_train, df_valid)

In [26]:
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts, ReduceLROnPlateau, OneCycleLR

def get_optimizer(lr, params):
    """Build the optimizer / LR-scheduler configuration dict.

    Args:
        lr: learning rate for Adam.
        params: iterable of parameters; only those with ``requires_grad`` are optimized.

    Returns:
        A dict with an ``"optimizer"`` entry (Adam with ``Config.weight_decay``)
        and an ``"lr_scheduler"`` entry describing a CosineAnnealingWarmRestarts
        schedule with a period of ``Config.epochs``, stepped once per epoch.
    """
    trainable_params = (param for param in params if param.requires_grad)
    adam = torch.optim.Adam(
        trainable_params,
        lr=lr,
        weight_decay=Config.weight_decay,
    )

    scheduler_config = {
        "scheduler": CosineAnnealingWarmRestarts(
            adam,
            T_0=Config.epochs,
            T_mult=1,
            eta_min=1e-6,
            last_epoch=-1,
        ),
        "interval": "epoch",
        "monitor": "val_loss",
        "frequency": 1,
    }

    return {"optimizer": adam, "lr_scheduler": scheduler_config}

In [27]:
from torchtoolbox.tools import mixup_data, mixup_criterion
import torch.nn as nn
from torch.nn.functional import cross_entropy
import torchmetrics
import timm

In [28]:
import sklearn.metrics

def padded_cmap(solution, submission, padding_factor=5):
    """Macro-averaged average precision computed after padding with all-ones rows.

    ``padding_factor`` rows consisting entirely of 1s are appended to both the
    ground truth and the predictions before scoring, so every column contains
    at least one positive.

    Args:
        solution: DataFrame of ground-truth indicators, one column per class.
        submission: DataFrame of predicted scores with the same columns.
        padding_factor: number of all-ones rows to append. Values <= 0 add no
            padding (this previously raised a length-mismatch ValueError).

    Returns:
        The score returned by ``sklearn.metrics.average_precision_score`` with
        ``average='macro'``.
    """
    if padding_factor > 0:
        n_columns = len(solution.columns)
        padding = pd.DataFrame(
            [[1] * n_columns for _ in range(padding_factor)],
            columns=solution.columns,
        )
        padded_solution = pd.concat([solution, padding]).reset_index(drop=True)
        padded_submission = pd.concat([submission, padding]).reset_index(drop=True)
    else:
        # Nothing to append; skip concat so no empty frame takes part in it.
        padded_solution = solution.reset_index(drop=True)
        padded_submission = submission.reset_index(drop=True)

    return sklearn.metrics.average_precision_score(
        padded_solution.values,
        padded_submission.values,
        average='macro',
    )

def map_score(solution, submission):
    """Micro-averaged average precision of ``submission`` against ``solution``.

    Both arguments are DataFrames; their ``.values`` arrays are passed straight
    to ``sklearn.metrics.average_precision_score`` with ``average='micro'``.
    """
    y_true, y_score = solution.values, submission.values
    return sklearn.metrics.average_precision_score(y_true, y_score, average='micro')

In [29]:
# Random scores shaped like the validation indicator block, used below to
# sanity-check the metric functions.
dummy = df_valid[birds].copy()
dummy[birds] = np.random.rand(*dummy.shape)

In [30]:
padded_cmap(df_valid[birds], dummy[birds], padding_factor = 5)

0.47528243207272347

In [31]:
padded_cmap(df_valid[birds], dummy[birds], padding_factor = 1)

0.21353161925724687

In [32]:
map_score(df_valid[birds], dummy[birds])

0.0037646377491044386

In [33]:
class BirdClefModel(pl.LightningModule):
    """timm backbone with a replaced classification head, trained with BCE-with-logits.

    Args:
        model_name: timm architecture name. Must contain ``'res'``, ``'dense'``
            or ``'efficientnet'`` so the matching head attribute can be replaced.
        num_classes: width of the new output layer.
        pretrained: whether timm should load pretrained weights.

    Raises:
        ValueError: if ``model_name`` matches none of the supported families.
    """

    def __init__(self, model_name=Config.model, num_classes = Config.num_classes, pretrained = Config.pretrained):
        super().__init__()

        self.num_classes = num_classes
        self.backbone = timm.create_model(model_name, pretrained=pretrained)

        # Swap the final layer for one with `num_classes` outputs. The attribute
        # holding the head differs between model families.
        if 'res' in model_name:
            self.in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Linear(self.in_features, num_classes)
        elif 'dense' in model_name:
            self.in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Linear(self.in_features, num_classes)
        elif 'efficientnet' in model_name:
            self.in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Sequential(
                nn.Linear(self.in_features, num_classes)
            )
        else:
            # Without this branch the original head would be kept silently and
            # the output width would not match the targets.
            raise ValueError(
                f"Unsupported model_name {model_name!r}: expected a name containing "
                "'res', 'dense' or 'efficientnet'"
            )

        # Multi-label objective: independent sigmoid + binary cross-entropy per class.
        self.loss_function = nn.BCEWithLogitsLoss()

    def forward(self,images):
        """Return the backbone's raw logits for a batch of images."""
        logits = self.backbone(images)
        return logits

    def configure_optimizers(self):
        """Return the optimizer / scheduler dict built by ``get_optimizer``."""
        return get_optimizer(lr=Config.LR, params=self.parameters())

    def train_with_mixup(self, X, y):
        """Compute the mixup loss for one batch.

        The inputs are mixed with ``mixup_data`` and the two target sets are
        combined by ``mixup_criterion``. The criterion is ``self.loss_function``
        so the mixup path optimizes the same BCE-with-logits objective as the
        non-mixup path and validation (it previously used softmax cross-entropy,
        which does not match the multi-hot targets).
        """
        X, y_a, y_b, lam = mixup_data(X, y, alpha=Config.mixup_alpha)
        y_pred = self(X)
        loss_mixup = mixup_criterion(self.loss_function, y_pred, y_a, y_b, lam)
        return loss_mixup

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss for one batch."""
        image, target = batch

        if Config.use_mixup:
            loss = self.train_with_mixup(image, target)
        else:
            y_pred = self(image)
            loss = self.loss_function(y_pred,target)

        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True)

        return loss        

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss; return loss, logits and targets."""
        image, target = batch     
        y_pred = self(image)
        val_loss = self.loss_function(y_pred, target)
        self.log("val_loss", val_loss, on_step=True, on_epoch=True, logger=True, prog_bar=True)

        return {"val_loss": val_loss, "logits": y_pred, "targets": target}

    # NOTE(review): `_train_dataloader` and `_validation_dataloader` are never
    # assigned in this class; run_training passes the loaders to trainer.fit
    # directly. Confirm whether these two accessors are still needed.
    def train_dataloader(self):
        return self._train_dataloader 

    def validation_dataloader(self):
        return self._validation_dataloader

    def validation_epoch_end(self,outputs):
        """Average the per-batch validation losses; ``val_cmap`` is a placeholder 0."""
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        return {'val_loss': avg_loss,'val_cmap':0}

In [34]:
from pytorch_lightning.loggers import WandbLogger
import gc

def run_training():
    """Train a BirdClefModel on the notebook's train/validation frames.

    Builds the data loaders, the model, an early-stopping callback and a
    checkpoint callback (both monitoring ``val_loss``), runs ``trainer.fit`` and
    finally releases cached memory. Checkpoints are written under
    ``/kaggle/working/exp1/``. Returns ``None``.
    """
    print(f"Running training...")
    logger = None

    dl_train, dl_val, ds_train, ds_val = get_fold_dls(df_train, df_valid)

    audio_model = BirdClefModel()

    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=0.00, patience=Config.PATIENCE, verbose= True, mode="min")
    checkpoint_callback = ModelCheckpoint(monitor='val_loss',
                                          dirpath= "/kaggle/working/exp1/",
                                      save_top_k=1,
                                      save_last= True,
                                      save_weights_only=True,
                                      filename= f'./{Config.model}_loss',
                                      verbose= True,
                                      mode='min')

    callbacks_to_use = [checkpoint_callback,early_stop_callback]

    # Follow Config.DEVICE instead of hard-requiring a GPU. `devices=1` replaces
    # the deprecated `gpus=1`, which duplicated accelerator="gpu".
    use_gpu = Config.DEVICE.type == "cuda"

    trainer = pl.Trainer(
        accelerator="gpu" if use_gpu else "cpu",
        devices=1,
        val_check_interval=0.5,
        deterministic=True,
        max_epochs=Config.epochs,
        logger=logger,
        auto_lr_find=False,
        callbacks=callbacks_to_use,
        # Reduced precision is only requested on GPU; CPU runs use full precision.
        precision=Config.PRECISION if use_gpu else 32,
    )

    print("Running trainer.fit")
    trainer.fit(audio_model, train_dataloaders = dl_train, val_dataloaders = dl_val)

    # Drop Python garbage and release cached CUDA memory after training.
    gc.collect()
    torch.cuda.empty_cache()


In [35]:
run_training()

Running training...


Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ns-c0e6a31c.pth" to /root/.cache/torch/hub/checkpoints/tf_efficientnet_b0_ns-c0e6a31c.pth


Running trainer.fit


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]