# Подключение библиотек

In [16]:
import json
import os
import random

from pydantic import BaseModel as ConfigBaseModel
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchaudio
import torchaudio.transforms as T
from torchvision.models.resnet import ResNet, BasicBlock

# Класс для гиперпараметров
Гиперпараметры вынесены в отдельный класс для удобства.

In [17]:
class Config(ConfigBaseModel):
    """Hyperparameters and paths for the experiment, grouped in one place.

    Every field carries an explicit type annotation. Pydantic v2 raises an
    error for un-annotated class attributes, whereas pydantic v1 silently
    inferred the field type from the default value; annotating keeps the
    model valid under both major versions without changing any default.
    """

    model_name: str = "net"
    test_size: float = 0.2
    seed: int = 887
    # 1 when the Kaggle kernel runs interactively, 2 otherwise.
    fit_verbose: int = 1 if (os.environ.get('KAGGLE_KERNEL_RUN_TYPE') == "Interactive") else 2
    dataset_dir: str = "/kaggle/train/"
    path_data: str = "/kaggle/img_stats.csv"
    label: str = "label"
    n_label: int = 264
    img_size: tuple = (128, 256)
    channels: int = 1
    # img_size with the channel count appended as the last axis.
    img_shape: tuple = (*img_size, channels)
    ## model
    base_model_weights: str = "imagenet"
    ## training
    label_smoothing: float = 0.05
    shuffle_size: int = 1028
    steps_per_epoch: int = 300
    batch_size: int = 128
    # Validation uses the same batch size as training by default.
    valid_batch_size: int = batch_size
    epochs: int = 30
    patience: int = 4
    monitor: str = "val_loss"  # val_loss
    monitor_mode: str = "auto"
    lr: float = 1e-3

In [18]:
# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Prepare paths
root_path = "/kaggle/"
# os.path.join avoids the doubled separator that plain concatenation produced
# ("/kaggle//train_audio/"); the trailing "" keeps the final slash so callers
# that append a file name directly keep working.
input_path = os.path.join(root_path, 'train_audio', '')

# NOTE(review): the inference loop further down iterates `scored_birds` and
# concatenates each item into a row id string, i.e. it expects bird-label
# strings. Config() is the hyperparameter container, so this assignment looks
# like a leftover; presumably the labels should be loaded from a label file.
# Confirm the intended source before running inference.
scored_birds = Config()

bird_label = np.asarray(scored_birds)

# Preprocessing data
sample_rate = 32000
n_fft = 2048
win_length = None
hop_length = 1024
n_mels = 128
# Number of samples in one 5-second analysis window.
min_sec_proc = sample_rate * 5

# Mel-spectrogram transform shared by all preprocessing calls.
mel_spectrogram = T.MelSpectrogram(
    sample_rate=sample_rate,
    n_fft=n_fft,
    win_length=win_length,
    hop_length=hop_length,
    center=True,
    pad_mode="reflect",
    power=2.0,
    norm='slaney',
    onesided=True,
    n_mels=n_mels,
    mel_scale="htk",
)

Using cuda device




In [19]:
# Make pseudo-random number generation reproducible
def torch_fix_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic kernels.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    # Python random
    random.seed(seed)
    # Numpy
    np.random.seed(seed)
    # Pytorch (CPU and every CUDA device)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels between runs.
    torch.backends.cudnn.benchmark = False
    # `use_deterministic_algorithms` is a function and must be called:
    # assigning True to it replaced the function with a bool and enabled
    # nothing. warn_only=True keeps ops that lack a deterministic
    # implementation running (with a warning) instead of raising.
    torch.use_deterministic_algorithms(True, warn_only=True)


torch_fix_seed()

In [35]:
# Create normalized log-mel spectrograms for an audio file
def audio_to_mel_label(filepath,
                       min_sec_proc,
                       mode='train',
                       data_index=0,
                       label_list=None,
                       bird_label=None,
                       label_file=None,
                       mel_list=None):
    """Split an audio file into fixed-size windows and append their log-mels.

    The first channel of the file is cut into consecutive, non-overlapping
    windows of ``min_sec_proc`` samples. At most 12 windows are processed and
    a trailing partial window is dropped. Each window is converted with the
    module-level ``mel_spectrogram`` transform, log10-scaled and standardized
    to zero mean / unit variance.

    Args:
        filepath: Path of the audio file, passed to ``torchaudio.load``.
        min_sec_proc: Window length in samples.
        mode, data_index, label_list, bird_label, label_file: Unused; kept so
            existing call sites keep working.
        mel_list: Optional list to append to. A fresh list is created when
            omitted (the former ``[]`` default was shared between calls, so
            results accumulated across files).

    Returns:
        ``mel_list`` with one ``(1, n_mels, frames)`` tensor appended per
        complete window; unchanged if the file is shorter than one window.

    NOTE(review): the sample rate reported by ``torchaudio.load`` is not
    checked against the rate the mel transform was configured with.
    """
    if mel_list is None:
        mel_list = []

    waveform, _ = torchaudio.load(filepath=filepath)
    # Keep only the first channel: stereo -> mono, mono -> mono.
    waveform = waveform[:1, :]

    # Cap the signal at 12 windows. The earlier self-concatenation before this
    # truncation had no effect on the result and is dropped.
    len_wav = min(waveform.shape[1], min_sec_proc * 12)
    waveform = waveform[:, :len_wav]

    for index in range(len_wav // min_sec_proc):
        start = index * min_sec_proc
        segment = waveform[0, start:start + min_sec_proc]
        # unsqueeze adds the channel axis without hard-coding the
        # (128, 157) spectrogram size of one particular STFT configuration.
        log_melspec = torch.log10(mel_spectrogram(segment).unsqueeze(0) + 1e-10)
        # A constant (e.g. silent) window has zero std; clamping avoids NaNs.
        std = torch.std(log_melspec).clamp_min(1e-8)
        log_melspec = (log_melspec - torch.mean(log_melspec)) / std
        mel_list.append(log_melspec)
    return mel_list


class ResNetBird(ResNet):
    """ResNet of BasicBlocks (depths 4/8/6/4) taking single-channel input.

    Args:
        num_classes: Number of outputs of the final classification layer.
            Defaults to 21, the value that was previously hard-coded, so
            ``ResNetBird()`` builds the same network as before.
    """

    def __init__(self, num_classes=21):
        super().__init__(BasicBlock, [4, 8, 6, 4], num_classes=num_classes)
        # Override the stem created by the base class with a convolution that
        # accepts 1 input channel and uses stride 1.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=1, padding=3, bias=False)


# Build the network with its default arguments and move it to the selected device.
net = ResNetBird().to(device)



# Вывод содержимого модели

In [36]:
# Print the module hierarchy of the network for inspection.
print(net)

ResNetBird(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True

In [38]:
# Restore the trained weights. map_location lets a checkpoint saved on a GPU
# be loaded when only a CPU (or a different device) is available.
net.load_state_dict(torch.load('/content/drive/MyDrive/CoNo/model.pt', map_location=device))
out_sigmoid = nn.Sigmoid()

# NOTE(review): the name ends in ".ogg" but it is listed as a directory below;
# confirm this really is the folder holding the test recordings.
test_audio_dir = root_path + 'birds.ogg'
# Only .ogg files can be opened below; splitext keeps dots inside the stem
# (split('.')[0] truncated names such as "a.b.ogg" to "a").
file_list = [os.path.splitext(f)[0]
             for f in sorted(os.listdir(test_audio_dir))
             if f.endswith('.ogg')]

pred = {'row_id': [], 'target': []}
binary_th = 5e-8
# One prediction row per bird for each of the twelve 5-second windows.
n_chunks = 12
net.eval()

for afile in file_list:
    # Join with a separator; plain concatenation glued the file name onto the
    # directory name.
    path = os.path.join(test_audio_dir, afile + '.ogg')
    mel_list_test = audio_to_mel_label(path, min_sec_proc, 'test', mel_list=[])
    if mel_list_test:
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = net(torch.stack(mel_list_test).to(device))
            outputs_test = out_sigmoid(outputs)
    else:
        # File shorter than one window: torch.stack([]) would raise, so use an
        # empty result and let every lookup below fall back to score 0.
        outputs_test = torch.empty((0, 0))

    for idx in range(n_chunks):
        chunk_end_time = (idx + 1) * 5

        # NOTE(review): each `bird` is concatenated into the row id, so
        # `scored_birds` must yield label strings here; verify its definition.
        for bird in scored_birds:
            try:
                score = outputs_test[idx][np.where(bird_label == bird)]
            except IndexError:
                # Fewer windows than n_chunks were produced for this file.
                score = 0

            row_id = afile + '_' + bird + '_' + str(chunk_end_time)
            pred['row_id'].append(row_id)
            pred['target'].append(bool(score > binary_th))

results = pd.DataFrame(pred, columns=['row_id', 'target'])

print(results)

results.to_csv("./submission.csv", index=False)

RuntimeError: ignored