In [233]:
import json
import os
import random
import csv
from librosa import get_duration
import librosa

from pydantic import BaseModel as ConfigBaseModel
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchaudio
import torchaudio.transforms as T
from torchvision.models.resnet import ResNet, BasicBlock

In [234]:
# Select the compute device: GPU when CUDA is available, CPU otherwise
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Prepare paths
# NOTE(review): root_path / input_path point at a Colab Drive mount and are not read by
# any cell shown here; the data below is loaded from the Kaggle input directory.
root_path = "/content/drive/MyDrive/birds/"
# os.path.join avoids the doubled separator that plain string concatenation produced
input_path = os.path.join(root_path, 'train_audio', '')

# Bird labels: the entry names found under train_audio are used as class labels
train_meta = pd.read_csv('/kaggle/input/birdclef-2023/train_metadata.csv')
scored_birds = os.listdir('/kaggle/input/birdclef-2023/train_audio')
# np.asarray copies the list, so bird_label keeps the os.listdir order even if
# scored_birds is sorted later.
# NOTE(review): the position of a label in bird_label is used as the model's class index
# during inference - confirm this order matches the one used for training.
bird_label = np.asarray(scored_birds)
print("Labels:")
print(bird_label)

# Preprocessing parameters
sample_rate = 32000
n_fft = 2048
win_length = None  # None lets torchaudio fall back to its default window length
hop_length = 1024
n_mels = 128
chunk_seconds = 5  # length of one analysis window in seconds
min_sec_proc = sample_rate * chunk_seconds  # samples per analysis window

# Convert a waveform to a mel spectrogram
mel_spectrogram = T.MelSpectrogram(
    sample_rate=sample_rate,
    n_fft=n_fft,
    win_length=win_length,
    hop_length=hop_length,
    center=True,
    pad_mode="reflect",
    power=2.0,
    norm='slaney',
    onesided=True,
    n_mels=n_mels,
    mel_scale="htk",
)

Using cpu device
Labels:
['yetgre1' 'moccha1' 'rostur1' 'walsta1' 'ratcis1' 'norfis1' 'macshr1'
 'brrwhe3' 'crefra2' 'pabspa1' 'sltnig1' 'cabgre1' 'equaka1' 'sobfly1'
 'rindov' 'wlwwar' 'brwwar1' 'gnbcam2' 'carcha1' 'abethr1' 'yertin1'
 'spewea1' 'varsun2' 'yebduc1' 'eubeat1' 'hadibi1' 'brcale1' 'litwea1'
 'sincis1' 'whbcro2' 'thrnig1' 'bubwar2' 'kvbsun1' 'blbpuf2' 'blakit1'
 'colsun2' 'bltapa1' 'gycwar3' 'joygre1' 'greegr' 'vibsta2' 'wtbeat1'
 'afrgos1' 'rebfir2' 'yebgre1' 'comsan' 'pygbat1' 'meypar1' 'yelbis1'
 'norbro1' 'ndcsun2' 'gybfis1' 'reftin1' 'brobab1' 'refwar2' 'norcro1'
 'yebapa1' 'yewgre1' 'palfly2' 'gargan' 'darter3' 'rerswa1' 'augbuz1'
 'gyhbus1' 'refcro1' 'witswa1' 'gryapa1' 'pitwhy' 'eaywag1' 'blhgon1'
 'yebsto1' 'hipbab1' 'whcpri2' 'spemou2' 'gobsta5' 'blksaw1' 'afecuc1'
 'spepig1' 'mabeat1' 'rewsta1' 'rebhor1' 'brtcha1' 'blacuc1' 'brican1'
 'rehblu1' 'gobbun1' 'supsta1' 'bkfruw1' 'litswi1' 'spmthr1' 'spwlap1'
 'quailf1' 'golher1' 'didcuc1' 'gytbar1' 'klacuc1' 'afbfly



In [235]:
# Seed every random number generator used by the notebook
def torch_fix_seed(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic kernels.

    Args:
        seed: Integer seed applied to `random`, `numpy.random` and `torch`
            (CPU and all CUDA devices).
    """
    # Python random
    random.seed(seed)
    # Numpy
    np.random.seed(seed)
    # Pytorch (CPU generator and every CUDA device; a no-op without CUDA)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # This must be *called*: assigning `True` to the attribute would overwrite the
    # torch function and leave determinism disabled. warn_only=True keeps operations
    # without a deterministic implementation usable (they emit a warning instead of raising).
    torch.use_deterministic_algorithms(True, warn_only=True)


torch_fix_seed()

In [242]:
# Score one audio file window by window and append the scores to the prediction table

def audio_to_mel_label(filepath,
                       min_sec_proc,
                       mode='train',
                       data_index=0,
                       label_list=None,
                       label_file=None,
                       mel_list=None):
    """Run the network on fixed-length windows of one audio file and record the scores.

    The first channel of the file is cut into consecutive windows of `min_sec_proc`
    samples (at most 12 windows). Each window is converted to a log-mel spectrogram,
    standardized to zero mean / unit std, and the stacked windows are passed through
    `net` followed by `out_sigmoid`. One row per 5-second chunk of the file's duration
    is then appended to the module-level `pred` table, which is written to
    "submission.csv".

    Relies on module-level names: `mel_spectrogram`, `net`, `out_sigmoid`, `device`,
    `afile` (row id prefix), `pred`, `colums`, `scored_birds` and `bird_label`.

    Args:
        filepath: Path of the audio file to load with torchaudio.
        min_sec_proc: Window length in samples.
        mode, data_index, label_list, label_file: Unused; kept so existing calls
            keep working.
        mel_list: Optional list that receives the per-window spectrogram tensors
            (appended in place). A fresh list is used when omitted.

    Returns:
        pandas.DataFrame built from `pred` with the columns listed in `colums`.
    """
    max_windows = 12    # windows fed to the network per file
    chunk_seconds = 5   # step used for the row ids

    # A `[]` default would be shared between calls and keep growing.
    if mel_list is None:
        mel_list = []

    waveform, sample_rate_file = torchaudio.load(filepath=filepath)
    total_samples = waveform.shape[1]
    # Whole seconds of audio, taken from the decoded waveform itself; no second decode
    # of the file is needed just to measure its length.
    duration_sec = total_samples // sample_rate_file

    # Keep the first channel only (stereo -> mono, mono -> mono), shape (1, samples).
    waveform = waveform[0:1, :]

    # Only whole windows are processed, and never more than max_windows of them.
    # NOTE(review): min_sec_proc is applied to the file's samples as-is; the file's own
    # sample rate is not compared with the rate the window length was derived from.
    usable_samples = min(total_samples, min_sec_proc * max_windows)
    for window in range(usable_samples // min_sec_proc):
        start = window * min_sec_proc
        segment = waveform[0, start:start + min_sec_proc]
        # unsqueeze adds the channel axis without hard-coding the spectrogram size
        log_melspec = torch.log10(mel_spectrogram(segment).unsqueeze(0) + 1e-10)
        log_melspec = (log_melspec - torch.mean(log_melspec)) / torch.std(log_melspec)
        mel_list.append(log_melspec)

    # torch.stack fails on an empty list, so skip inference for clips shorter than a window.
    if mel_list:
        batch = torch.stack(mel_list).to(device)
        with torch.no_grad():
            scores = out_sigmoid(net(batch))
        scored_windows, scored_classes = scores.shape[0], scores.shape[1]
    else:
        scores = None
        scored_windows, scored_classes = 0, 0

    # Position of each label in bird_label is the class index used to read the scores.
    label_to_index = {label: position for position, label in enumerate(bird_label.tolist())}

    for chunk_idx in range(duration_sec // chunk_seconds):
        chunk_end_time = (chunk_idx + 1) * chunk_seconds
        pred['row_id'].append(afile + '_' + str(chunk_end_time))

        for bird in scored_birds:
            class_idx = label_to_index.get(bird)
            # Chunks beyond the scored windows, and labels beyond the network's output
            # size, have no score and are recorded as 0.
            has_score = (chunk_idx < scored_windows
                         and class_idx is not None
                         and class_idx < scored_classes)
            pred[bird].append(scores[chunk_idx, class_idx].item() if has_score else 0)

    results = pd.DataFrame(pred, columns=colums)

    # Save results to csv file
    results.to_csv("submission.csv", index=False)

    return results
    
class ResNetBird(ResNet):
    """ResNet built from BasicBlocks ([4, 8, 6, 4] per stage) for single-channel input.

    Args:
        num_classes: Size of the final classification layer. Defaults to 21, the
            value this notebook has always used.
    """

    def __init__(self, num_classes=21):
        super().__init__(BasicBlock, [4, 8, 6, 4], num_classes=num_classes)
        # Replace the stock first convolution with one that accepts 1 input channel
        # and uses stride 1.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=1, padding=3, bias=False)


net = ResNetBird().to(device)



In [243]:
# Print the module tree of the network to inspect its layers
print(net)

ResNetBird(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True

In [238]:
# Load the saved checkpoint onto the CPU and print its contents
checkpoint_path = '/kaggle/input/modelbird2023/model2.pt'
checkpoint_contents = torch.load(checkpoint_path, map_location=torch.device('cpu'))
print(checkpoint_contents)

OrderedDict([('conv1.weight', tensor([[[[-1.7985e-01, -2.2334e-01,  6.7939e-03,  ...,  9.7836e-03,
            8.8091e-02,  1.0088e-01],
          [-2.5246e-04, -1.4284e-01,  2.5209e-01,  ..., -3.7774e-01,
           -1.1645e-01,  1.0901e-01],
          [-2.3688e-01, -1.9402e-01,  3.3950e-01,  ..., -3.7551e-01,
           -1.2012e-01,  1.5947e-01],
          ...,
          [-8.8587e-02,  1.6473e-01,  1.4931e-01,  ..., -2.4894e-01,
            2.9161e-02,  3.1622e-01],
          [-1.3138e-03,  4.3094e-01, -6.0429e-02,  ..., -1.5185e-01,
           -7.2346e-02,  2.0710e-01],
          [ 2.1082e-01,  4.0162e-01, -6.3082e-02,  ..., -1.5802e-01,
           -5.3744e-02,  3.1854e-02]]],


        [[[-2.6476e-01, -9.0947e-02,  4.0502e-02,  ..., -3.3398e-03,
           -7.4061e-02, -3.0883e-01],
          [ 1.5864e-01,  1.4845e-01,  1.7138e-02,  ...,  1.7215e-01,
            2.6415e-02,  1.3707e-01],
          [-5.4156e-02, -1.8043e-01,  1.7462e-02,  ..., -5.3574e-03,
            7.3158e-02, -2

In [244]:
# Load the trained weights into the network
# strict=False tolerates key mismatches between checkpoint and model, so report them
# instead of letting them pass unnoticed.
load_report = net.load_state_dict(
    torch.load('/kaggle/input/modelbird2023/model2.pt', map_location=torch.device('cpu')),
    strict=False,
)
if load_report.missing_keys or load_report.unexpected_keys:
    print(f"Checkpoint mismatch - missing keys: {load_report.missing_keys}, "
          f"unexpected keys: {load_report.unexpected_keys}")
out_sigmoid = nn.Sigmoid()

# Path to test
test_audio_dir = '/kaggle/input/birdclef-2023/test_soundscapes'
# File names without their extension; splitext only strips the final suffix, so names
# containing extra dots are kept intact.
file_list = [os.path.splitext(f)[0] for f in sorted(os.listdir(test_audio_dir))]

# Prediction table: a 'row_id' column followed by one column per label, sorted by name
scored_birds.sort()
colums = ['row_id'] + list(scored_birds)
pred = {column: [] for column in colums}


In [245]:
binary_th = 5e-8
net.eval()  # inference mode for the network

# Start from empty prediction lists so re-running this cell does not duplicate rows.
pred = {column: [] for column in colums}
# Defined up front so `results` exists even when there are no test files.
results = pd.DataFrame(pred, columns=colums)

for afile in file_list:
    # Score the file that belongs to this row id prefix (extensions were stripped when
    # file_list was built; the soundscapes are .ogg files).
    path = os.path.join(test_audio_dir, afile + '.ogg')

    mel_list_test = []
    results = audio_to_mel_label(path, min_sec_proc, 'test', mel_list=mel_list_test)


In [247]:
# Preview the first rows of the table returned by the last audio_to_mel_label call
results.head()

Unnamed: 0,row_id,abethr1,abhori1,abythr1,afbfly1,afdfly1,afecuc1,affeag1,afgfly1,afghor1,...,yebsto1,yeccan1,yefcan,yelbis1,yenspu1,yertin1,yesbar1,yespet1,yetgre1,yewgre1
0,soundscape_29201_5,7.96892e-17,0,0,0,0,0,0,0,0,...,0,0,0,0,0,3.433309e-15,0,0,3.1688140000000005e-17,0
1,soundscape_29201_10,6.832265e-14,0,0,0,0,0,0,0,0,...,0,0,0,0,0,5.644049e-16,0,0,6.840564e-16,0
2,soundscape_29201_15,1.550803e-10,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2.333433e-13,0,0,1.038512e-13,0
3,soundscape_29201_20,8.941987e-11,0,0,0,0,0,0,0,0,...,0,0,0,0,0,4.243506e-13,0,0,4.486499e-10,0
4,soundscape_29201_25,2.377621e-15,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1.920474e-16,0,0,2.863454e-15,0
