In [None]:
import json
import os
import random
import csv
from librosa import get_duration
import librosa

from pydantic import BaseModel as ConfigBaseModel
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchaudio
import torchaudio.transforms as T
from torchvision.models.resnet import ResNet, BasicBlock

In [None]:
# Select the compute device: CUDA when a GPU is visible, CPU otherwise
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

# Dataset locations
# NOTE(review): root_path already ends with '/', so input_path contains '//';
# it also points at a Colab Drive folder while the data below is read from
# /kaggle/input - confirm which location is intended.
root_path = "/content/drive/MyDrive/birds/"
input_path = root_path + '/train_audio/'

# Bird labels: the entries of the train_audio directory, in os.listdir order
train_meta = pd.read_csv('/kaggle/input/birdclef-2023/train_metadata.csv')
scored_birds = os.listdir('/kaggle/input/birdclef-2023/train_audio')
# Array copy of the label list, used to map a label to a model output index
bird_label = np.asarray(scored_birds)
print("Labels:", bird_label, sep="\n")

# Audio / spectrogram settings
sample_rate = 32000
n_fft = 2048
win_length = None               # None: leave the window length at torchaudio's default
hop_length = 1024
n_mels = 128
min_sec_proc = 5 * sample_rate  # number of samples in one 5-second chunk

# Transform that converts a waveform to a mel power spectrogram
mel_spectrogram = T.MelSpectrogram(
    sample_rate=sample_rate,
    n_mels=n_mels,
    n_fft=n_fft,
    hop_length=hop_length,
    win_length=win_length,
    power=2.0,
    center=True,
    pad_mode="reflect",
    onesided=True,
    norm='slaney',
    mel_scale="htk",
)

In [None]:
# Make runs reproducible by seeding every random number generator in use
def torch_fix_seed(seed=42):
    """Seed Python, NumPy and PyTorch and switch PyTorch to deterministic mode.

    Args:
        seed: Integer seed applied to ``random``, ``numpy.random`` and
            PyTorch (CPU and all CUDA devices).
    """
    # Python random
    random.seed(seed)
    # Numpy
    np.random.seed(seed)
    # Pytorch (CPU generator, then every CUDA device; a no-op without CUDA)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic kernels and disable its autotuner, which
    # may otherwise select different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # This is a function and has to be *called*; assigning to it would replace
    # the API with a bool and leave deterministic mode off. warn_only=True
    # makes operations without a deterministic implementation warn instead of
    # raising.
    torch.use_deterministic_algorithms(True, warn_only=True)


torch_fix_seed()

In [None]:
# Score one audio file in 5-second windows and add the rows to the submission table

def audio_to_mel_label(filepath, mode='train'):
    """Predict per-class scores for every full 5-second chunk of an audio file.

    The file is loaded, reduced to its first channel, cut into consecutive
    chunks of ``min_sec_proc`` samples, and each chunk is converted to a
    standardized log-mel spectrogram and passed through ``net`` followed by
    ``out_sigmoid``. One row per chunk is appended to the module-level
    ``pred`` dictionary, the accumulated table is written to
    ``submission.csv`` and returned.

    Relies on these module-level names: ``net``, ``out_sigmoid``,
    ``mel_spectrogram``, ``device``, ``sample_rate``, ``min_sec_proc``,
    ``n_mels``, ``bird_label``, ``scored_birds``, ``pred`` and ``colums``.
    The caller is expected to have put ``net`` in eval mode.

    Args:
        filepath: Path of the audio file to score. The part of the file name
            before the first '.' becomes the row-id prefix.
        mode: Not used by this function; kept so existing calls still work.

    Returns:
        pandas.DataFrame built from ``pred`` (all rows accumulated so far,
        not only those of this file) with the columns listed in ``colums``.
    """
    waveform, sample_rate_file = torchaudio.load(filepath=filepath)
    # Keep the first channel only: stereo -> mono, mono stays mono.
    waveform = waveform[:1, :]
    # The mel transform and the chunk length are defined for `sample_rate`.
    if sample_rate_file != sample_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate_file, sample_rate)

    len_wav = waveform.shape[1]
    # Only complete 5-second chunks are scored; a trailing remainder is dropped.
    n_chunks = len_wav // min_sec_proc

    chunk_scores = []
    with torch.no_grad():
        for index in range(n_chunks):
            start = index * min_sec_proc
            chunk = waveform[0, start:start + min_sec_proc]
            log_melspec = torch.log10(mel_spectrogram(chunk) + 1e-10)
            # Standardize the chunk; the clamp avoids a division by zero (NaN
            # features) when the chunk is constant, e.g. digital silence.
            spread = torch.std(log_melspec).clamp_min(1e-8)
            log_melspec = (log_melspec - torch.mean(log_melspec)) / spread
            # (n_mels, frames) -> (batch=1, channel=1, n_mels, frames), on the
            # same device as the network.
            log_melspec = log_melspec.reshape(1, 1, n_mels, -1).to(device)
            # Keep every chunk's scores so each 5-second row gets its own.
            chunk_scores.append(out_sigmoid(net(log_melspec)).cpu())

    # Same naming rule as the notebook's file list: text before the first '.'.
    row_prefix = os.path.basename(filepath).split('.')[0]
    # Position of each label in bird_label, i.e. its model output index.
    label_index = {label: position for position, label in enumerate(bird_label)}

    for index, scores in enumerate(chunk_scores):
        chunk_end_time = (index + 1) * 5
        pred['row_id'].append(row_prefix + '_' + str(chunk_end_time))

        for bird in scored_birds:
            column = label_index.get(bird)
            # A label without a matching model output is scored as 0.
            has_output = column is not None and column < scores.shape[1]
            pred[bird].append(scores[0, column].item() if has_output else 0.0)

    results = pd.DataFrame(pred, columns=colums)

    # Save results to csv file
    results.to_csv("submission.csv", index=False)

    return results
    
class ResNetBird(ResNet):
    """ResNet classifier that takes single-channel spectrogram input.

    The network is assembled from ``BasicBlock`` with stage depths
    ``[4, 8, 6, 4]`` and a 264-way output layer. The ``conv1`` layer created
    by ``ResNet`` is then replaced by a 7x7, stride-1 convolution with one
    input channel and 64 output channels.
    """

    def __init__(self):
        super().__init__(block=BasicBlock, layers=[4, 8, 6, 4], num_classes=264)
        # Replace the first convolution so the model accepts 1-channel input.
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=64,
            kernel_size=7,
            stride=1,
            padding=3,
            bias=False,
        )


# Build the model and move it to the selected device
net = ResNetBird().to(device)



In [None]:
#print(net)

In [None]:
#print(torch.load('/kaggle/input/modelbird2023/model2.pt', map_location=torch.device('cpu')))

In [None]:
# Load the trained weights
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code - only load checkpoints from a trusted source.
state_dict = torch.load('/kaggle/input/modelbird2023/model2.pt', map_location=torch.device('cpu'))
# strict=False tolerates key mismatches between checkpoint and model; report
# them so a wrong checkpoint does not silently leave layers at random weights.
load_report = net.load_state_dict(state_dict, strict=False)
if load_report.missing_keys or load_report.unexpected_keys:
    print(f"Warning: checkpoint does not match the model - "
          f"missing keys: {load_report.missing_keys}, "
          f"unexpected keys: {load_report.unexpected_keys}")
out_sigmoid = nn.Sigmoid()

# Path to test
test_audio_dir = '/kaggle/input/birdclef-2023/test_soundscapes'
# File names without extension (text before the first '.'), in sorted order
file_list = [f.split('.')[0] for f in sorted(os.listdir(test_audio_dir))]

# Create the prediction table: a 'row_id' column followed by one column per
# bird label in alphabetical order, each starting as an empty list.
scored_birds.sort()
colums = ['row_id'] + scored_birds
pred = {column: [] for column in colums}


In [None]:
# NOTE(review): binary_th is not read anywhere in the visible notebook.
binary_th = 5e-8
# Inference mode: fixes batch-norm statistics for prediction.
net.eval()

# Start from the current (normally empty) table so `results` is defined even
# when the test directory contains no files.
results = pd.DataFrame(pred, columns=colums)

# Test: score every soundscape in the test directory. file_list holds the
# names without extension; the test soundscapes are .ogg files.
for afile in file_list:
    path = os.path.join(test_audio_dir, afile + '.ogg')

    results = audio_to_mel_label(path, 'test')
  
    #res = get_duration(filename=str(path))


In [None]:
# Preview the first rows of the accumulated prediction table
results.head()