In [None]:
import glob
import os
import numpy as np
import torch
import torch.nn as nn
import time
import pandas as pd
import numpy as np
import librosa
from abc import ABC, abstractmethod
from librosa.feature import melspectrogram
from librosa.util import normalize
import shutil

In [None]:
def audio_to_chunks(audio_file,steps_per_subtrack = 160000, sr=32000):
    """Load an audio file and cut it into equal-length, non-overlapping chunks.

    Parameters
    ----------
    audio_file : path handed to ``librosa.load``.
    steps_per_subtrack : int
        Number of samples in every returned chunk.
    sr : int
        Sampling rate requested from ``librosa.load``.

    Returns
    -------
    (chunks, samplerate)
        ``chunks`` is a list of arrays holding ``steps_per_subtrack`` samples
        each. Samples left over after the last full chunk are discarded.
        A recording shorter than one chunk is repeated end-to-end until it
        fills exactly one chunk. An empty recording yields an empty list
        (repeating it could never reach the requested length).
    """
    data, samplerate = librosa.load(audio_file, sr=sr)
    track_length = data.shape[0]

    # Nothing to repeat: without this guard the padding below can never
    # reach the requested length.
    if track_length == 0:
        return [], samplerate

    nChunks = track_length // steps_per_subtrack
    if nChunks == 0:
        # Audio shorter than one chunk: repeat it just often enough to cover
        # a full chunk (ceil division), then cut to the exact length.
        repeats = -(-steps_per_subtrack // track_length)
        return [np.tile(data, repeats)[:steps_per_subtrack]], samplerate

    chunks = [
        data[start:start + steps_per_subtrack]
        for start in range(0, nChunks * steps_per_subtrack, steps_per_subtrack)
    ]
    return chunks , samplerate

def indices_of_top_values(values, num_top):
    """Return the positions of the ``num_top`` largest entries of ``values``.

    Positions are ordered from the largest value to the smallest; entries
    that compare equal keep their original relative order.
    """
    ranking = list(range(len(values)))
    ranking.sort(key=values.__getitem__, reverse=True)
    return ranking[:num_top]


In [None]:
class DataProcessor(ABC):
    """Base class that loads audio as fixed-length chunks.

    Subclasses provide ``processChunk`` to turn one chunk into features.

    Attributes set by the constructor:
        seconds: chunk duration in seconds.
        sr: sampling rate used when loading audio.
        steps_per_subtrack: samples per chunk (``seconds * sr``).
    """

    def __init__(self,seconds = 5, sr=32000):
        self.seconds, self.sr = seconds, sr
        self.steps_per_subtrack = seconds*sr

    def loadAudio(self,audio_file):
        """Return the list of chunks produced by ``audio_to_chunks`` for ``audio_file``."""
        # audio_to_chunks returns (chunks, samplerate); only the chunks are needed here.
        return audio_to_chunks(
            audio_file=audio_file,
            steps_per_subtrack=self.steps_per_subtrack,
            sr=self.sr,
        )[0]

    @abstractmethod
    def processChunk(self,chunk):
        """Convert one audio chunk into model features (implemented by subclasses)."""

class melSpectrogram(DataProcessor):
    """DataProcessor that converts each audio chunk into a normalised mel spectrogram.

    Attributes set by the constructor (in addition to the base class ones):
        n_mels: number of mel bands requested from ``melspectrogram``.
        hop_length: hop length requested from ``melspectrogram``.
        fmax: upper frequency bound, fixed to half the sampling rate.
        tensorShape: hard-coded to ``(224, 224)``; it is not derived from the
            other settings.
    """

    def __init__(self,seconds, sr, n_mels, hop_length):
        super().__init__(seconds, sr)

        self.n_mels, self.hop_length = n_mels, hop_length
        self.fmax = self.sr / 2
        self.tensorShape = (224,224)

    def processChunk(self,chunk):
        """Return ``librosa.util.normalize`` (default arguments) of the chunk's mel spectrogram."""
        mel = melspectrogram(
            y = chunk,
            sr = self.sr,
            n_mels = self.n_mels,
            hop_length = self.hop_length,
            fmax = self.fmax,
        )
        return normalize(mel)


In [None]:
class AxialDW(nn.Module):
    """Residual sum of two depthwise convolutions, one per spatial axis.

    Args:
        dim: number of channels (each channel is convolved separately).
        mixer_kernel: ``(h, w)``; a ``(h, 1)`` and a ``(1, w)`` kernel are built.
        dilation: dilation applied to both convolutions.
    """
    def __init__(self, dim, mixer_kernel, dilation = 1):
        super().__init__()
        kernel_h, kernel_w = mixer_kernel
        # Settings shared by both axis-wise convolutions; 'same' padding keeps
        # the spatial size so the outputs can be added to the input.
        shared = dict(padding='same', groups=dim, dilation=dilation)
        self.dw_h = nn.Conv2d(dim, dim, kernel_size=(kernel_h, 1), **shared)
        self.dw_w = nn.Conv2d(dim, dim, kernel_size=(1, kernel_w), **shared)

    def forward(self, x):
        """Return ``x`` plus the outputs of both axis-wise convolutions of ``x``."""
        along_h = self.dw_h(x)
        along_w = self.dw_w(x)
        return x + along_h + along_w

class EncoderBlock(nn.Module):
    """Encoding then downsampling.

    The input goes through the axial depthwise mixer and batch norm; that
    tensor is returned as the skip connection. It is then projected to
    ``out_c`` channels by a 1x1 convolution, max-pooled 2x2 and passed
    through GELU.

    Args:
        in_c: number of input channels.
        out_c: number of channels of the downsampled output.
        mixer_kernel: ``(h, w)`` kernel sizes forwarded to ``AxialDW``.
    """
    def __init__(self, in_c, out_c, mixer_kernel = (7, 7)):
        super().__init__()
        # Honour the requested kernel size rather than a fixed (7, 7).
        self.dw = AxialDW(in_c, mixer_kernel = mixer_kernel)
        self.bn = nn.BatchNorm2d(in_c)
        self.pw = nn.Conv2d(in_c, out_c, kernel_size=1)
        self.down = nn.MaxPool2d((2,2))
        self.act = nn.GELU()

    def forward(self, x):
        """Return ``(downsampled, skip)`` for input ``x``."""
        skip = self.bn(self.dw(x))
        x = self.act(self.down(self.pw(skip)))
        return x, skip

class DecoderBlock(nn.Module):
    """Upsampling then decoding.

    The input is upsampled by a factor of 2 and concatenated with the skip
    tensor along the channel axis. The result is reduced to ``out_c``
    channels by a 1x1 convolution, then passed through batch norm, the
    axial depthwise mixer, a second 1x1 convolution and GELU.

    Args:
        in_c: channels of the tensor coming from the deeper level.
        out_c: channels of the skip tensor and of the output.
        mixer_kernel: ``(h, w)`` kernel sizes forwarded to ``AxialDW``.
    """
    def __init__(self, in_c, out_c, mixer_kernel = (7, 7)):
        super().__init__()
        self.up = nn.Upsample(scale_factor=2)
        # The concatenated tensor carries in_c (upsampled) + out_c (skip) channels.
        self.pw = nn.Conv2d(in_c + out_c, out_c,kernel_size=1)
        self.bn = nn.BatchNorm2d(out_c)
        # Honour the requested kernel size rather than a fixed (7, 7).
        self.dw = AxialDW(out_c, mixer_kernel = mixer_kernel)
        self.act = nn.GELU()
        self.pw2 = nn.Conv2d(out_c, out_c, kernel_size=1)

    def forward(self, x, skip):
        """Decode ``x`` using ``skip``; ``skip`` must match the upsampled ``x`` spatially."""
        x = self.up(x)
        x = torch.cat([x, skip], dim=1)
        x = self.act(self.pw2(self.dw(self.bn(self.pw(x)))))
        return x
    
class BottleNeckBlock(nn.Module):
    """Axial dilated DW convolution.

    The input is reduced to ``dim // 4`` channels, fed through three
    ``AxialDW`` branches with dilations 1, 2 and 3, and the reduced tensor
    plus the three branch outputs are concatenated, normalised, projected
    back to ``dim`` channels and passed through GELU.
    """
    def __init__(self, dim):
        super().__init__()

        gc = dim//4
        self.pw1 = nn.Conv2d(dim, gc, kernel_size=1)
        # Registers dw1, dw2, dw3 (dilation equal to the branch number).
        for rate in (1, 2, 3):
            setattr(self, 'dw' + str(rate), AxialDW(gc, mixer_kernel = (3, 3), dilation = rate))

        self.bn = nn.BatchNorm2d(4*gc)
        self.pw2 = nn.Conv2d(4*gc, dim, kernel_size=1)
        self.act = nn.GELU()

    def forward(self, x):
        """Return the bottleneck output for ``x`` (same channel count as the input)."""
        reduced = self.pw1(x)
        stacked = [reduced]
        for branch in (self.dw1, self.dw2, self.dw3):
            stacked.append(branch(reduced))
        fused = torch.cat(stacked, 1)
        return self.act(self.pw2(self.bn(fused)))

class ULite(nn.Module):
    """U-shaped encoder/decoder built from the blocks defined in this file.

    A 7x7 convolution lifts the single input channel to 16 channels, five
    ``EncoderBlock`` stages (``e1``..``e5``) widen to 512 channels, a
    ``BottleNeckBlock`` (``b5``) follows, five ``DecoderBlock`` stages
    (``d5``..``d1``) narrow back to 16 channels using the encoder skips, and
    a 1x1 convolution produces a single output channel.
    """
    def __init__(self):
        super().__init__()

        # Channel width after conv_in and after each encoder stage.
        widths = (16, 32, 64, 128, 256, 512)

        # Encoder
        self.conv_in = nn.Conv2d(1, widths[0], kernel_size=7, padding='same')
        for level in range(1, 6):
            setattr(self, f"e{level}", EncoderBlock(widths[level - 1], widths[level]))

        # Bottle Neck
        self.b5 = BottleNeckBlock(widths[-1])

        # Decoder (mirrors the encoder, deepest level first)
        for level in range(5, 0, -1):
            setattr(self, f"d{level}", DecoderBlock(widths[level], widths[level - 1]))
        self.conv_out = nn.Conv2d(widths[0], 1, kernel_size=1)

    def forward(self, x):
        """Run the full encoder, bottleneck and decoder on ``x``."""
        # Encoder: remember every skip tensor, shallowest first.
        x = self.conv_in(x)
        skips = []
        for encoder in (self.e1, self.e2, self.e3, self.e4, self.e5):
            x, skip = encoder(x)
            skips.append(skip)

        # BottleNeck
        x = self.b5(x)

        # Decoder: consume the skips deepest first.
        for decoder in (self.d5, self.d4, self.d3, self.d2, self.d1):
            x = decoder(x, skips.pop())
        return self.conv_out(x)

In [None]:
class ClassifierForULite(nn.Module):
    """Classification head on top of the encoder half of a ``ULite``-like model.

    ``forward`` runs ``autoEncoder.conv_in`` and the encoder stages
    ``e1``..``e5`` (skip tensors are discarded), then two unpadded 3x3
    convolutions with ReLU, a 3x3 max-pool, a flatten and a linear layer
    with ``nClasses`` outputs. The linear layer expects exactly 8 features
    after flattening.
    """

    def __init__(self,autoEncoder,nClasses):
        super().__init__()
        self.nClasses = nClasses

        self.autoEncoder = autoEncoder

        # Head layers applied after the encoder.
        self.totunecnn1 = nn.Conv2d(512, 16, 3, stride=1, padding=0, bias=False)
        self.totunecnn2 = nn.Conv2d(16, 8, 3, stride=1, padding=0, bias=False)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(3)
        self.totunelin6 = nn.Linear(8,self.nClasses)

    def forward(self, x):
        """Return the raw class scores (no softmax) for the batch ``x``."""
        backbone = self.autoEncoder
        features = backbone.conv_in(x)
        for stage in (backbone.e1, backbone.e2, backbone.e3, backbone.e4, backbone.e5):
            # Each stage returns (downsampled, skip); the skip is not needed here.
            features, _ = stage(features)

        features = self.relu(self.totunecnn1(features))
        features = self.relu(self.totunecnn2(features))
        pooled = self.pool(features)
        return self.totunelin6(torch.flatten(pooled, start_dim=1))

In [None]:
# Softmax over dimension 1, shared by the prediction helpers below.
softmax = nn.Softmax(dim=1)

def loadModels(model_path=None):
    """Build the 24 classifiers (2 groups x 12 subgroups) and load their weights.

    Parameters
    ----------
    model_path : str, optional
        Directory holding one checkpoint per model, named
        ``"<group>_<subgroup>"``. Defaults to the local development
        directory that was previously hard-coded; on Kaggle pass e.g.
        ``'/kaggle/input/optunamodels/pytorch/optunamodels/1/'``.

    Returns
    -------
    list
        The models ordered by ``group * 12 + subgroup``. Every model owns
        its own ``ULite`` encoder, so loading one checkpoint cannot
        overwrite the encoder weights of a model loaded earlier.
    """
    if model_path is None:
        model_path = 'C:/Users/fares/OneDrive/Bureau/kaggleBirds/models/BirdClef2024/optunaModels/'

    models = []
    for group in range(2):
        for subgroup in range(12):
            # Output sizes differ for the first model of group 0 and the
            # first two models of group 1.
            if (group == 0 and subgroup == 0):
                nClasses = 17
            elif (group == 1 and subgroup <= 1):
                nClasses = 16
            else:
                nClasses = 15

            # A fresh encoder per classifier: sharing a single ULite instance
            # would make every load_state_dict call overwrite the encoder
            # used by all previously loaded models.
            model = ClassifierForULite(ULite(), nClasses=nClasses)

            path = os.path.join(model_path, str(group) + '_' + str(subgroup))
            # NOTE(security): torch.load unpickles the file; only load
            # checkpoints from a trusted source.
            model.load_state_dict(torch.load(path,map_location=torch.device('cpu')))
            models.append(model)

    return models

from torch.ao.quantization import get_default_qconfig
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx
import copy

def loadQuantizedModels(model_path=None, calibration_data=None):
    """Load the 24 classifiers and return FX-quantized versions of them.

    Parameters
    ----------
    model_path : str, optional
        Directory holding one checkpoint per model, named
        ``"<group>_<subgroup>"``. Defaults to the local development
        directory that was previously hard-coded.
    calibration_data : iterable of tensors, optional
        Batches fed through each prepared model to calibrate the observers.
        When omitted, 100 random ``(1, 1, 224, 224)`` tensors are drawn for
        every model. Random noise is only a placeholder: pass representative
        spectrograms for meaningful quantization ranges.

    Returns
    -------
    list
        The quantized models ordered by ``group * 12 + subgroup``.
    """
    if model_path is None:
        model_path = 'C:/Users/fares/OneDrive/Bureau/kaggleBirds/models/BirdClef2024/optunaModels/'
    if calibration_data is not None:
        # Materialise once so a generator is not exhausted by the first model.
        calibration_data = list(calibration_data)

    qconfig_dict = {"": get_default_qconfig("x86")}
    # prepare_fx expects the example inputs as a tuple of positional arguments.
    example_inputs = (torch.randn(1, 1, 224, 224),)

    models = []
    for group in range(2):
        for subgroup in range(12):
            if (group == 0 and subgroup == 0):
                nClasses = 17
            elif (group == 1 and subgroup <= 1):
                nClasses = 16
            else:
                nClasses = 15

            # Each model gets its own encoder, so no copy is needed to keep
            # the loaded weights isolated from the other models.
            model = ClassifierForULite(ULite(), nClasses=nClasses)
            path = os.path.join(model_path, str(group) + '_' + str(subgroup))
            # NOTE(security): torch.load unpickles the file; only load
            # checkpoints from a trusted source.
            model.load_state_dict(torch.load(path,map_location=torch.device('cpu')))
            model.eval()

            model_prepared = prepare_fx(model, qconfig_dict, example_inputs)

            if calibration_data is None:
                batches = [torch.randn(1, 1, 224, 224) for _ in range(100)]
            else:
                batches = calibration_data
            # Calibration only needs forward passes; skip autograd bookkeeping.
            with torch.no_grad():
                for batch in batches:
                    model_prepared(batch)

            models.append(convert_fx(model_prepared))

    return models

# def predict(model, nClasses, n_loops, batch_size, path):

#     with torch.no_grad():
#         ypred_test = torch.empty((0,nClasses), dtype = torch.long)
#         to = time.time()
#         for j in range(n_loops):
#             t = torch.load(path + str(j*batch_size) + '.pt' , map_location='cpu').unsqueeze(0).unsqueeze(0)
#             for i in range(batch_size - 1):
#                     x = torch.load(path + str(j*batch_size + i + 1) + '.pt' , map_location='cpu').unsqueeze(0).unsqueeze(0)
#                     t = torch.cat((t,x),dim=0)
#             print('time loading : ',(time.time()-to))
#             y_hat = model(t)
#             ypred_test = torch.cat((ypred_test,y_hat),dim=0)
        
#         ypred_test = softmax(ypred_test)
#         ypred_test.cpu().numpy()
    
#     return ypred_test

def predict(model, tensors, nClasses, n_loops, batch_size, path=None):
    """Run ``model`` over ``tensors`` in batches and return softmax scores.

    Parameters
    ----------
    model : callable mapping a batch of inputs to ``(batch, nClasses)`` scores.
    tensors : tensor whose first dimension indexes the samples.
    nClasses : int, number of columns of the result (used when no batch is run).
    n_loops : int, number of consecutive batches to evaluate.
    batch_size : int, number of samples per batch.
    path : unused; kept so existing callers keep working.

    Returns
    -------
    torch.Tensor
        Softmax over dimension 1 of the concatenated model outputs, one row
        per evaluated sample. A floating-point ``(0, nClasses)`` tensor is
        returned when no batch is evaluated.
    """
    outputs = []
    with torch.no_grad():
        for j in range(n_loops):
            batch = tensors[j*batch_size:(j+1)*batch_size]
            # Past the end of the data every further slice is empty as well.
            if len(batch) == 0:
                break
            outputs.append(model(batch))

        if not outputs:
            # Floating-point on purpose: callers get a tensor with the same
            # kind of dtype as real scores.
            return torch.empty((0, nClasses), dtype=torch.float32)

        # Concatenate once instead of growing a tensor inside the loop.
        ypred_test = torch.softmax(torch.cat(outputs, dim=0), dim=1)

    return ypred_test

In [None]:
import time
def predict_for_sample(filename, sample_sub, dataProcessor, models, groups, competition_classes,
                       batch_size=2, top_k=2, n_segments=48):
    """Predict the species of every chunk of one recording and write them into ``sample_sub``.

    For each chunk, the ``top_k`` classes of every group-0 model form a
    candidate set; a species is kept when it is also among the ``top_k``
    classes of a group-1 model. Kept species get 0.99999999, every other
    class gets 4.798188e-08.

    Parameters
    ----------
    filename : path of the ``.ogg`` recording; its base name prefixes the row ids.
    sample_sub : DataFrame with a ``row_id`` column and one column per class;
        updated in place and returned.
    dataProcessor : object providing ``loadAudio(filename)`` and ``processChunk(chunk)``.
    models : list of 24 models ordered by ``group * 12 + subgroup``.
    groups : ``groups[group][subgroup]`` lists the class labels of the matching
        model, in output order.
    competition_classes : list of the class column names of ``sample_sub``.
    batch_size : number of chunks sent to a model at once.
    top_k : number of best classes kept per model and chunk.
    n_segments : number of rows written per recording (row ids
        ``<name>_5`` ... ``<name>_<5 * n_segments>``). Segments without a
        chunk receive the floor value for every class.

    Returns
    -------
    The updated ``sample_sub``.
    """
    # Accept both "/" and "\\" separators (glob returns the latter on Windows).
    file_id = filename.replace("\\", "/").split("/")[-1].split(".ogg")[0] + '_'
    floor_value = 4.798188e-08
    n_species = len(competition_classes)
    # Only forwarded to predict(), which does not use it; chunks are kept in
    # memory, so the directory is not created any more.
    path = '/kaggle/working/chunks/'

    chunks = dataProcessor.loadAudio(filename)
    sl = time.time()
    # Every chunk is used: one chunk per 5-second row of the submission.
    features = [torch.from_numpy(dataProcessor.processChunk(chunk)) for chunk in chunks]
    if features:
        # (n_chunks, H, W) -> (n_chunks, 1, H, W), stacked in a single step.
        tensors = torch.stack(features).unsqueeze(1).to(torch.float32)
    else:
        tensors = torch.empty((0,1,224,224), dtype = torch.float32)
    print('time saving : ',time.time() - sl)
    del chunks, features
    print(tensors.shape)

    # The chunk count comes from the tensors actually built, and the number
    # of batches is rounded up so a trailing partial batch is not dropped.
    nChunks = tensors.shape[0]
    n_loops = (nChunks + batch_size - 1) // batch_size

    s = time.time()

    predictions = [[] for _ in range(nChunks)]
    predictionsGroup0 = [[] for _ in range(nChunks)]

    if nChunks > 0:
        for group in range(2):
            for subgroup in range(12):

                training_list = groups[group][subgroup]

                if (group == 0 and subgroup == 0):
                    nClasses=17
                elif (group == 1 and subgroup <= 1):
                    nClasses=16
                else:
                    nClasses=15

                model = models[group*12 + subgroup]
                model.eval()

                ypred_test = predict(model, tensors, nClasses, n_loops, batch_size, path)

                for i,elem in enumerate(ypred_test):
                    for bird_ind in indices_of_top_values(elem, top_k):
                        bird = training_list[bird_ind]
                        if group == 0:
                            # Group 0 proposes candidates for this chunk.
                            predictionsGroup0[i].append(bird)
                        elif bird in predictionsGroup0[i]:
                            # Group 1 confirms a candidate proposed by group 0.
                            predictions[i].append(bird)

    for i in range(n_segments):
        probabilities = np.full((n_species,), floor_value, dtype=np.float32)
        # Segments beyond the available chunks keep the floor value everywhere.
        if i < nChunks:
            for species in predictions[i]:
                probabilities[competition_classes.index(species)] = 0.99999999
        row_id = file_id + str(5*(i+1))
        sample_sub.loc[sample_sub.row_id == row_id, competition_classes] = probabilities

    print('time processing : ',time.time() - s)
    return sample_sub

In [1]:
# Class labels handled by each classifier, indexed as groups[group][subgroup]
# (2 groups x 12 sublists). Position k inside a sublist is the label for output
# index k of the model stored at index group * 12 + subgroup
# (see predict_for_sample).
# NOTE(review): each sublist's length is expected to equal the nClasses used when
# the matching model is built (17 for [0][0], 16 for [1][0] and [1][1], 15
# otherwise) -- TODO confirm for every sublist.
groups = [[['grewar3', 'commyn', 'hoopoe', 'comros', 'eucdov', 'bkwsti', 'barswa', 'graher1', 'bcnher', 'lirplo', 'grywag', 'zitcis1', 'eaywag1', 'rorpar', 'comkin1', 'blrwar1', 'houspa'], ['comgre', 'woosan', 'eurcoo', 'comsan', 'grnsan', 'litgre1', 'commoo3', 'grtdro1', 'bkskit1', 'rewbul', 'wemhar1', 'litegr', 'categr', 'putbab1', 'whiter2'], ['comtai1', 'asikoe2', 'blakit1', 'thbwar1', 'gyhcaf1', 'labcro1', 'comior1', 'whbwat1', 'rerswa1', 'purher1', 'rocpig', 'grnwar1', 'spodov', 'greegr', 'bladro1'], ['brnshr', 'kenplo1', 'brodro1', 'plapri1', 'whtkin2', 'crseag1', 'brnhao1', 'grecou1', 'blhori1', 'blnmon1', 'litswi1', 'ashdro1', 'stbkin1', 'revbul', 'asbfly'], ['rewlap1', 'houcro1', 'ruftre2', 'brwowl1', 'gybpri1', 'pursun4', 'litspi1', 'copbar1', 'gargan', 'laudov1', 'tibfly3', 'brcful1', 'nutman', 'cohcuc1', 'junbab2'], ['piebus1', 'inbrob1', 'ashpri1', 'piekin1', 'whbsho3', 'barfly1', 'rossta2', 'shikra1', 'lblwar1', 'whbwoo2', 'cregos1', 'insbab1', 'sohmyn1', 'goflea1', 'emedov2'], ['grejun2', 'gloibi', 'indpit1', 'ingori1', 'marsan', 'whrmun', 'mawthr1', 'pursun3', 'forwag1', 'junowl1', 'oripip1', 'btbeat1', 'grefla1', 'ashwoo2', 'spepic1'], ['pabflo1', 'whbwag1', 'compea', 'indrob1', 'grbeat1', 'maghor2', 'whcbar1', 'placuc3', 'grenig1', 'orihob2', 'grehor1', 'insowl1', 'whbbul2', 'rufwoo2', 'sbeowl1'], ['gryfra', 'yebbul3', 'lesyel1', 'brakit1', 'purswa3', 'vefnut1', 'bwfshr1', 'plhpar1', 'indrol2', 'lewduc1', 'brfowl1', 'spoowl1', 'bkcbul1', 'sqtbul1', 'lobsun2'], ['whbtre1', 'yebbab1', 'comfla1', 'heswoo1', 'crbsun2', 'tilwar1', 'moipig1', 'aspswi1', 'vehpar1', 'eurbla2', 'sttwoo1', 'malpar1', 'jerbus2', 'rufbab3', 'aspfly1'], ['dafbab1', 'grynig2', 'bkrfla1', 'kerlau2', 'indtit1', 'crfbar1', 'junmyn1', 'smamin1', 'maltro1', 'chbeat1', 'brwjac1', 'plaflo1', 'isbduc1', 'brasta1', 'wynlau1'], ['paisto1', 'redspu1', 'malwoo1', 'nilfly2', 'rutfly6', 'scamin3', 'bncwoo3', 'wbbfly1', 'pomgrp2', 'inpher1', 'blaeag1', 'darter2', 'integr', 'asiope1', 
'niwpig1']], [['grewar3', 'hoopoe', 'eucdov', 'barswa', 'bcnher', 'grywag', 'eaywag1', 'comkin1', 'woosan', 'comsan', 'litgre1', 'grtdro1', 'rewbul', 'litegr', 'putbab1', 'blrwar1'], ['commyn', 'comros', 'bkwsti', 'graher1', 'lirplo', 'zitcis1', 'rorpar', 'comgre', 'eurcoo', 'grnsan', 'commoo3', 'bkskit1', 'wemhar1', 'categr', 'whiter2', 'houspa'], ['comtai1', 'blakit1', 'gyhcaf1', 'comior1', 'rerswa1', 'rocpig', 'spodov', 'bladro1', 'kenplo1', 'plapri1', 'crseag1', 'grecou1', 'blnmon1', 'ashdro1', 'revbul'], ['asikoe2', 'thbwar1', 'litgre1', 'whbwat1', 'purher1', 'grnwar1', 'greegr', 'brnshr', 'brodro1', 'whtkin2', 'brnhao1', 'blhori1', 'litswi1', 'stbkin1', 'bladro1'], ['rewlap1', 'ruftre2', 'gybpri1', 'litspi1', 'gargan', 'tibfly3', 'nutman', 'junbab2', 'inbrob1', 'piekin1', 'barfly1', 'shikra1', 'whbwoo2', 'insbab1', 'goflea1'], ['houcro1', 'brwowl1', 'labcro1', 'copbar1', 'laudov1', 'brcful1', 'cohcuc1', 'piebus1', 'ashpri1', 'whbsho3', 'rossta2', 'lblwar1', 'cregos1', 'sohmyn1', 'asbfly'], ['grejun2', 'indpit1', 'marsan', 'mawthr1', 'forwag1', 'oripip1', 'grefla1', 'spepic1', 'whbwag1', 'indrob1', 'maghor2', 'placuc3', 'orihob2', 'insowl1', 'rufwoo2'], ['gloibi', 'ingori1', 'crseag1', 'pursun3', 'junowl1', 'btbeat1', 'ashwoo2', 'pabflo1', 'compea', 'grbeat1', 'whcbar1', 'grenig1', 'grehor1', 'whbbul2', 'junbab2'], ['gryfra', 'lesyel1', 'purswa3', 'bwfshr1', 'indrol2', 'brfowl1', 'bkcbul1', 'lobsun2', 'yebbab1', 'heswoo1', 'tilwar1', 'aspswi1', 'eurbla2', 'malpar1', 'rufbab3'], ['yebbul3', 'brakit1', 'pursun4', 'plhpar1', 'lewduc1', 'spoowl1', 'sqtbul1', 'whbtre1', 'comfla1', 'crbsun2', 'moipig1', 'vehpar1', 'sttwoo1', 'jerbus2', 'emedov2'], ['dafbab1', 'bkrfla1', 'indtit1', 'junmyn1', 'maltro1', 'brwjac1', 'isbduc1', 'wynlau1', 'redspu1', 'nilfly2', 'scamin3', 'wbbfly1', 'inpher1', 'darter2', 'asiope1'], ['grynig2', 'kerlau2', 'barfly1', 'smamin1', 'chbeat1', 'plaflo1', 'brasta1', 'paisto1', 'malwoo1', 'rutfly6', 'bncwoo3', 'pomgrp2', 'blaeag1', 'integr', 
'spepic1']]]

In [None]:
# Feature extractor and the classifiers used for inference.
dataProcessor = melSpectrogram(seconds=5, sr=32000, n_mels=224, hop_length=716)
models = loadModels()

# The class columns are the sorted unique primary labels of the training metadata.
train_metadata = pd.read_csv("/kaggle/input/birdclef-2024/train_metadata.csv")
competition_classes = sorted(train_metadata["primary_label"].unique())

# Recordings to score; currently the unlabeled soundscapes directory
# (the alternative noted originally is /kaggle/input/birdclef-2024/test_soundscapes).
test_samples = glob.glob("/kaggle/input/birdclef-2024/unlabeled_soundscapes/*.ogg")

# Submission template, with the class columns stored as float32.
sample_sub = pd.read_csv("/kaggle/input/birdclef-2024/sample_submission.csv")
sample_sub[competition_classes] = sample_sub[competition_classes].astype(np.float32)

# Fill the template one recording at a time, then write the submission file.
for sample_filename in test_samples:
    print(sample_filename)
    sample_sub = predict_for_sample(
        sample_filename,
        sample_sub,
        dataProcessor,
        models,
        groups,
        competition_classes,
    )

sample_sub.to_csv("submission.csv", index=False)

