Model

In [1]:


import torch.nn as nn
import torch.nn.functional as F


class BirdConv1d(nn.Module):
    """1-D convolutional classifier applied directly to a waveform tensor.

    The network is four identical stages (Conv1d -> BatchNorm1d -> ReLU ->
    MaxPool1d(4)), followed by an average pool over whatever length remains,
    a linear layer and a log-softmax.

    Args:
        n_input: number of input channels fed to the first convolution.
        n_output: number of output classes produced by the final linear layer.
        stride: stride of the first convolution (kernel size 80).
        n_channel: channel width of the first two stages; the last two
            stages use ``2 * n_channel``.
    """

    def __init__(self, n_input=1, n_output=35, stride=16, n_channel=200):
        super().__init__()
        wide = 2 * n_channel

        # Stage 1: wide strided kernel over the raw signal.
        self.conv1 = nn.Conv1d(n_input, n_channel, kernel_size=80, stride=stride)
        self.bn1 = nn.BatchNorm1d(n_channel)
        self.pool1 = nn.MaxPool1d(4)

        # Stage 2: same width.
        self.conv2 = nn.Conv1d(n_channel, n_channel, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(n_channel)
        self.pool2 = nn.MaxPool1d(4)

        # Stage 3: channel count doubles.
        self.conv3 = nn.Conv1d(n_channel, wide, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(wide)
        self.pool3 = nn.MaxPool1d(4)

        # Stage 4: stays at the doubled width.
        self.conv4 = nn.Conv1d(wide, wide, kernel_size=3)
        self.bn4 = nn.BatchNorm1d(wide)
        self.pool4 = nn.MaxPool1d(4)

        # Classifier head over the channel dimension.
        self.fc1 = nn.Linear(wide, n_output)

    def forward(self, x):
        """Return per-class log-probabilities for ``x``.

        ``x`` is passed straight into ``conv1``. The result has the class
        scores on its last axis (``dim=2``), normalised with log-softmax.
        """
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
            (self.conv4, self.bn4, self.pool4),
        )
        for conv, norm, pool in stages:
            x = pool(F.relu(norm(conv(x))))

        # Collapse the remaining length axis to a single position, then move
        # channels last so the linear layer acts on them.
        pooled = F.avg_pool1d(x, x.shape[-1])
        logits = self.fc1(pooled.permute(0, 2, 1))
        return F.log_softmax(logits, dim=2)

In [2]:
import torchaudio
import torch
import json
import pandas as pd

import os
from os import listdir


def classify_file(soundscape_dir, soundscape_file, row_id, target):
    """Classify one soundscape file and append its submission rows in place.

    The audio is loaded with ``torchaudio``, only index 0 along its first
    dimension is kept, and the signal is cut into whole segments of
    ``seconds_per_segment`` seconds. For each segment the model is run on
    ``tests_per_segment`` consecutive windows of ``n_samples`` samples starting
    at the segment start; the arg-max label of each window is recorded. For
    every scored bird one row is appended: its id to ``row_id`` and a bool to
    ``target`` saying whether that bird was the top prediction of any window.

    Reads the notebook-level globals ``seconds_per_segment`` and ``n_samples``
    at call time (the sample rate used here is the one returned by
    ``torchaudio.load``, not the global ``rate``).

    Args:
        soundscape_dir: directory containing the soundscape file.
        soundscape_file: file name inside ``soundscape_dir``.
        row_id: list that receives the row identifiers (mutated).
        target: list that receives the matching booleans (mutated).
    """
    # Row ids use the file name up to its first dot.
    soundscape_name = soundscape_file.split('.')[0]
    soundscape, rate = torchaudio.load(os.path.join(soundscape_dir, soundscape_file))
    soundscape = soundscape[0]

    # Only whole segments are scored; a trailing partial segment is dropped.
    segments = int((soundscape.shape[0] / rate) / seconds_per_segment)

    # NOTE(review): "houfin" and "skylar" appear twice. The duplicates are kept
    # on purpose here because len(current_birds) sets the model's output width,
    # which must match the saved checkpoint -- presumably this is the label
    # list used at training time; confirm before de-duplicating.
    current_birds = sorted([
        "brnowl", "comsan", "houspa", "mallar3", "norcar", "skylar", "dunlin", "gnwtea", "bcnher",
        "normoc",
        "wesmea",
        "eurwig",
        "houfin",
        "cangoo",
        "rorpar",
        "gwfgoo",
        "bkbplo",
        "rinphe",
        "gadwal",
        "osprey",
        "commyn",

        "akiapo", "aniani", "apapan", "barpet", "crehon", "elepai", "ercfra",
        "hawama", "hawcre", "hawgoo", "hawhaw", "hawpet1", "houfin", "iiwi",
        "jabwar", "maupar", "omao", "puaioh", "skylar", "warwhe1", "yefcan",
    ])

    with open("../input/birdclef-2022/scored_birds.json") as f:
        current_scored_birds = sorted(json.load(f))

    model = BirdConv1d(n_input=1, n_output=len(current_birds))
    # The input tensors below live on the CPU, so map the checkpoint there too;
    # this keeps loading working when it was saved from a GPU session.
    model.load_state_dict(
        torch.load('../input/birdclef-2022-model-1/birds_1d', map_location='cpu')
    )
    model = model.float()  # loop-invariant: cast once, not per window
    model.eval()
    tests_per_segment = 3

    # Pure inference: do not build an autograd graph for each forward pass.
    with torch.no_grad():
        for i in range(segments):
            found = set()
            segment_start = i * seconds_per_segment * rate
            for j in range(tests_per_segment):
                start = segment_start + j * n_samples
                track = soundscape[start:start + n_samples].reshape((1, 1, -1))
                output = model(track)
                found.add(current_birds[int(torch.argmax(output))])

            # Row ids carry the segment's end time in seconds; derive it from
            # seconds_per_segment so it stays in step with the slicing above.
            segment_end = (i + 1) * seconds_per_segment
            for bird in current_scored_birds:
                row_id.append(soundscape_name + "_" + bird + "_" + str(segment_end))
                target.append(bird in found)




In [3]:


# Settings read by classify_file at call time (seconds_per_segment, n_samples).
rate = 32000
n_samples = 10000
seconds_per_segment = 5

# Every entry of the test directory is classified, in sorted name order.
soundscape_dir = '../input/birdclef-2022/test_soundscapes/'
soundscape_files = sorted(os.listdir(soundscape_dir))

# Parallel accumulators filled in place by classify_file.
row_id, target = [], []

for soundscape_file in soundscape_files:
    print("classifying file ", soundscape_file)
    classify_file(soundscape_dir, soundscape_file, row_id, target)

# Assemble the two columns and write the submission file.
d = dict(row_id=row_id, target=target)
pdscore = pd.DataFrame(d)
pdscore.to_csv('submission.csv', index=False)




classifying file  soundscape_453028782.ogg
