In [1]:
import sys
sys.path.append('../input/bird-tools')
import noisereduce as nr

In [2]:
import torch
import numpy as np
import random
import os
class config:
    num_fold = 10
    sample_rate= 32_000
    sampleNum = 32_000*5
    n_fft=1024
    win_length = 1024
    hop_length=512
    n_mels=64
    duration=5
    num_classes = 152
    train_batch_size = 64
    valid_batch_size = 64
    epochs = 1
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    learning_rate = 1e-3

In [3]:
import pandas as pd
import os
PATH_TRAIN_DATASET = "../input/bird-filter-data/Slice_data"
path_csv = os.path.join(PATH_TRAIN_DATASET, "Filter_Clip_Data.csv")
train_meta = pd.read_csv(path_csv)
train_meta.head()

Unnamed: 0,primary_label,secondary_labels,type,rating,filename,duration,segmentNum,seg_index
0,afrsil1,[],"['call', 'flight call']",2.5,afrsil1/XC125458.ogg,11.102031,2.0,0.0
1,afrsil1,[],"['call', 'flight call']",2.5,afrsil1/XC125458.ogg,11.102031,2.0,1.0
2,afrsil1,"['houspa', 'redava', 'zebdov']",['call'],3.5,afrsil1/XC175522.ogg,47.020406,9.0,0.0
3,afrsil1,"['houspa', 'redava', 'zebdov']",['call'],3.5,afrsil1/XC175522.ogg,47.020406,9.0,1.0
4,afrsil1,"['houspa', 'redava', 'zebdov']",['call'],3.5,afrsil1/XC175522.ogg,47.020406,9.0,2.0


In [4]:
train_meta = train_meta.dropna().reset_index(drop=True)

train_meta['new_filename'] = train_meta['filename'].str.replace('.ogg', '_') + train_meta['seg_index'].values.astype(int).astype(str) +'.ogg' 

  This is separate from the ipykernel package so we can avoid doing imports until


In [5]:
train_meta['new_filename'].str.len().max()

24

In [6]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
train_meta['primary_label_encoded'] = encoder.fit_transform(train_meta['primary_label'])

# save encoder

In [7]:
np.save('encoder_list.npy',encoder.classes_)
# np.load('encoder_list.npy',allow_pickle=True)

# create folds

In [8]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=config.num_fold)
for k, (_, val_ind) in enumerate(skf.split(X=train_meta, y=train_meta['primary_label_encoded'])):
    train_meta.loc[val_ind, 'fold'] = k



In [9]:
def get_data(df,fold):
    train_df = df[~df['fold'].isin(fold)].reset_index(drop=True)
    valid_df = df[df['fold'].isin(fold)].reset_index(drop=True)
    return train_df,valid_df  

train_df,valid_df = get_data(train_meta,[7,8,9])

In [10]:
valid_df[['primary_label','filename']].groupby('primary_label').count().describe()

Unnamed: 0,filename
count,152.0
mean,215.309211
std,276.315642
min,1.0
25%,31.75
50%,88.0
75%,300.0
max,1191.0


In [11]:
train_df[['primary_label','filename']].groupby('primary_label').count().describe()

Unnamed: 0,filename
count,152.0
mean,502.407895
std,644.303067
min,1.0
25%,74.0
50%,205.5
75%,701.75
max,2779.0


# Dataset

In [12]:
import torchaudio
# STFT
n_fft = 1024
win_length = 1024
hop_length = 512
transform = torchaudio.transforms.Spectrogram(
    n_fft = n_fft,           # freqGroup = n_fft//2 + 1
    win_length = win_length, # freq gap for each group
    hop_length = hop_length, # length = samples / hop_length
    center = True,
    pad_mode = 'reflect',
    power=2.0
).to('cpu')
transform

Spectrogram()

In [13]:
from torch.utils.data import Dataset, DataLoader

class BirdClefDataset(Dataset):
    def __init__(self, df):
        self.audio_paths = df['new_filename'].values
        self.labels = df['primary_label_encoded'].values
        
    def __len__(self):
        return len(self.audio_paths)
    
    def __getitem__(self, index):
        filename = os.path.join(PATH_TRAIN_DATASET, 'Slice_data',self.audio_paths[index])
        waveform,sr = torchaudio.load(filename)
        data = transform(waveform)   
        label = torch.tensor(self.labels[index])
        
        return data, label

# Model

In [14]:

import torch.nn as nn
import torch.nn.functional as F
# CNN
class BirdClefModel(nn.Module):
    def __init__(self):
        super(BirdClefModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(128*16*9, 1024*4)
#         self.fc2 = nn.Linear(1024*8, 1024)
        self.fc3 = nn.Linear(1024*4, config.num_classes)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        # 513*313
        x = self.pool(x)
        # 256*156        
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # 128*78
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        # 64*39
        x = F.relu(self.conv4(x))
        x = self.pool(x)
        # 32*19
        x = F.relu(self.conv5(x))
        x = self.pool(x)
        # 16*9
        
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
#         x = F.relu(self.fc2(x))
#         x = self.dropout(x)
        x = self.fc3(x)
    
        return x


In [15]:
def loss_fn(outputs, labels):
    return nn.CrossEntropyLoss()(outputs, labels)

# Train

In [16]:
from sklearn.metrics import f1_score

def train(model, data_loader, optimizer, scheduler, device, epoch):
    model.train()
    pred = []
    label = []
    
    running_loss = 0
    acc = 0
    loop = tqdm(data_loader, position=0)
    for i, (spec, labels) in enumerate(loop):
        spec = spec.to(device)
        labels = labels.to(device)
        
        outputs = model(spec)
        _, preds = torch.max(outputs, 1)
        acc += (preds==labels).sum()
        
        loss = loss_fn(outputs, labels)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if scheduler is not None:
            scheduler.step()
            
        running_loss += loss.item()
        pred.extend(preds.view(-1).cpu().detach().numpy())
        label.extend(labels.view(-1).cpu().detach().numpy())
        
        loop.set_description(f"Epoch [{epoch+1}/{config.epochs}]")
        loop.set_postfix(loss=loss.item())

    return running_loss/len(data_loader),acc/(len(data_loader)*config.train_batch_size)

In [17]:
def valid(model, data_loader, device, epoch):
    model.eval()
    
    acc = 0
    running_loss = 0
    pred = []
    label = []

    loop = tqdm(data_loader, position=0)
    for spec, labels in loop:
        spec = spec.to(device)
        labels = labels.to(device)
        
        outputs = model(spec)
        _, preds = torch.max(outputs, 1)
        acc += (preds==labels).sum()
        
        loss = loss_fn(outputs, labels)
            
        running_loss += loss.item()
        
        pred.extend(preds.view(-1).cpu().detach().numpy())
        label.extend(labels.view(-1).cpu().detach().numpy())
        
        loop.set_description(f"Epoch [{epoch+1}/{config.epochs}]")
        loop.set_postfix(loss=loss.item())
        
    valid_f1 = f1_score(label, pred, average='macro')
    
    return running_loss/len(data_loader), valid_f1,acc/(len(data_loader)*config.valid_batch_size)

In [18]:
train_dataset = BirdClefDataset(train_df)
valid_dataset = BirdClefDataset(valid_df)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config.train_batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=config.valid_batch_size, shuffle=True)

In [26]:
from tqdm import tqdm

# model = BirdClefModel().to(config.device)

# PATH_Model = "../input/model-02f1"
# model_path = os.path.join(PATH_Model, "model.pt")
# model = BirdClefModel()
# model.load_state_dict(torch.load(model_path,map_location='cpu'))


optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, eta_min=1e-5, T_max=10)

for epoch in range(config.epochs):
    train_loss,train_acc = train(model, train_loader, optimizer, scheduler, config.device, epoch)
    valid_loss, valid_f1,valid_acc = valid(model, valid_loader, config.device, epoch)
    print('train Loss: ',train_loss,'train_acc: ',train_acc,'valid Loss: ',valid_loss,'valid_acc: ',valid_acc," vaild_F1",valid_f1)

Epoch [1/1]: 100%|██████████| 1194/1194 [1:49:22<00:00,  5.50s/it, loss=1.33] 
Epoch [1/1]: 100%|██████████| 512/512 [24:36<00:00,  2.88s/it, loss=12]  

train Loss:  1.2926300863424938 train_acc:  tensor(0.6944) valid Loss:  5.10744161112234 valid_acc:  tensor(0.3504)  vaild_F1 0.19653026205044935





In [27]:
model

BirdClefModel(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=18432, out_features=4096, bias=True)
  (fc3): Linear(in_features=4096, out_features=152, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [28]:
torch.save(model.state_dict(),"model.pt")
