In [1]:
from birdcall.data import *
from birdcall.metrics import *

import pandas as pd

In [2]:
# Class vocabulary plus the train/validation datasets, all read from local pickles.
# NOTE(review): pickle files execute arbitrary code on load -- only use trusted files.
classes = pd.read_pickle('data/classes.pkl')

# Both splits are built the same way; only the source file and `len_mult` differ.
# NOTE(review): `len_mult` presumably scales the number of items per epoch -- confirm
# against MelspecPoolDataset.
train_ds, valid_ds = (
    MelspecPoolDataset(pd.read_pickle(pkl_path), classes, len_mult=mult)
    for pkl_path, mult in (('data/train_set.pkl', 60), ('data/val_set.pkl', 10))
)

In [3]:
# Number of items served per epoch by each dataset: (train, valid).
tuple(len(ds) for ds in (train_ds, valid_ds))

(15840, 2640)

In [4]:
import torch
import torchvision
from torch import nn

In [5]:
# Training batches are reshuffled every epoch.
train_dl = torch.utils.data.DataLoader(
    train_ds,
    batch_size=16,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
# Validation keeps a fixed order and uses a batch twice as large as training.
valid_dl = torch.utils.data.DataLoader(
    valid_ds,
    batch_size=2 * 16,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [6]:
# Pull a single batch to inspect the input shape and the label tensor.
b = next(iter(train_dl))
b[0].shape, b[1]

(torch.Size([16, 10, 3, 80, 212]),
 tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float64))

In [7]:
# Sanity check on the input scale: mean and standard deviation over the whole sample batch.
b[0].mean(), b[0].std()

(tensor(-0.0715), tensor(0.8814))

In [8]:
def lme_pool(x, alpha=1.0): # log-mean-exp pool
    '''Log-mean-exp pooling over dimension 1 of `x`.

    Computes `1/alpha * log(mean(exp(alpha * x), dim=1))`.
    alpha -> inf approximates maxpool, alpha -> 0 approximates mean pool.

    The reduction goes through `torch.logsumexp`, so large values of `alpha * x`
    no longer overflow `exp` and turn the result into `inf`.

    Args:
        x: tensor with at least two dimensions; dimension 1 is pooled away.
        alpha: sharpness of the pooling (non-zero).

    Returns:
        Tensor shaped like `x` with dimension 1 removed.
    '''
    import math

    T = x.shape[1]
    # log(1/T * sum(exp(a*x))) == logsumexp(a*x) - log(T)
    return 1/alpha * (torch.logsumexp(alpha * x, dim=1) - math.log(T))

In [9]:
class Model(nn.Module):
    '''Per-image ResNet-34 classifier whose predictions are pooled across a stack of images.

    Input:  tensor of shape (batch, images, channels, height, width)
            (the sample batch in this notebook is [16, 10, 3, 80, 212]).
    Output: tensor of shape (batch, len(classes)) holding one logit per class,
            obtained by log-mean-exp pooling the per-image logits.
    '''
    def __init__(self):
        super().__init__()
        # Keep only the convolutional trunk: the last two children of torchvision's
        # ResNet (global average pool and the fc layer) are dropped.
        backbone = torchvision.models.resnet34(pretrained=True)
        self.cnn = nn.Sequential(*list(backbone.children())[:-2])
        self.classifier = nn.Sequential(
            nn.Linear(512, 512), nn.ReLU(), nn.Dropout(p=0.5), nn.BatchNorm1d(512),
            nn.Linear(512, 512), nn.ReLU(), nn.Dropout(p=0.5), nn.BatchNorm1d(512),
            nn.Linear(512, len(classes))
        )

    def forward(self, x):
        bs, im_num, ch, y_dim, x_dim = x.shape
        # Fold the image axis into the batch axis so the CNN sees ordinary 4-D input.
        # reshape (unlike view) also accepts non-contiguous tensors.
        x = self.cnn(x.reshape(-1, ch, y_dim, x_dim))
        # Global average pool over the spatial dimensions -> (bs * im_num, 512)
        x = x.mean((2,3))
        x = self.classifier(x)
        # Restore the image axis and pool the per-image logits over it.
        x = x.reshape(bs, im_num, -1)
        x = lme_pool(x)
        return x

In [11]:
# Build the network, then move its parameters and buffers to the GPU.
model = Model()
model = model.cuda()

In [12]:
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score
import time

# Multi-label objective: an independent sigmoid + binary cross-entropy per class,
# applied directly to the logits returned by the model.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# T_max is counted in calls to scheduler.step(), not in epochs.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5)

In [13]:
# Train for a fixed number of epochs; after every epoch run a full validation pass,
# report loss / accuracy / micro-F1 and periodically checkpoint the weights.
for epoch in range(130):
    t0 = time.time()
    running_loss = 0.0

    # ---- training pass ----
    model.train()
    for data in train_dl:
        # The dataset yields float64 targets; cast to float32 to match the logits.
        inputs, labels = data[0].cuda(), data[1].cuda().float()
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # NOTE(review): the scheduler is stepped once per batch while it was created
        # with T_max=5, so the learning rate completes a cosine cycle every 10 batches.
        # Confirm this is intended rather than a per-epoch schedule.
        scheduler.step()

        running_loss += loss.item()

    # ---- validation pass ----
    model.eval()
    preds = []
    targs = []

    with torch.no_grad():
        for data in valid_dl:
            inputs, labels = data[0].cuda(), data[1].cuda().float()
            outputs = model(inputs)
            preds.append(outputs.cpu())
            targs.append(labels.cpu())

    # `preds` (logits) and `targs` stay available for later cells.
    preds = torch.cat(preds)
    targs = torch.cat(targs)

    # scikit-learn metrics take (y_true, y_pred), in that order.
    pred_labels = preds.sigmoid() > 0.5
    accuracy = accuracy_score(targs, pred_labels)
    f1 = f1_score(targs, pred_labels, average='micro')
    # running_loss accumulated len(train_dl) batch losses, so average over all of them.
    print(f'[{epoch + 1}, {time.time() - t0:.1f}] loss: {running_loss / len(train_dl):.3f}, acc: {accuracy:.3f}, f1: {f1:.3f}')

    # Checkpoint every 10th epoch (skipping epoch 0); the F1 score is part of the file name.
    if (epoch % 10 == 0) and (epoch != 0): torch.save(model.state_dict(), f'models/{epoch}_lmepool_{round(f1, 2)}.pth')

[1, 269.1] loss: 0.122, acc: 0.000, f1: 0.000
[3, 270.6] loss: 0.025, acc: 0.000, f1: 0.000
[4, 269.7] loss: 0.024, acc: 0.000, f1: 0.000
[5, 271.1] loss: 0.024, acc: 0.000, f1: 0.000
[6, 270.6] loss: 0.023, acc: 0.000, f1: 0.000
[7, 270.7] loss: 0.022, acc: 0.000, f1: 0.000
[8, 269.3] loss: 0.022, acc: 0.000, f1: 0.000
[9, 269.1] loss: 0.022, acc: 0.000, f1: 0.000
[10, 270.6] loss: 0.021, acc: 0.000, f1: 0.000
[11, 271.1] loss: 0.021, acc: 0.000, f1: 0.000
[12, 269.2] loss: 0.021, acc: 0.000, f1: 0.000
[13, 269.6] loss: 0.021, acc: 0.000, f1: 0.000
[14, 269.6] loss: 0.020, acc: 0.000, f1: 0.000
[15, 269.2] loss: 0.020, acc: 0.002, f1: 0.003
[16, 268.7] loss: 0.020, acc: 0.003, f1: 0.007
[17, 269.5] loss: 0.019, acc: 0.005, f1: 0.009
[18, 269.8] loss: 0.019, acc: 0.004, f1: 0.010
[19, 269.9] loss: 0.019, acc: 0.007, f1: 0.015
[20, 270.0] loss: 0.019, acc: 0.007, f1: 0.014
[21, 269.3] loss: 0.018, acc: 0.011, f1: 0.022
[22, 269.0] loss: 0.018, acc: 0.010, f1: 0.023
[23, 270.2] loss: 0.0

In [16]:
import numpy as np

# Sweep the decision threshold over [0.4, 1.0] in steps of 0.01 and record the
# micro-F1 of the validation predictions (`preds` / `targs` from the training cell).
# NOTE(review): the threshold is tuned on the same validation set it is scored on,
# so the best F1 found here is likely optimistic.
probs = preds.sigmoid()  # loop-invariant, so computed once
ts = list(np.linspace(0.4, 1, 61))
# scikit-learn metrics take (y_true, y_pred), in that order.
# At t == 1.0 nothing is predicted positive, so that last entry carries no information.
f1s = [f1_score(targs, probs > t, average='micro') for t in ts]

In [17]:
# Best micro-F1 reached over the swept thresholds.
max(f1s)

0.6946884148891677

In [18]:
# Threshold at which the best micro-F1 was obtained.
ts[np.argmax(f1s)]

0.48000000000000004