In [1]:
from birdcall.data import *
from birdcall.metrics import *

import pandas as pd

In [2]:
# Load the class list and the pickled train/validation sets from disk.
classes = pd.read_pickle('data/classes.pkl')
train_items = pd.read_pickle('data/train_set.pkl')
valid_items = pd.read_pickle('data/val_set.pkl')

# len_mult is forwarded unchanged to MelspecPoolDataset.
# NOTE(review): it presumably scales the reported dataset length — confirm in birdcall.data.
train_ds = MelspecPoolDataset(train_items, classes, len_mult=60)
valid_ds = MelspecPoolDataset(valid_items, classes, len_mult=10)

In [3]:
# Number of items in each split, displayed as a (train, valid) tuple.
tuple(len(ds) for ds in (train_ds, valid_ds))

(15840, 2640)

In [4]:
import torch
import torchvision
from torch import nn

In [5]:
# Training loader: shuffled, 16 samples per batch.
train_dl = torch.utils.data.DataLoader(
    train_ds,
    batch_size=16,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
# Validation loader: fixed order, twice the training batch size.
valid_dl = torch.utils.data.DataLoader(
    valid_ds,
    batch_size=32,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [6]:
# Grab a single batch from the training loader to inspect its layout.
b = next(iter(train_dl))
# Show the shape of the first element and the contents of the second.
b[0].shape, b[1]

(torch.Size([16, 10, 3, 80, 212]),
 tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float64))

In [7]:
# Mean and standard deviation over every value of the preview batch's input tensor.
batch_inputs = b[0]
batch_inputs.mean(), batch_inputs.std()

(tensor(-0.0010), tensor(1.5770))

In [8]:
class Model(nn.Module):
    """ResNet-34 backbone with an MLP head, max-pooled across the images of each sample.

    The forward pass expects a 5-D input, unpacked as
    (batch, images per sample, channels, height, width). Every image is run
    through the CNN independently, scored by the classifier, and the scores
    are then reduced with an element-wise max over the image axis, giving one
    row of ``len(classes)`` logits per sample.
    """

    def __init__(self):
        super().__init__()
        # The positional True is torchvision's `pretrained` flag. All child modules
        # except the final two are kept (in torchvision's ResNet those are the
        # average pool and the fc layer), so the CNN emits a spatial feature map.
        backbone = torchvision.models.resnet34(True)
        self.cnn = nn.Sequential(*list(backbone.children())[:-2])
        # Layer order is kept flat and unchanged so state_dict keys stay the same.
        self.classifier = nn.Sequential(
            nn.Linear(512, 512), nn.ReLU(), nn.Dropout(p=0.5), nn.BatchNorm1d(512),
            nn.Linear(512, 512), nn.ReLU(), nn.Dropout(p=0.5), nn.BatchNorm1d(512),
            nn.Linear(512, len(classes)),
        )

    def forward(self, x):
        """Return per-sample logits of shape (batch, len(classes))."""
        batch, n_imgs, channels, height, width = x.shape
        # Fold the image axis into the batch axis so the CNN sees a 4-D input.
        feature_maps = self.cnn(x.view(-1, channels, height, width))
        # Average over the two spatial axes -> one feature vector per image.
        pooled = feature_maps.mean(dim=(2, 3))
        per_image_logits = self.classifier(pooled).view(batch, n_imgs, -1)
        # Element-wise max over the images belonging to the same sample.
        return per_image_logits.max(dim=1).values

In [9]:
# Build the network, then move its parameters and buffers onto the GPU.
model = Model()
model = model.cuda()

In [10]:
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score
import time

# Multi-label objective: sigmoid + binary cross-entropy applied to raw logits.
criterion = nn.BCEWithLogitsLoss()
# Adam over all model parameters with a base learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Cosine annealing with a half-period of 10 scheduler steps.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

In [11]:
# Train for 130 epochs. After each epoch, evaluate on the validation loader,
# print a summary line, and checkpoint the weights every 10th epoch.
for epoch in range(130):
    t0 = time.time()
    running_loss = 0.0

    # ---- training pass ----
    # Switch to train mode once per epoch (validation below flips it to eval).
    model.train()
    for data in train_dl:
        inputs, labels = data[0].cuda(), data[1].cuda()
        optimizer.zero_grad()

        outputs = model(inputs)
        # The loader yields float64 targets while the logits are float32; cast
        # the targets so the loss never mixes dtypes (some PyTorch versions
        # raise on a Double/Float mismatch in the backward pass).
        loss = criterion(outputs, labels.to(outputs.dtype))
        loss.backward()
        optimizer.step()
        # NOTE(review): the scheduler is stepped per batch, so with T_max=10 the
        # learning rate completes a cosine cycle every 20 batches. Confirm this
        # is intended rather than one step per epoch.
        scheduler.step()

        running_loss += loss.item()

    # ---- validation pass ----
    model.eval()
    preds = []
    targs = []
    with torch.no_grad():
        for data in valid_dl:
            inputs, labels = data[0].cuda(), data[1].cuda()
            preds.append(model(inputs).cpu())
            targs.append(labels.cpu())
    preds = torch.cat(preds)
    targs = torch.cat(targs)

    # Threshold the probabilities once and reuse the result for both metrics.
    # sklearn's signature is (y_true, y_pred); both metrics used here are
    # symmetric, so the reported values are unaffected by the corrected order.
    pred_labels = (preds.sigmoid() > 0.5).numpy()
    true_labels = targs.numpy()
    accuracy = accuracy_score(true_labels, pred_labels)
    f1 = f1_score(true_labels, pred_labels, average='micro')

    # Average over every batch that contributed to running_loss (the max()
    # only guards against division by zero if the loader is empty).
    n_batches = max(len(train_dl), 1)
    print(f'[{epoch + 1}, {time.time() - t0:.1f}] loss: {running_loss / n_batches:.3f}, acc: {accuracy:.3f}, f1: {f1:.3f}')

    # Checkpoint on every 10th zero-based epoch index, skipping epoch 0.
    if (epoch % 10 == 0) and (epoch != 0):
        torch.save(model.state_dict(), f'models/{epoch}_maxpool_{round(f1, 2)}.pth')

[1, 275.6] loss: 0.187, acc: 0.000, f1: 0.000
[2, 270.2] loss: 0.025, acc: 0.000, f1: 0.000
[4, 269.5] loss: 0.025, acc: 0.000, f1: 0.000
[5, 269.1] loss: 0.025, acc: 0.000, f1: 0.000
[6, 269.3] loss: 0.025, acc: 0.000, f1: 0.000
[7, 270.0] loss: 0.025, acc: 0.000, f1: 0.000
[8, 270.1] loss: 0.025, acc: 0.000, f1: 0.000
[9, 269.5] loss: 0.025, acc: 0.000, f1: 0.000
[10, 268.8] loss: 0.025, acc: 0.000, f1: 0.000
[11, 269.2] loss: 0.025, acc: 0.000, f1: 0.000
[12, 268.9] loss: 0.025, acc: 0.000, f1: 0.000
[13, 269.4] loss: 0.025, acc: 0.000, f1: 0.000
[14, 268.9] loss: 0.025, acc: 0.000, f1: 0.000
[15, 268.2] loss: 0.025, acc: 0.000, f1: 0.000
[16, 269.7] loss: 0.025, acc: 0.000, f1: 0.000
[17, 269.8] loss: 0.024, acc: 0.000, f1: 0.000
[18, 270.0] loss: 0.024, acc: 0.000, f1: 0.000
[19, 269.0] loss: 0.024, acc: 0.000, f1: 0.000
[20, 268.4] loss: 0.024, acc: 0.000, f1: 0.000
[21, 269.2] loss: 0.023, acc: 0.000, f1: 0.000
[22, 269.5] loss: 0.023, acc: 0.000, f1: 0.000
[23, 269.5] loss: 0.0

KeyboardInterrupt: 