In [1]:
from birdcall.data import *
from birdcall.metrics import *

import pandas as pd

In [2]:
classes = pd.read_pickle('data/classes.pkl')
train_ds = SpectrogramDataset(pd.read_pickle('data/train_set.pkl'), classes, len_mult=100, spec_max=80, spec_min=-100)
valid_ds = SpectrogramDataset(pd.read_pickle('data/val_set.pkl'), classes, len_mult=20)

In [3]:
len(train_ds), len(valid_ds)

(26400, 5280)

In [4]:
import torch
import torchvision
from torch import nn

In [5]:
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=160, shuffle=False, num_workers=NUM_WORKERS)
valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=2*160, shuffle=False, num_workers=NUM_WORKERS)

In [6]:
for b in train_dl: break
b[0].shape, b[1]

(torch.Size([160, 3, 128, 313]),
 tensor([[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float64))

In [7]:
b[0].mean(), b[0].std()

(tensor(-0.0488), tensor(1.0572))

In [8]:
res50 = torchvision.models.resnet50(True)

In [9]:
bottom = nn.Sequential(*list(res50.children())[:6])
mid = nn.Sequential(*list(res50.children())[6:-2])

In [11]:
class Head(nn.Module):
    def __init__(self):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.layers = nn.ModuleList(
            [
                nn.Linear(2048, 1024), nn.ReLU(), nn.Dropout(p=0.2),
                nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(p=0.2),
                nn.Linear(1024, len(classes))
            ]
        )
    def forward(self, x):
#         set_trace()
        x = self.avg_pool(x)
        x = x.view(x.shape[0], -1)
        for l in self.layers:
            x = l(x)
        return x

In [12]:
model = nn.Sequential(bottom, mid, Head())

In [13]:
model.cuda();

In [14]:
model(b[0].cuda()).shape

torch.Size([160, 264])

In [15]:
import torch.optim as optim

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), 1e-3)

In [16]:
from sklearn.metrics import accuracy_score, f1_score

In [18]:
for epoch in range(60):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(train_dl, 0):
        model.train()
        inputs, labels = data[0].cuda(), data[1].cuda()
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 165 == 164:
            model.eval();
            preds = []
            targs = []

            with torch.no_grad():
                for data in valid_dl:
                    inputs, labels = data[0].cuda(), data[1].cuda()
                    outputs = model(inputs)
                    preds.append(outputs.cpu().detach())
                    targs.append(labels.cpu().detach())

                preds = torch.cat(preds)
                targs = torch.cat(targs)
            
            print('[%d, %5d] loss: %.3f, accuracy: %.3f, f1: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000, accuracy_score(preds.sigmoid() > 0.5, targs), f1_score(preds.sigmoid() > 0.5, targs, average='micro')))
            running_loss = 0.0

[1,   165] loss: 0.002, accuracy: 0.000, f1: 0.000
[2,   165] loss: 0.002, accuracy: 0.000, f1: 0.000
[3,   165] loss: 0.002, accuracy: 0.000, f1: 0.000
[4,   165] loss: 0.002, accuracy: 0.000, f1: 0.000
[5,   165] loss: 0.002, accuracy: 0.000, f1: 0.000
[6,   165] loss: 0.002, accuracy: 0.000, f1: 0.000
[7,   165] loss: 0.002, accuracy: 0.000, f1: 0.000
[8,   165] loss: 0.002, accuracy: 0.000, f1: 0.000
[9,   165] loss: 0.002, accuracy: 0.000, f1: 0.000
[10,   165] loss: 0.001, accuracy: 0.001, f1: 0.001
[11,   165] loss: 0.001, accuracy: 0.004, f1: 0.007
[12,   165] loss: 0.001, accuracy: 0.005, f1: 0.006
[13,   165] loss: 0.001, accuracy: 0.003, f1: 0.004
[14,   165] loss: 0.001, accuracy: 0.004, f1: 0.005
[15,   165] loss: 0.001, accuracy: 0.000, f1: 0.001
[16,   165] loss: 0.001, accuracy: 0.000, f1: 0.000
[17,   165] loss: 0.001, accuracy: 0.000, f1: 0.000
[18,   165] loss: 0.001, accuracy: 0.000, f1: 0.000
[19,   165] loss: 0.001, accuracy: 0.002, f1: 0.002
[20,   165] loss: 0.0