In [1]:
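# Domain-adversarial training with a gradient reversal layer, following
# Ganin & Lempitsky, "Unsupervised Domain Adaptation by Backpropagation":
# https://arxiv.org/abs/1409.7495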

In [2]:
from birdcall.data import *
from birdcall.metrics import *
from birdcall.ops import *

import torch
import torchvision
from torch import nn
import numpy as np
import pandas as pd
from pathlib import Path
import soundfile as sf
from collections import defaultdict

In [3]:
# Class list for the classifier head; later cells call `classes.index(...)`
# and `len(classes)` on it.
# NOTE(review): read_pickle can execute arbitrary code while unpickling --
# only load files from a trusted source.
classes = pd.read_pickle('data/classes.pkl')

In [4]:
# NOTE(review): read_pickle can execute arbitrary code while unpickling --
# only load files from a trusted source.
splits = pd.read_pickle('data/all_splits.pkl')
all_train_items = pd.read_pickle('data/all_train_items.pkl')

# Convert the item list to an array once so it can be fancy-indexed by both
# index sets of the first split (previously the conversion ran twice).
all_train_items_arr = np.array(all_train_items)

# splits[0] holds the (train indices, validation indices) pair used here.
train_items = all_train_items_arr[splits[0][0]].tolist()
val_items = all_train_items_arr[splits[0][1]].tolist()

In [5]:
# Group the training clips by class name: each key maps to the list of
# (path, duration) pairs that belong to that class.
class2train_items = defaultdict(list)

for cls_name, path, duration in train_items:
    class2train_items[cls_name] += [(path, duration)]

In [6]:
# Training dataset built from the per-class clip lists, wrapped in a shuffling
# loader with 16 items per batch.
# NUM_WORKERS is not defined in this notebook; presumably it comes from one of
# the `birdcall` star imports -- TODO confirm.
train_ds = MelspecPoolWithShiftedDataset(class2train_items, classes, len_mult=50)
train_dl = torch.utils.data.DataLoader(
    train_ds,
    batch_size=16,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

In [7]:
# Replace each validation item's class name with its position in `classes`,
# keeping the remaining two fields untouched.
# NOTE: this rebinds `val_items`, so the cell is not safe to run twice in the
# same kernel (the second run would look up integers in `classes`).
val_items = [
    (classes.index(cls_name), path, duration)
    for cls_name, path, duration in val_items
]
val_items_binned = bin_items_negative_class(val_items)

In [8]:
from torch.autograd import Function

In [9]:
def alpha(p, delta=10):
    """Logistic ramp ``2 / (1 + exp(-delta * p)) - 1``.

    The value is 0 at ``p == 0`` and approaches 1 as ``delta * p`` grows.

    Args:
        p: Scalar or NumPy array-like input to the ramp.
        delta: Steepness of the ramp.

    Returns:
        The ramp value, computed with NumPy (same shape as ``p``).
    """
    denominator = 1 + np.exp(-delta * p)
    return 2 / denominator - 1

In [10]:
class GradientReversal(Function):
    """Identity in the forward pass; negated, scaled gradient in the backward pass.

    Usage:
        ``GradientReversal.apply(x)``         -- scale taken from ``alpha(p)``
        ``GradientReversal.apply(x, coeff)``  -- scale given explicitly

    The one-argument form keeps the original behaviour: ``backward`` evaluates
    ``alpha(p)`` using the notebook-level names ``alpha`` and ``p``, so both
    must exist in the global namespace by the time ``.backward()`` runs.
    Passing ``coeff`` removes that dependency on hidden notebook state.
    """

    @staticmethod
    def forward(ctx, input, coeff=None):
        """Return ``input`` unchanged (as a view) and remember ``coeff``.

        Args:
            ctx: Autograd context object.
            input: Tensor to pass through.
            coeff: Optional non-negative scale for the reversed gradient.
                ``None`` defers to ``alpha(p)`` at backward time.
        """
        ctx.coeff = coeff
        # view_as yields a distinct output tensor, so the custom backward is
        # attached to it and not to the caller's own tensor object.
        return input.view_as(input)

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``-scale * grad_output`` for ``input`` and ``None`` for ``coeff``."""
        coeff = ctx.coeff
        if coeff is None:
            # Legacy path: scale comes from the global training progress `p`.
            coeff = alpha(p)
        # float() turns NumPy / 0-d tensor scalars into a plain Python number
        # so the product keeps grad_output's dtype and device.
        # The trailing None is the (non-existent) gradient for `coeff`;
        # autograd drops surplus None gradients when `coeff` was not passed.
        return -float(coeff) * grad_output, None

In [11]:
class Classifier(nn.Module):
    """MLP head mapping 512-dimensional features to ``num_classes`` logits.

    Two hidden stages of ``Linear(512, 512) -> ReLU -> Dropout -> BatchNorm1d``
    are followed by a final ``Linear(512, num_classes)``.

    Args:
        num_classes: Size of the output layer.
        p: Dropout probability used in both hidden stages.
    """

    def __init__(self, num_classes, p=0.5):
        super().__init__()
        width = 512

        def hidden_stage():
            # One Linear -> ReLU -> Dropout -> BatchNorm stage at fixed width.
            return [
                nn.Linear(width, width),
                nn.ReLU(),
                nn.Dropout(p=p),
                nn.BatchNorm1d(width),
            ]

        # Stages are created in the same order as the flat list they replace,
        # so parameter names (layers.0 ... layers.8) are unchanged.
        self.layers = nn.ModuleList(
            hidden_stage() + hidden_stage() + [nn.Linear(width, num_classes)]
        )

    def forward(self, x):
        """Apply every layer in sequence and return the final logits."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

In [12]:
class Model(nn.Module):
    """ResNet-34 feature extractor with a class head and a domain head.

    The domain head receives the features through ``GradientReversal``, so
    gradients flowing back from it into the CNN are negated and scaled.

    ``forward`` takes a 5-D tensor indexed as ``(batch, image, c, h, w)`` and
    returns ``(ebird_preds, domain_preds)``, each pooled over the image axis
    with ``lme_pool`` (imported from the project; its exact pooling behaviour
    is not visible in this notebook).
    """

    def __init__(self):
        super().__init__()
        # Drop the last two children of the torchvision ResNet so the network
        # emits a spatial feature map; forward() averages it over (2, 3).
        self.cnn = nn.Sequential(*list(torchvision.models.resnet34(True).children())[:-2])
        self.ebird_classifier = Classifier(len(classes))
        self.domain_classifier = Classifier(1)
        # Hold the Function class itself rather than an instance: autograd
        # Functions with static methods are invoked through `.apply` on the
        # class, and instantiating them is deprecated by PyTorch.
        self.grad_reversal = GradientReversal

    def forward(self, x):
        """Return pooled ``(ebird_preds, domain_preds)`` logits for a batch."""
        x = torch.log10(1 + x)

        # Per-example max scaling. For non-negative inputs this maps every
        # example into [0, 1].
        max_per_example = x.view(x.shape[0], -1).max(1)[0]
        # An all-zero example has max 0; dividing by it would produce NaNs
        # that spread through the whole batch via BatchNorm. Dividing by 1
        # instead leaves such an example at zero.
        max_per_example = max_per_example.masked_fill(max_per_example == 0, 1.0)
        x = x / max_per_example[:, None, None, None, None]

        # Fold the image axis into the batch axis for the CNN.
        bs, im_num = x.shape[:2]
        x = x.view(-1, x.shape[2], x.shape[3], x.shape[4])
        x = self.cnn(x)
        per_image_features = x.mean((2, 3))

        ebird_logits = self.ebird_classifier(per_image_features)

        # Same values as per_image_features in the forward pass; only the
        # backward gradient is reversed.
        reversed_features = self.grad_reversal.apply(per_image_features)
        domain_logits = self.domain_classifier(reversed_features)

        # Restore the (batch, image, logits) layout and pool over images.
        ebird_preds = lme_pool(ebird_logits.view(bs, im_num, -1))
        domain_preds = lme_pool(domain_logits.view(bs, im_num, -1))

        return ebird_preds, domain_preds

In [13]:
# Build the network and move its parameters to the GPU (requires CUDA).
model = Model().cuda()

In [14]:
# Weights saved by an earlier run, used below to warm-start this model.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
state_dict = torch.load('models/235_lmepool_simple_minmax_log_0.74.pth')

In [15]:
# strict=False tolerates key mismatches between the checkpoint and this model.
# The recorded output below lists ebird_classifier.* and domain_classifier.*
# entries as missing, so those tensors keep their fresh initialisation.
model.load_state_dict(state_dict, strict=False)

_IncompatibleKeys(missing_keys=['ebird_classifier.layers.0.weight', 'ebird_classifier.layers.0.bias', 'ebird_classifier.layers.3.weight', 'ebird_classifier.layers.3.bias', 'ebird_classifier.layers.3.running_mean', 'ebird_classifier.layers.3.running_var', 'ebird_classifier.layers.4.weight', 'ebird_classifier.layers.4.bias', 'ebird_classifier.layers.7.weight', 'ebird_classifier.layers.7.bias', 'ebird_classifier.layers.7.running_mean', 'ebird_classifier.layers.7.running_var', 'ebird_classifier.layers.8.weight', 'ebird_classifier.layers.8.bias', 'domain_classifier.layers.0.weight', 'domain_classifier.layers.0.bias', 'domain_classifier.layers.3.weight', 'domain_classifier.layers.3.bias', 'domain_classifier.layers.3.running_mean', 'domain_classifier.layers.3.running_var', 'domain_classifier.layers.4.weight', 'domain_classifier.layers.4.bias', 'domain_classifier.layers.7.weight', 'domain_classifier.layers.7.bias', 'domain_classifier.layers.7.running_mean', 'domain_classifier.layers.7.running_

In [16]:
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score
import time

In [17]:
# Both heads emit logits, so a single BCE-with-logits criterion serves the
# class loss and the domain loss.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# NOTE(review): T_max is counted in scheduler.step() calls, and the training
# cells call scheduler.step() once per batch -- confirm that a 5-step cosine
# half-period is intended.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5)

In [18]:
# Soundscape evaluation set, scored in the training cells at every
# evaluation epoch. The loader keeps the dataset order (no shuffle argument).
sc_ds = SoundscapeMelspecPoolDataset(
    pd.read_pickle('data/soundscape_items.pkl'),
    classes,
)
sc_dl = torch.utils.data.DataLoader(
    sc_ds,
    batch_size=2 * 16,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

In [None]:
# Domain-adversarial training run.
# Each epoch trains on train_dl; every 5th epoch evaluates on the binned
# validation items and on the soundscape loader, prints a summary line and
# saves a checkpoint.
t0 = time.time()
total_epochs = 260
# Decision thresholds swept when reporting the best micro-F1.
thresholds = np.linspace(0.4, 1, 61)

for epoch in range(total_epochs):
    running_loss = 0.0
    # Training progress in (0, 1]. GradientReversal.backward reads this name
    # as a global (through alpha(p)), so it has to stay a module-level variable.
    p = (epoch + 1) / total_epochs

    # Evaluation below switches to eval mode; switch back once per epoch.
    model.train()
    for data in train_dl:
        inputs, ebird_labels, domain_labels = data[0].cuda(), data[1].cuda(), data[2].cuda()
        optimizer.zero_grad()

        ebird_preds, domain_preds = model(inputs)

        # The class loss uses only rows with domain label 0 (presumably the
        # rows that carry usable class labels -- TODO confirm).
        ebird_mask = domain_labels == 0
        if ebird_mask.any():
            ebird_loss = criterion(ebird_preds[ebird_mask], ebird_labels[ebird_mask])
        else:
            # A batch with no such rows would give a mean over an empty
            # tensor (NaN) and abort the run; contribute zero instead.
            ebird_loss = ebird_preds.new_zeros(())
        domain_loss = criterion(domain_preds, domain_labels.unsqueeze(1))
        loss = ebird_loss + domain_loss

        loss_value = loss.item()
        if np.isnan(loss_value): raise Exception(f'!!! nan encountered in loss !!! epoch: {epoch}\n')

        loss.backward()
        optimizer.step()
        # NOTE(review): the scheduler is stepped per batch while
        # CosineAnnealingLR was created with T_max=5 -- confirm this is intended.
        scheduler.step()

        running_loss += loss_value

    if epoch % 5 == 4:
        model.eval()
        preds = []
        targs = []

        # One dataset/loader per bin of val_items_binned.
        for num_specs in val_items_binned.keys():
            valid_ds = MelspecShortishValidatioDataset(val_items_binned[num_specs], classes)
            valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=2*16, num_workers=NUM_WORKERS, pin_memory=True)

            with torch.no_grad():
                for data in valid_dl:
                    inputs, labels = data[0].cuda(), data[1].cuda()
                    outputs = model(inputs)[0]
                    preds.append(outputs.cpu().detach())
                    targs.append(labels.cpu().detach())

        preds = torch.cat(preds)
        targs = torch.cat(targs)

        # Sigmoid once, then sweep thresholds. f1_score takes (y_true, y_pred).
        probs = preds.sigmoid()
        f1s = []
        ts = []
        for t in thresholds:
            f1s.append(f1_score(targs, probs > t, average='micro'))
            ts.append(t)

        sc_preds = []
        sc_targs = []
        with torch.no_grad():
            for data in sc_dl:
                inputs, labels = data[0].cuda(), data[1].cuda()
                outputs = model(inputs)[0]
                sc_preds.append(outputs.cpu().detach())
                sc_targs.append(labels.cpu().detach())

        sc_preds = torch.cat(sc_preds)
        sc_targs = torch.cat(sc_targs)
        sc_probs = sc_preds.sigmoid()
        # Fixed-threshold score, kept for inspection after the run.
        sc_f1 = f1_score(sc_targs, sc_probs > 0.5, average='micro')

        sc_f1s = []
        sc_ts = []
        for t in thresholds:
            sc_f1s.append(f1_score(sc_targs, sc_probs > t, average='micro'))
            sc_ts.append(t)

        # Mean loss over all batches of this epoch (running_loss is reset at
        # the top of every epoch).
        print(f'[{epoch + 1}, {(time.time() - t0)/60:.1f}] loss: {running_loss / len(train_dl):.3f}, f1: {max(f1s):.3f}, sc_f1: {max(sc_f1s):.3f}')

        torch.save(model.state_dict(), f'models/{epoch+1}_lmepool_simple_minmax_log_da_{round(max(f1s), 2)}.pth')

[5, 19.1] loss: 0.504, f1: 0.247, sc_f1: 0.000
[10, 41.8] loss: 0.503, f1: 0.570, sc_f1: 0.000
[15, 63.5] loss: 0.508, f1: 0.607, sc_f1: 0.000
[20, 86.6] loss: 0.509, f1: 0.630, sc_f1: 0.010
[25, 109.3] loss: 0.513, f1: 0.589, sc_f1: 0.000
[30, 133.6] loss: 0.522, f1: 0.648, sc_f1: 0.000
[35, 159.0] loss: 0.517, f1: 0.655, sc_f1: 0.000
[40, 182.4] loss: 0.516, f1: 0.573, sc_f1: 0.000
[45, 203.5] loss: 0.520, f1: 0.618, sc_f1: 0.000
[50, 228.6] loss: 0.507, f1: 0.675, sc_f1: 0.011
[55, 252.5] loss: 0.508, f1: 0.584, sc_f1: 0.000
[60, 274.6] loss: 0.511, f1: 0.652, sc_f1: 0.000
[65, 299.9] loss: 0.515, f1: 0.622, sc_f1: 0.000
[70, 322.2] loss: 0.508, f1: 0.577, sc_f1: 0.000
[75, 344.5] loss: 0.512, f1: 0.595, sc_f1: 0.000
[80, 368.1] loss: 0.508, f1: 0.393, sc_f1: 0.000
[85, 390.6] loss: 0.513, f1: 0.366, sc_f1: 0.000
[90, 413.6] loss: 0.515, f1: 0.321, sc_f1: 0.000


In [21]:
# Domain-adversarial training run (200-epoch variant).
# NOTE: this continues from whatever state `model`, `optimizer` and
# `scheduler` hold in the kernel; it does not re-create them.
# Each epoch trains on train_dl; every 5th epoch evaluates on the binned
# validation items and on the soundscape loader, prints a summary line and
# saves a checkpoint.
t0 = time.time()
total_epochs = 200
# Decision thresholds swept when reporting the best micro-F1.
thresholds = np.linspace(0.4, 1, 61)

for epoch in range(total_epochs):
    running_loss = 0.0
    # Training progress in (0, 1]. GradientReversal.backward reads this name
    # as a global (through alpha(p)), so it has to stay a module-level variable.
    p = (epoch + 1) / total_epochs

    # Evaluation below switches to eval mode; switch back once per epoch.
    model.train()
    for data in train_dl:
        inputs, ebird_labels, domain_labels = data[0].cuda(), data[1].cuda(), data[2].cuda()
        optimizer.zero_grad()

        ebird_preds, domain_preds = model(inputs)

        # The class loss uses only rows with domain label 0 (presumably the
        # rows that carry usable class labels -- TODO confirm).
        ebird_mask = domain_labels == 0
        if ebird_mask.any():
            ebird_loss = criterion(ebird_preds[ebird_mask], ebird_labels[ebird_mask])
        else:
            # A batch with no such rows would give a mean over an empty
            # tensor (NaN) and abort the run; contribute zero instead.
            ebird_loss = ebird_preds.new_zeros(())
        domain_loss = criterion(domain_preds, domain_labels.unsqueeze(1))
        loss = ebird_loss + domain_loss

        loss_value = loss.item()
        if np.isnan(loss_value): raise Exception(f'!!! nan encountered in loss !!! epoch: {epoch}\n')

        loss.backward()
        optimizer.step()
        # NOTE(review): the scheduler is stepped per batch while
        # CosineAnnealingLR was created with T_max=5 -- confirm this is intended.
        scheduler.step()

        running_loss += loss_value

    if epoch % 5 == 4:
        model.eval()
        preds = []
        targs = []

        # One dataset/loader per bin of val_items_binned.
        for num_specs in val_items_binned.keys():
            valid_ds = MelspecShortishValidatioDataset(val_items_binned[num_specs], classes)
            valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=2*16, num_workers=NUM_WORKERS, pin_memory=True)

            with torch.no_grad():
                for data in valid_dl:
                    inputs, labels = data[0].cuda(), data[1].cuda()
                    outputs = model(inputs)[0]
                    preds.append(outputs.cpu().detach())
                    targs.append(labels.cpu().detach())

        preds = torch.cat(preds)
        targs = torch.cat(targs)

        # Sigmoid once, then sweep thresholds. f1_score takes (y_true, y_pred).
        probs = preds.sigmoid()
        f1s = []
        ts = []
        for t in thresholds:
            f1s.append(f1_score(targs, probs > t, average='micro'))
            ts.append(t)

        sc_preds = []
        sc_targs = []
        with torch.no_grad():
            for data in sc_dl:
                inputs, labels = data[0].cuda(), data[1].cuda()
                outputs = model(inputs)[0]
                sc_preds.append(outputs.cpu().detach())
                sc_targs.append(labels.cpu().detach())

        sc_preds = torch.cat(sc_preds)
        sc_targs = torch.cat(sc_targs)
        sc_probs = sc_preds.sigmoid()
        # Fixed-threshold score, kept for inspection after the run.
        sc_f1 = f1_score(sc_targs, sc_probs > 0.5, average='micro')

        sc_f1s = []
        sc_ts = []
        for t in thresholds:
            sc_f1s.append(f1_score(sc_targs, sc_probs > t, average='micro'))
            sc_ts.append(t)

        # Mean loss over all batches of this epoch (running_loss is reset at
        # the top of every epoch).
        print(f'[{epoch + 1}, {(time.time() - t0)/60:.1f}] loss: {running_loss / len(train_dl):.3f}, f1: {max(f1s):.3f}, sc_f1: {max(sc_f1s):.3f}')

        torch.save(model.state_dict(), f'models/{epoch+1}_lmepool_simple_minmax_log_da_{round(max(f1s), 2)}.pth')

[5, 21.9] loss: 0.514, f1: 0.485, sc_f1: 0.000
[10, 44.2] loss: 0.512, f1: 0.537, sc_f1: 0.000
[15, 66.4] loss: 0.520, f1: 0.576, sc_f1: 0.000
[20, 87.7] loss: 0.509, f1: 0.538, sc_f1: 0.000
[25, 110.8] loss: 0.518, f1: 0.509, sc_f1: 0.000
[30, 132.6] loss: 0.506, f1: 0.539, sc_f1: 0.000
[35, 153.4] loss: 0.511, f1: 0.423, sc_f1: 0.000
[40, 176.2] loss: 0.510, f1: 0.534, sc_f1: 0.000
[45, 197.9] loss: 0.517, f1: 0.583, sc_f1: 0.000
[50, 219.3] loss: 0.505, f1: 0.512, sc_f1: 0.000
[55, 240.7] loss: 0.521, f1: 0.522, sc_f1: 0.000
[60, 262.7] loss: 0.511, f1: 0.542, sc_f1: 0.000
[65, 285.0] loss: 0.512, f1: 0.542, sc_f1: 0.000
[70, 307.3] loss: 0.520, f1: 0.531, sc_f1: 0.000
[75, 329.1] loss: 0.516, f1: 0.500, sc_f1: 0.000
[80, 350.1] loss: 0.518, f1: 0.453, sc_f1: 0.000
[85, 371.7] loss: 0.516, f1: 0.470, sc_f1: 0.000
[90, 393.3] loss: 0.513, f1: 0.488, sc_f1: 0.000
[95, 415.2] loss: 0.518, f1: 0.456, sc_f1: 0.000
[100, 437.2] loss: 0.509, f1: 0.453, sc_f1: 0.000
[105, 459.7] loss: 0.509

In [22]:
from IPython.lib.display import FileLink

In [23]:
# Render a download link for a saved checkpoint.
# NOTE(review): the name follows the training cell's save pattern for epoch
# 200 with f1 0.59, but the recorded training output above stops before epoch
# 200 -- confirm the file exists before relying on this link.
FileLink('models/200_lmepool_simple_minmax_log_da_0.59.pth')