In [1]:
from birdcall.data import *
from birdcall.metrics import *
from birdcall.ops import *

import torch
import torchvision
from torch import nn
import numpy as np
import pandas as pd
from pathlib import Path
import soundfile as sf

In [2]:
# Mini-batch size for the training loader (the evaluation loaders use 2 * BS).
BS: int = 16
# Learning rate handed to the optimizer when it is constructed.
MAX_LR: float = 0.001

In [3]:
# Class names; a name's position in this sequence is used as its label index
# further down (via ``classes.index``).
classes = pd.read_pickle(Path('data') / 'classes.pkl')

In [None]:
# all_train_items = pd.read_pickle('data/all_train_items.pkl')

# all_train_items_npy = []

# for ebird_code, path, duration in all_train_items:
#     fn = path.stem
#     new_path = Path(f'data/npy/train_resampled/{ebird_code}/{fn}.npy')
#     all_train_items_npy.append((ebird_code, new_path, duration))
    
# pd.to_pickle(all_train_items_npy, 'data/all_train_items_npy.pkl')

In [4]:
# Precomputed split indices and the list of training items
# (presumably (ebird_code, npy_path, duration) tuples - confirm against the
# code that wrote 'data/all_train_items_npy.pkl').
splits = pd.read_pickle('data/all_splits.pkl')
all_train_items = pd.read_pickle('data/all_train_items_npy.pkl')

# Only the first split is used: element 0 selects the training rows,
# element 1 the validation rows.
fold0_train_idx = splits[0][0]
fold0_val_idx = splits[0][1]

# Convert once to an array so the index collections can be applied directly.
all_items_arr = np.array(all_train_items)
train_items = all_items_arr[fold0_train_idx].tolist()
val_items = all_items_arr[fold0_val_idx].tolist()

In [5]:
# from collections import defaultdict

# class2train_items = defaultdict(list)

# for cls_name, path, duration in train_items:
#     class2train_items[cls_name].append((path, duration))

In [6]:
# pd.to_pickle(class2train_items, 'data/class2train_items.pkl')

In [7]:
# Cached mapping from class name to its training items (see the commented-out
# cells above for how the pickle was produced from ``train_items``).
class2train_items = pd.read_pickle(Path('data') / 'class2train_items.pkl')

In [8]:
# Training dataset built from the per-class item pools, wrapped in a shuffled loader.
train_ds = MelspecPoolDataset(
    class2train_items,
    classes,
    len_mult=50,
    normalize=False,
)
train_dl = torch.utils.data.DataLoader(
    train_ds,
    batch_size=BS,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

In [9]:
# Replace each validation item's class name with its integer position in ``classes``.
# NOTE(review): this rebinds ``val_items``, so the cell is not re-runnable on its
# own - re-run the cell that builds ``val_items`` from the splits first.
val_items = [
    (classes.index(ebird_code), path, duration)
    for ebird_code, path, duration in val_items
]
# Group the indexed items with the project helper; the training cells iterate the
# resulting mapping key by key.
val_items_binned = bin_items_negative_class(val_items)

In [10]:
class Model(nn.Module):
    """ResNet-34 trunk plus an MLP head, applied to several images per example.

    Each example is a stack of ``im_num`` images. Every image is scored
    independently, and the per-image class logits are then merged into one
    prediction per example with ``lme_pool``.
    """

    def __init__(self):
        super().__init__()
        # ResNet-34 built with ``pretrained=False`` and without its last two
        # child modules (in torchvision: the global average pool and the fc
        # layer), so the trunk returns a spatial feature map.
        self.cnn = nn.Sequential(*list(torchvision.models.resnet34(False).children())[:-2])
        # Two hidden blocks of Linear -> ReLU -> Dropout -> BatchNorm, followed
        # by one output logit per entry of the module-level ``classes``.
        self.classifier = nn.Sequential(
            nn.Linear(512, 512), nn.ReLU(), nn.Dropout(p=0.5), nn.BatchNorm1d(512),
            nn.Linear(512, 512), nn.ReLU(), nn.Dropout(p=0.5), nn.BatchNorm1d(512),
            nn.Linear(512, len(classes)),
        )

    def forward(self, x):
        """Score a batch of image stacks.

        Args:
            x: 5-D tensor whose first two axes are (batch, images per example).
                The remaining three axes are fed to the CNN as one image
                (presumably channels, height, width - confirm against the
                dataset). The tensor is NOT modified.

        Returns:
            The result of ``lme_pool`` applied to the
            ``(batch, im_num, n_classes)`` logits - presumably one row of
            logits per example; confirm against ``lme_pool``.
        """
        bs, im_num = x.shape[:2]

        # Scale every example by its own maximum (per example, not per batch).
        max_per_example = x.reshape(bs, -1).max(1)[0]
        # An all-zero example would otherwise divide by zero and turn the whole
        # example into NaNs; dividing by 1 leaves it as zeros instead.
        max_per_example = max_per_example.masked_fill(max_per_example == 0, 1)
        # Out-of-place division so the caller's tensor is left untouched.
        x = x / max_per_example[:, None, None, None, None]

        # Fold the per-example image axis into the batch axis for the CNN.
        x = x.reshape(-1, x.shape[2], x.shape[3], x.shape[4])
        x = self.cnn(x)
        x = x.mean((2, 3))  # average the feature map over its two spatial axes
        x = self.classifier(x)

        # Restore the (batch, im_num, n_classes) layout and pool across images.
        x = x.view(bs, im_num, -1)
        x = lme_pool(x)
        return x

In [11]:
# Build the network, then move its parameters and buffers to the GPU.
model = Model()
model = model.cuda()

In [12]:
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score
import time

In [13]:
# Multi-label objective: the model outputs raw logits, the loss applies the sigmoid.
criterion = nn.BCEWithLogitsLoss()
# Adam over every model parameter, starting at MAX_LR.
optimizer = optim.Adam(model.parameters(), lr=MAX_LR)
# Cosine annealing with a half-period of 5 scheduler steps.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5)

In [14]:
# sc_items = pd.read_pickle('data/soundscape_items.pkl')

# sc_items_npy = []
# for labels, path, offset in sc_items:
#     sc_items_npy.append((labels, Path(f'data/npy/shifted/{path.stem}.npy'), offset))
    
# pd.to_pickle(sc_items_npy, 'data/soundscape_items_npy.pkl')

In [15]:
# Soundscape evaluation set: load the cached item list, wrap it in the project
# dataset and iterate it in order (no shuffling) with double-sized batches.
sc_items_npy = pd.read_pickle('data/soundscape_items_npy.pkl')
sc_ds = SoundscapeMelspecPoolDataset(sc_items_npy, classes)
sc_dl = torch.utils.data.DataLoader(
    sc_ds,
    batch_size=2 * BS,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

In [17]:
# Train for epochs 0-179. After every 5th epoch: evaluate on the binned
# validation items and on the soundscape loader, report the best micro-F1 over
# a sweep of decision thresholds, and save a checkpoint.

# torch.save does not create directories; make sure the target exists up front
# rather than failing after the first five epochs of training.
Path('models').mkdir(parents=True, exist_ok=True)

# Decision thresholds tried when converting sigmoid outputs to labels.
thresholds = np.linspace(0.4, 1, 61)

t0 = time.time()
for epoch in range(180):
    running_loss = 0.0
    # The evaluation branch below switches to eval mode, so re-enable training
    # mode once at the start of every epoch.
    model.train()
    for data in train_dl:
        inputs, labels = data[0].cuda(), data[1].cuda()
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss_value = loss.item()  # read once; reused for the NaN guard and the running sum
        if np.isnan(loss_value):
            raise Exception(f'!!! nan encountered in loss !!! epoch: {epoch}\n')
        loss.backward()
        optimizer.step()
        # NOTE(review): the scheduler is stepped per batch while it was built
        # with T_max=5, so the learning rate completes a cosine half-cycle every
        # 5 batches. Confirm this is intended rather than a per-epoch schedule.
        scheduler.step()

        running_loss += loss_value

    if epoch % 5 == 4:
        model.eval()
        preds = []
        targs = []

        # One loader per bin of validation items.
        for num_specs in val_items_binned.keys():
            valid_ds = MelspecShortishValidatioDataset(val_items_binned[num_specs], classes)
            valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=2*BS, num_workers=NUM_WORKERS, pin_memory=True)

            with torch.no_grad():
                for data in valid_dl:
                    inputs, labels = data[0].cuda(), data[1].cuda()
                    outputs = model(inputs)
                    preds.append(outputs.cpu().detach())
                    targs.append(labels.cpu().detach())

        preds = torch.cat(preds)
        targs = torch.cat(targs)

        # Threshold sweep on the validation predictions. The sigmoid is computed
        # once, and f1_score receives (y_true, y_pred) in its documented order.
        val_probs = preds.sigmoid()
        ts = list(thresholds)
        f1s = [f1_score(targs, val_probs > t, average='micro') for t in ts]

        sc_preds = []
        sc_targs = []
        with torch.no_grad():
            for data in sc_dl:
                inputs, labels = data[0].cuda(), data[1].cuda()
                outputs = model(inputs)
                sc_preds.append(outputs.cpu().detach())
                sc_targs.append(labels.cpu().detach())

        sc_preds = torch.cat(sc_preds)
        sc_targs = torch.cat(sc_targs)
        sc_probs = sc_preds.sigmoid()
        sc_f1 = f1_score(sc_targs, sc_probs > 0.5, average='micro')

        # NOTE(review): the recorded output shows sc_f1: 0.000 at every
        # checkpoint - worth verifying the soundscape targets and inputs.
        sc_ts = list(thresholds)
        sc_f1s = [f1_score(sc_targs, sc_probs > t, average='micro') for t in sc_ts]

        # Mean loss over every batch of the epoch that just finished.
        mean_loss = running_loss / max(len(train_dl), 1)
        print(f'[{epoch + 1}, {(time.time() - t0)/60:.1f}] loss: {mean_loss:.3f}, f1: {max(f1s):.3f}, sc_f1: {max(sc_f1s):.3f}')
        running_loss = 0.0

        torch.save(model.state_dict(), f'models/{epoch+1}_lmepool_simple_minmax_not_pretrained_{round(max(f1s), 2)}.pth')

[5, 14.7] loss: 0.023, f1: 0.000, sc_f1: 0.000
[10, 29.3] loss: 0.020, f1: 0.000, sc_f1: 0.000
[15, 44.0] loss: 0.018, f1: 0.039, sc_f1: 0.000
[20, 58.6] loss: 0.017, f1: 0.122, sc_f1: 0.000
[25, 73.3] loss: 0.015, f1: 0.243, sc_f1: 0.000
[30, 87.9] loss: 0.014, f1: 0.327, sc_f1: 0.000
[35, 102.6] loss: 0.012, f1: 0.401, sc_f1: 0.000
[40, 117.3] loss: 0.011, f1: 0.492, sc_f1: 0.000
[45, 131.9] loss: 0.010, f1: 0.547, sc_f1: 0.000
[50, 146.6] loss: 0.009, f1: 0.552, sc_f1: 0.000
[55, 161.2] loss: 0.008, f1: 0.603, sc_f1: 0.000
[60, 175.9] loss: 0.007, f1: 0.620, sc_f1: 0.000
[65, 190.6] loss: 0.006, f1: 0.643, sc_f1: 0.000
[70, 205.2] loss: 0.006, f1: 0.664, sc_f1: 0.000
[75, 219.8] loss: 0.005, f1: 0.676, sc_f1: 0.000
[80, 234.5] loss: 0.005, f1: 0.681, sc_f1: 0.000
[85, 249.1] loss: 0.004, f1: 0.677, sc_f1: 0.000
[90, 263.8] loss: 0.004, f1: 0.682, sc_f1: 0.000
[95, 278.4] loss: 0.003, f1: 0.700, sc_f1: 0.000
[100, 293.1] loss: 0.003, f1: 0.702, sc_f1: 0.000
[105, 307.7] loss: 0.003, 

In [18]:
# Continue training for epochs 180-279 with the same model, optimizer and
# scheduler objects. After every 5th epoch: evaluate on the binned validation
# items and on the soundscape loader, report the best micro-F1 over a sweep of
# decision thresholds, and save a checkpoint.

# torch.save does not create directories; make sure the target exists up front
# rather than failing after the first five epochs of training.
Path('models').mkdir(parents=True, exist_ok=True)

# Decision thresholds tried when converting sigmoid outputs to labels.
thresholds = np.linspace(0.4, 1, 61)

t0 = time.time()
for epoch in range(180, 280):
    running_loss = 0.0
    # The evaluation branch below switches to eval mode, so re-enable training
    # mode once at the start of every epoch.
    model.train()
    for data in train_dl:
        inputs, labels = data[0].cuda(), data[1].cuda()
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss_value = loss.item()  # read once; reused for the NaN guard and the running sum
        if np.isnan(loss_value):
            raise Exception(f'!!! nan encountered in loss !!! epoch: {epoch}\n')
        loss.backward()
        optimizer.step()
        # NOTE(review): the scheduler is stepped per batch while it was built
        # with T_max=5, so the learning rate completes a cosine half-cycle every
        # 5 batches. Confirm this is intended rather than a per-epoch schedule.
        scheduler.step()

        running_loss += loss_value

    if epoch % 5 == 4:
        model.eval()
        preds = []
        targs = []

        # One loader per bin of validation items.
        for num_specs in val_items_binned.keys():
            valid_ds = MelspecShortishValidatioDataset(val_items_binned[num_specs], classes)
            valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=2*BS, num_workers=NUM_WORKERS, pin_memory=True)

            with torch.no_grad():
                for data in valid_dl:
                    inputs, labels = data[0].cuda(), data[1].cuda()
                    outputs = model(inputs)
                    preds.append(outputs.cpu().detach())
                    targs.append(labels.cpu().detach())

        preds = torch.cat(preds)
        targs = torch.cat(targs)

        # Threshold sweep on the validation predictions. The sigmoid is computed
        # once, and f1_score receives (y_true, y_pred) in its documented order.
        val_probs = preds.sigmoid()
        ts = list(thresholds)
        f1s = [f1_score(targs, val_probs > t, average='micro') for t in ts]

        sc_preds = []
        sc_targs = []
        with torch.no_grad():
            for data in sc_dl:
                inputs, labels = data[0].cuda(), data[1].cuda()
                outputs = model(inputs)
                sc_preds.append(outputs.cpu().detach())
                sc_targs.append(labels.cpu().detach())

        sc_preds = torch.cat(sc_preds)
        sc_targs = torch.cat(sc_targs)
        sc_probs = sc_preds.sigmoid()
        sc_f1 = f1_score(sc_targs, sc_probs > 0.5, average='micro')

        # NOTE(review): the recorded output shows sc_f1: 0.000 at every
        # checkpoint - worth verifying the soundscape targets and inputs.
        sc_ts = list(thresholds)
        sc_f1s = [f1_score(sc_targs, sc_probs > t, average='micro') for t in sc_ts]

        # Mean loss over every batch of the epoch that just finished.
        mean_loss = running_loss / max(len(train_dl), 1)
        print(f'[{epoch + 1}, {(time.time() - t0)/60:.1f}] loss: {mean_loss:.3f}, f1: {max(f1s):.3f}, sc_f1: {max(sc_f1s):.3f}')
        running_loss = 0.0

        torch.save(model.state_dict(), f'models/{epoch+1}_lmepool_simple_minmax_not_pretrained_{round(max(f1s), 2)}.pth')

[185, 14.6] loss: 0.001, f1: 0.712, sc_f1: 0.000
[190, 29.3] loss: 0.001, f1: 0.710, sc_f1: 0.000
[195, 44.0] loss: 0.001, f1: 0.710, sc_f1: 0.000
[200, 58.6] loss: 0.001, f1: 0.715, sc_f1: 0.000
[205, 73.3] loss: 0.001, f1: 0.707, sc_f1: 0.000
[210, 87.9] loss: 0.001, f1: 0.713, sc_f1: 0.000
[215, 102.6] loss: 0.001, f1: 0.706, sc_f1: 0.000
[220, 117.3] loss: 0.001, f1: 0.712, sc_f1: 0.000
[225, 132.0] loss: 0.001, f1: 0.708, sc_f1: 0.000
[230, 146.7] loss: 0.001, f1: 0.712, sc_f1: 0.000
[235, 161.4] loss: 0.001, f1: 0.706, sc_f1: 0.000
[240, 176.1] loss: 0.001, f1: 0.706, sc_f1: 0.000
[245, 190.9] loss: 0.001, f1: 0.708, sc_f1: 0.000
[250, 205.7] loss: 0.001, f1: 0.718, sc_f1: 0.000
[255, 220.6] loss: 0.001, f1: 0.707, sc_f1: 0.000
[260, 235.4] loss: 0.001, f1: 0.712, sc_f1: 0.000
[265, 250.3] loss: 0.001, f1: 0.700, sc_f1: 0.000
[270, 265.2] loss: 0.001, f1: 0.709, sc_f1: 0.000
[275, 280.0] loss: 0.001, f1: 0.712, sc_f1: 0.000
[280, 294.8] loss: 0.001, f1: 0.709, sc_f1: 0.000


In [19]:
from IPython.lib.display import FileLink

In [20]:
# Render a download link for the checkpoint written after epoch 280; the
# trailing 0.71 in the file name is the best validation F1 rounded to two places.
FileLink('models/280_lmepool_simple_minmax_not_pretrained_0.71.pth')