In [1]:
import numpy as np
import pandas as pd

import os
import glob
import librosa
import tqdm
import pickle

from collections import Counter, defaultdict

from torchvision.datasets.folder import default_loader
from torch.utils.data import TensorDataset, DataLoader
from torch import nn
import torch.nn.functional as F
import torch

DATA_PATH = './data'


def join_path(path):
    """Return ``path`` resolved relative to the ``DATA_PATH`` directory.

    Args:
        path: File or directory name (may contain glob wildcards) inside the data folder.

    Returns:
        The joined path as a string.
    """
    return os.path.join(DATA_PATH, path)

In [2]:
# The order of `filepaths` defines the order of every audio clip downstream, so it is
# left exactly as glob returns it.
# NOTE(review): presumably the pre-computed mel pickle was written in this same order -- confirm.
filepaths = glob.glob(join_path('train_curated/*'))

train = pd.read_csv(join_path('train_curated.csv')).set_index('fname')

# Split the comma-separated label strings once and reuse the result.
label_lists = train['labels'].str.split(',')

# Sorted so that the class -> column mapping is identical on every kernel start
# (the iteration order of a set of strings is not stable between interpreter runs).
cats = sorted({label for labels in label_lists for label in labels})

# Multi-hot encoding: one 0/1 flag per entry of `cats`.
train['enc_labels'] = label_lists.apply(lambda x: [int(label in x) for label in cats])

# Look each clip up by its real file name instead of assuming 12-character names.
targets = np.array(train.loc[[os.path.basename(fp) for fp in filepaths], 'enc_labels'].tolist())

In [3]:
# NOTE: pickle.load can execute arbitrary code -- only open pickles you produced yourself.
with open(join_path('train_curated_mels.pkl'), 'rb') as f:
    data = pickle.load(f)

# Features and labels are paired purely by position, so a length mismatch means
# every later step would silently train on misaligned data.
assert len(data) == len(targets), (
    'train_curated_mels.pkl holds %d items but %d label rows were built'
    % (len(data), len(targets))
)

In [4]:
import mclnn

# Layers listed in execution order; unpacking the list into nn.Sequential keeps the
# same positional indices (0..17) as passing the modules directly.
# NOTE(review): the first Linear expects 256*8 flattened features, so the stack
# presumably reduces every clip to 8 positions x 256 channels -- confirm against the mel shape.
# NOTE(review): the final width of 80 is presumably the number of classes -- confirm it equals len(cats).
_base_layers = [
    # block 1: 128 -> 256
    mclnn.CLNNModule(128, 256, 4, dilation=1),
    nn.LeakyReLU(),
    mclnn.BatchNorm(256),
    # block 2
    mclnn.CLNNModule(256, 256, 4, dilation=1),
    nn.LeakyReLU(),
    mclnn.TemporalPool(2),
    mclnn.BatchNorm(256),
    # block 3 (no BatchNorm after the pooling step)
    mclnn.CLNNModule(256, 256, 4, dilation=1),
    nn.LeakyReLU(),
    mclnn.TemporalPool(2),
    # block 4 (dilation left at the module's default)
    mclnn.CLNNModule(256, 256, 4),
    nn.LeakyReLU(),
    mclnn.TemporalPool(4),
    mclnn.BatchNorm(256),
    # classifier head
    mclnn.Flatten(),
    nn.Linear(256*8, 1024),
    nn.LeakyReLU(),
    nn.Linear(1024, 80),
]
base = nn.Sequential(*_base_layers)

class W(nn.Module):
    """Thin wrapper that drops axis 1 of the input before calling the wrapped model."""

    def __init__(self, model):
        """Store ``model`` as a registered sub-module."""
        super(W, self).__init__()
        self.model = model

    def forward(self, input):
        """Squeeze axis 1 of ``input`` and return the wrapped model's output.

        The squeeze only removes axis 1 when its size is 1; otherwise the
        tensor is passed through unchanged.
        """
        squeezed = torch.squeeze(input, dim=1)
        return self.model(squeezed)
    
# Wrap the layer stack so that inputs carrying an extra size-1 axis at position 1
# have that axis squeezed away before reaching `base`.
model = W(base)

In [5]:
# pip install scikit-multilearn
from skmultilearn.model_selection import iterative_train_test_split

# Seed NumPy's global RNG so the stratified split is the same on every run.
# NOTE(review): this relies on the stratifier drawing from np.random (true for
# scikit-multilearn 0.2.x) -- confirm for the installed version.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# The helper returns X_train, y_train, X_test, y_test in that order; the 10% "test"
# part is what the following cells use as the validation set.
train_X, train_y, test_X, test_y = iterative_train_test_split(X=data, y=targets, test_size=0.1)

In [6]:
BATCH_SIZE = 64

# unsqueeze(1) inserts a size-1 axis at position 1; the labels are cast to float
# because they are compared against float outputs by the loss.
train_ds = TensorDataset(torch.tensor(train_X).unsqueeze(1), torch.tensor(train_y).float())
val_ds = TensorDataset(torch.tensor(test_X).unsqueeze(1), torch.tensor(test_y).float())

# Reshuffle the training samples every epoch (DataLoader does not shuffle by default).
# The validation loader keeps its order so predictions stay aligned with `test_y`.
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE)

In [7]:
# Loss and optimiser: multi-label binary cross-entropy on raw outputs, Adam at lr 1e-3.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Directory handed to the trainer, located inside the data folder.
logdir = join_path('logdir')

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [8]:
from kirill.flame import Trainer # the author's small in-house pipeline for training networks

# Bundle model, optimiser, loss, device and log directory into one trainer object.
# NOTE(review): the later evaluation cells call `model` on tensors moved to `device`,
# so Trainer presumably moves the model there itself -- confirm.
trainer = Trainer(model, optimizer, criterion, device, logdir)

CRITICAL:root:Cannot load caffe2.python. Error: /opt/caffe2/build/caffe2/python/caffe2_pybind11_state.so: undefined symbol: _ZN5fLI6438FLAGS_caffe2_max_keep_on_shrink_memoryE


In [12]:
# Example run that minimises log loss; train and validation loss are printed per epoch.
# NOTE(review): in the recorded output the validation loss is lowest around epoch 2 and
# rises afterwards while the training loss goes to ~0, i.e. most of the 240 epochs overfit.
trainer.train(train_dl, val_dl, n_epochs=240, verbose=True)


# Disabled alternative kept for reference (`utils`, `val_y` and `scheduler` are not
# defined in the visible cells):
# losses, best_loss, epoch_time = utils.train_model(trainer, train_dl, val_dl, val_y, scheduler,
#                                             n_epochs=1000, gap=20, verbose=True)



  1 epoch. train: 0.00048. val: 0.00086. time: 5.031s.
  2 epoch. train: 0.00043. val: 0.00085. time: 13.961s.
  3 epoch. train: 0.00038. val: 0.00088. time: 22.973s.
  4 epoch. train: 0.00033. val: 0.00095. time: 31.990s.
  5 epoch. train: 0.00028. val: 0.00105. time: 41.023s.
  6 epoch. train: 0.00024. val: 0.00103. time: 50.071s.
  7 epoch. train: 0.00022. val: 0.00098. time: 59.134s.
  8 epoch. train: 0.00018. val: 0.00109. time: 68.230s.
  9 epoch. train: 0.00016. val: 0.00110. time: 77.352s.
 10 epoch. train: 0.00013. val: 0.00119. time: 86.451s.
 11 epoch. train: 0.00011. val: 0.00121. time: 95.603s.
 12 epoch. train: 0.00009. val: 0.00119. time: 104.797s.
 13 epoch. train: 0.00007. val: 0.00127. time: 113.968s.
 14 epoch. train: 0.00005. val: 0.00128. time: 123.109s.
 15 epoch. train: 0.00004. val: 0.00128. time: 132.214s.
 16 epoch. train: 0.00003. val: 0.00132. time: 141.133s.
 17 epoch. train: 0.00003. val: 0.00135. time: 150.281s.
 18 epoch. train: 0.00002. val: 0.00140. ti

144 epoch. train: 0.00000. val: 0.00255. time: 1309.571s.
145 epoch. train: 0.00000. val: 0.00258. time: 1318.730s.
146 epoch. train: 0.00000. val: 0.00258. time: 1327.877s.
147 epoch. train: 0.00000. val: 0.00261. time: 1337.052s.
148 epoch. train: 0.00000. val: 0.00257. time: 1346.225s.
149 epoch. train: 0.00007. val: 0.00227. time: 1355.366s.
150 epoch. train: 0.00012. val: 0.00235. time: 1364.523s.
151 epoch. train: 0.00007. val: 0.00235. time: 1373.653s.
152 epoch. train: 0.00002. val: 0.00245. time: 1382.765s.
153 epoch. train: 0.00001. val: 0.00239. time: 1391.810s.
154 epoch. train: 0.00000. val: 0.00250. time: 1400.928s.
155 epoch. train: 0.00000. val: 0.00260. time: 1410.084s.
156 epoch. train: 0.00000. val: 0.00257. time: 1419.214s.
157 epoch. train: 0.00000. val: 0.00260. time: 1428.324s.
158 epoch. train: 0.00000. val: 0.00261. time: 1437.441s.
159 epoch. train: 0.00000. val: 0.00261. time: 1446.603s.
160 epoch. train: 0.00000. val: 0.00263. time: 1455.730s.
161 epoch. tra

In [18]:
from metrics import lwlrap_score

# Use this loop when the organisers' metric (lwlrap) should be tracked after every epoch.
losses = []  # NB: despite the name, this collects one lwlrap score per epoch
for i in range(20):
    # The evaluation below leaves the model in eval mode; switch back so that
    # BatchNorm keeps updating during the next training pass.
    model.train()

    # One full pass over the training loader.
    trainer.train_step(train_dl, verbose=True)

    # Build the validation predictions without tracking gradients.
    model.eval()
    preds = []
    with torch.no_grad():
        for batch in tqdm.tqdm(val_dl):
            batch = batch[0].to(device)
            preds.append(model(batch).cpu().numpy())
    preds = np.vstack(preds)

    # Score the raw outputs against the validation labels.
    score = lwlrap_score(test_y, preds)
    print(score)

    losses.append(score)

Loss: 0.06763. Iter: 100%|██████████| 70/70 [00:14<00:00,  5.24it/s]
100%|██████████| 8/8 [00:00<00:00, 10.90it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.32207662069576626


Loss: 0.05798. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.67it/s]
100%|██████████| 8/8 [00:00<00:00, 13.35it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.4166380930591757


Loss: 0.05236. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.22it/s]
100%|██████████| 8/8 [00:00<00:00, 12.11it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.4582206474353202


Loss: 0.04700. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.20it/s]
100%|██████████| 8/8 [00:00<00:00, 12.24it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.4928546655237012


Loss: 0.04262. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.63it/s]
100%|██████████| 8/8 [00:00<00:00, 13.09it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.5295653557993734


Loss: 0.03912. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.20it/s]
100%|██████████| 8/8 [00:00<00:00, 12.19it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.5703799406704296


Loss: 0.03511. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.16it/s]
100%|██████████| 8/8 [00:00<00:00, 11.98it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.5884857284496896


Loss: 0.03391. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.12it/s]
100%|██████████| 8/8 [00:00<00:00, 11.95it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.613799956117646


Loss: 0.03119. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.15it/s]
100%|██████████| 8/8 [00:00<00:00, 11.86it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.5996618968494476


Loss: 0.02637. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.57it/s]
100%|██████████| 8/8 [00:00<00:00, 13.34it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.5988951360838559


Loss: 0.02486. Iter: 100%|██████████| 70/70 [00:14<00:00,  5.61it/s]
100%|██████████| 8/8 [00:00<00:00, 13.27it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.646209654193764


Loss: 0.02322. Iter: 100%|██████████| 70/70 [00:14<00:00,  5.60it/s]
100%|██████████| 8/8 [00:00<00:00, 13.29it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.6551752026880457


Loss: 0.02049. Iter: 100%|██████████| 70/70 [00:14<00:00,  5.46it/s]
100%|██████████| 8/8 [00:00<00:00, 12.92it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.6473820956121759


Loss: 0.01926. Iter: 100%|██████████| 70/70 [00:14<00:00,  5.50it/s]
100%|██████████| 8/8 [00:00<00:00, 12.10it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.6441005260998045


Loss: 0.01784. Iter: 100%|██████████| 70/70 [00:14<00:00,  5.59it/s]
100%|██████████| 8/8 [00:00<00:00, 12.99it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.6626067061164259


Loss: 0.01505. Iter: 100%|██████████| 70/70 [00:14<00:00,  5.58it/s]
100%|██████████| 8/8 [00:00<00:00, 12.93it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.6737623354909517


Loss: 0.01228. Iter: 100%|██████████| 70/70 [00:14<00:00,  5.56it/s]
100%|██████████| 8/8 [00:00<00:00, 12.99it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.6784166777225116


Loss: 0.01107. Iter: 100%|██████████| 70/70 [00:14<00:00,  5.61it/s]
100%|██████████| 8/8 [00:00<00:00, 12.97it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.6772803548081768


Loss: 0.01175. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.17it/s]
100%|██████████| 8/8 [00:00<00:00, 12.84it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

0.6778412922729062


Loss: 0.01064. Iter: 100%|██████████| 70/70 [00:15<00:00,  5.19it/s]
100%|██████████| 8/8 [00:00<00:00, 11.81it/s]

0.6585010786758971





In [None]:
# test inference

# NOTE: pickle.load can execute arbitrary code -- only open pickles you produced yourself.
with open(join_path('test_mels.pkl'), 'rb') as f:
    test_data = pickle.load(f)

test_filepaths = glob.glob(join_path('test/*'))

# Predictions are matched to file names purely by position, so the counts must agree.
# NOTE(review): this cannot verify that the pickle was written in glob order -- confirm.
assert len(test_data) == len(test_filepaths), (
    'test_mels.pkl holds %d items but %d test files were found'
    % (len(test_data), len(test_filepaths))
)

test_ds = TensorDataset(torch.tensor(test_data).unsqueeze(1))
test_dl = DataLoader(test_ds, batch_size=64)

# Inference only: eval mode, and no autograd graph is built.
model.eval()
preds = []
with torch.no_grad():
    for batch in tqdm.tqdm(test_dl):
        batch = batch[0].to(device)
        preds.append(model(batch).cpu().numpy())
preds = np.vstack(preds)

In [None]:
# One column per class, in the same order that was used to encode the training labels.
# The values are the model's raw outputs (no sigmoid is applied).
sub = pd.DataFrame(preds, columns=list(cats))

# Use the real file name rather than assuming 12-character names, and put `fname` first.
# NOTE(review): fname-first presumably matches the sample submission layout -- confirm.
sub.insert(0, 'fname', [os.path.basename(fp) for fp in test_filepaths])
sub.to_csv("sub3.csv", index=False)