In [None]:
import numpy as np
import pandas as pd
import numpy as np
import os
import gc

# --- Notebook-wide configuration ---------------------------------------------

# Number of target classes; used as the classifier width and as the number of
# columns of the averaged test-set probability matrix.
NUM_CLASSES = 30

# Worker-process count handed to every DataLoader (0 = load in the main process).
num_workers = 0

# When True, a GlobalNormalization step is appended to the preprocessing
# pipeline and applied to the test array.
use_global_normalization = True

In [None]:
# Open the two .npz archives that back the notebook-level arrays.
# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code - only load this archive from a trusted source.
raw_train_data = np.load('raw_train_data.npz', allow_pickle=True)
test_data = np.load('test_data_mel32.npz')

In [None]:
# Gather the arrays from both archives first. On a duplicate key the test
# archive wins, which matches the order the archives were unpacked in before.
_loaded_arrays = {}
for _archive in (raw_train_data, test_data):
    _loaded_arrays.update(_archive.items())

# Fail early, with a readable message, if an array that the rest of the
# notebook relies on is absent from the archives.
_required_arrays = ('x_train', 'y_train', 'x_test', 'test_keys')
_missing_arrays = [name for name in _required_arrays if name not in _loaded_arrays]
if _missing_arrays:
    raise KeyError(f'arrays missing from the loaded .npz archives: {_missing_arrays}')

# Publish every stored array as a notebook-level variable. globals() is used
# instead of locals(): writing through locals() only works at module scope and
# silently does nothing inside a function.
globals().update(_loaded_arrays)

x_train.shape, y_train.shape, x_test.shape, test_keys.shape

In [None]:
import keras4torch as k4t
import torch
import torch.nn as nn

from dataset import SpeechCommandsDataset
from models import wideresnet

def build_model():
    """Create and compile a fresh WideResNet classifier.

    The backbone is ``wideresnet(depth=28, widen_factor=10)`` with
    ``NUM_CLASSES`` outputs, wrapped in a ``k4t.Model`` and built with
    ``[1, 32, 32]`` (presumably the per-sample input shape - confirm against
    keras4torch). It is compiled with SGD (lr 1e-2, momentum 0.9, weight decay
    1e-2), a cross-entropy loss with label smoothing 0.1, the ``'acc'`` metric
    and the ``'cuda'`` device.

    Returns:
        The compiled ``k4t.Model``.
    """
    backbone = wideresnet(depth=28, widen_factor=10, num_classes=NUM_CLASSES)
    wrapped = k4t.Model(backbone).build([1, 32, 32])

    sgd = torch.optim.SGD(
        wrapped.parameters(),
        lr=1e-2,
        momentum=0.9,
        weight_decay=1e-2,
    )
    wrapped.compile(
        optimizer=sgd,
        loss=k4t.losses.CELoss(label_smoothing=0.1),
        metrics=['acc'],
        device='cuda',
    )
    return wrapped

In [None]:
from torch.utils.data import DataLoader
from transforms import *

# Deterministic preprocessing shared by training and validation:
# crop_or_pad followed by a log-mel spectrogram step configured as '1x32x32'.
_preprocessing_steps = [crop_or_pad, ToLogMelspectrogram(config='1x32x32')]
normal_transform = Compose(_preprocessing_steps)

if use_global_normalization:
    # The same normaliser instance is chained after the preprocessing and
    # applied directly to the test array.
    norm = GlobalNormalization(config='mel32')
    normal_transform = Compose([normal_transform, norm])
    # NOTE(review): x_test is overwritten with its normalised version, so
    # re-running this cell without reloading the data normalises it again
    # (unless norm happens to be idempotent - confirm).
    x_test = norm(x_test)

# Training pipeline: the three augmentations run first, then the deterministic
# preprocessing above.
_augmentation_steps = [TimeShift(), ChangeAmplitude(), ChangeSpeedAndPitch()]
data_aug_transform = Compose(_augmentation_steps + [normal_transform])

def make_dataloader(x_trn, y_trn, x_val, y_val, batch_size=96):
    """Build the training and validation DataLoaders for one split.

    Args:
        x_trn: Training inputs, passed to ``SpeechCommandsDataset``.
        y_trn: Training labels.
        x_val: Validation inputs.
        y_val: Validation labels.
        batch_size: Batch size used by both loaders. Defaults to 96, the value
            that was previously hard-coded in two places.

    Returns:
        A ``(trn_loader, val_loader)`` tuple. The training loader applies
        ``data_aug_transform`` and shuffles; the validation loader applies
        ``normal_transform``, does not shuffle, and enables the dataset's
        ``use_cache`` option (presumably caching transformed samples - confirm
        in ``dataset.py``).
    """
    # Settings common to both loaders, kept in one place so they cannot drift apart.
    shared_kwargs = dict(batch_size=batch_size, pin_memory=True, num_workers=num_workers)

    trn_loader = DataLoader(
        SpeechCommandsDataset(x_trn, y_trn, data_aug_transform),
        shuffle=True,
        **shared_kwargs,
    )
    val_loader = DataLoader(
        SpeechCommandsDataset(x_val, y_val, normal_transform, use_cache=True),
        shuffle=False,
        **shared_kwargs,
    )
    return trn_loader, val_loader

In [None]:
from sklearn.model_selection import StratifiedKFold
from torch.optim.lr_scheduler import MultiStepLR

# Stratified 5-fold cross-validation. For every fold a fresh model is trained,
# its best checkpoint (by 'val_acc') is restored, and its softmax predictions on
# the test set are accumulated; the final result is the mean over folds.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2020)

# Running sum of the per-fold test probabilities, allocated directly as float32
# rather than as a float64 array that is converted afterwards.
y_proba = np.zeros([len(x_test), NUM_CLASSES], dtype=np.float32)
model_name = 'wideresnet28'

for i, (trn, val) in enumerate(kfold.split(x_train, y_train)):
    print(f'Processing fold {i}:')

    model = build_model()

    # MultiStepLR multiplies the learning rate by gamma at each milestone; it is
    # wrapped in the k4t callback so that fit_dl drives it (presumably stepping
    # once per epoch - confirm against keras4torch).
    lr_scheduler = MultiStepLR(model.trainer.optimizer, milestones=[13, 20, 27, 34], gamma=0.3)
    lr_scheduler = k4t.callbacks.LRScheduler(lr_scheduler)

    checkpoint_path = f'best_{model_name}_{i}.pt'
    model_checkpoint = k4t.callbacks.ModelCheckpoint(checkpoint_path, monitor='val_acc')

    trn_loader, val_loader = make_dataloader(x_train[trn], y_train[trn], x_train[val], y_train[val])

    history = model.fit_dl(trn_loader,
            epochs=40,
            val_loader=val_loader,
            callbacks=[model_checkpoint, lr_scheduler]
    )

    # Evaluate and predict with the checkpointed weights, not the last-epoch ones.
    model.load_weights(checkpoint_path)
    print(model.evaluate_dl(val_loader))
    y_proba += model.predict(x_test, activation=nn.Softmax(-1))

    # Release this fold's network, optimizer state and loaders before the next
    # model is built, so two models never coexist in memory. The objects of the
    # final fold are kept so they remain available after the loop, as before.
    if i < kfold.n_splits - 1:
        del model, lr_scheduler, model_checkpoint, history, trn_loader, val_loader
        gc.collect()
        torch.cuda.empty_cache()

# Average the accumulated probabilities over the folds and persist them.
y_proba /= kfold.n_splits
np.save(f'{model_name}_{kfold.n_splits}foldcv_proba.npy', y_proba)