# KaldiTorch notebook

#### Packages

In [1]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from torchsummary import summary

from models import DWaveNet
from datasets import SequenceDataset
from utils import ScheduledOptim

import os
import math
import time

import numpy as np
import kaldi_io
import kaldiio
import librosa
from tqdm.notebook import tqdm


In [2]:
train_dataset = SequenceDataset('../data/train/trainLAB.scp', '../data/utt2spk.scp', min_length = 16000)
test_dataset = SequenceDataset('../data/test/testLAB.scp', '../data/utt2spk.scp', min_length = 16000)

Totally 7344 samples with at most 247 samples for one class
Totally 816 samples with at most 247 samples for one class


In [3]:
train_data = DataLoader(train_dataset, batch_size = 16, shuffle=True)
test_data = DataLoader(test_dataset, batch_size = 16, shuffle=True)

In [4]:
model = DWaveNet(in_channels=1, num_layers=10, num_stacks=1, residual_channels=128, gate_channels=128, skip_out_channels=128,
                last_channels=(2048, 256))

In [5]:
summary(model, train_dataset[0][0].shape, device = 'cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1           [-1, 128, 16000]             384
            Conv1d-2           [-1, 128, 16000]          49,152
            Conv1d-3           [-1, 128, 16000]           8,192
            Conv1d-4           [-1, 128, 16000]           8,192
 ResidualConv1dGLU-5        [-1, 2, 128, 16000]               0
            Conv1d-6           [-1, 128, 16000]          49,152
            Conv1d-7           [-1, 128, 16000]           8,192
            Conv1d-8           [-1, 128, 16000]           8,192
 ResidualConv1dGLU-9        [-1, 2, 128, 16000]               0
           Conv1d-10           [-1, 128, 16000]          49,152
           Conv1d-11           [-1, 128, 16000]           8,192
           Conv1d-12           [-1, 128, 16000]           8,192
ResidualConv1dGLU-13        [-1, 2, 128, 16000]               0
           Conv1d-14           [-1, 128

In [6]:
os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1, 2, 3'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [7]:
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# device

In [8]:
model = nn.DataParallel(model)

In [9]:
model = model.to(device)

In [10]:
optimizer = ScheduledOptim( # Transformer optimizer
        torch.optim.Adam(
            filter(lambda p: p.requires_grad,
                   model.parameters()),
            betas = (0.9, 0.98),
             eps = 1e-09,
              weight_decay = 1e-4,
               amsgrad = True),
                n_warmup_steps = 8000)

In [11]:
#log_dir = "log/"
def train(n_epochs, loaders, model, optimizer, use_cuda, batch_verbose):#, save_path):
    for epoch in range(1, n_epochs+1):

        print('Epoch %d' % epoch)
        start = time.time()
        model.train()
        train_loss = 0.0
        for batch_idx, (data, target, _) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.to(device), target.to(device)
            #print(data.shape, target.shape)
            optimizer.zero_grad()
            output = model(data)
            #output = output.view((-1,))
            #print(output.shape)
            a = torch.nn.L1Loss()(output, target)
            b = torch.nn.MSELoss()(output, target)
            loss = a + b
            loss.backward()
            optimizer.step()
            lr = optimizer.update_learning_rate()
            if batch_idx % batch_verbose == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tlr:{:.5f}\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loaders['train'].dataset),
                100. * batch_idx / len(loaders['train']), lr, loss.item()))
            #train.update(np.random.randint(300, 800+1)) # 3-8s chunk
            train_loss += loss
            del data, target, output, loss, a, b
        train_loss /= len(loaders['train'].dataset)
        print('Train epoch {} completed in {:.3f} minutes with total train loss: {:.3f}'.format(epoch, (time.time()-start)/60, train_loss))
      
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for data, target, _ in loaders['valid']:
                if use_cuda:
                    data, target = data.to(device), target.to(device)
                output = model(data)
                a = torch.nn.L1Loss()(output, target).item() # sum up batch loss
                b = torch.nn.MSELoss()(output, target).item()
                test_loss += (a+b)
                # if asoftmax == 'True': # angular-softmax
                #     output = output[0] # 0=cos_theta 1=phi_theta
                # pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
                # correct += pred.eq(target.view_as(pred)).sum().item()
              
        test_loss /= len(loaders['valid'].dataset)
        print('\nTest set: Average loss: {:.4f}\n'.format(test_loss))

        del data, target, output, test_loss, a, b

      # if 100. * correct / len(val_loader.dataset) > best:
      #     best = 100. * correct / len(val_loader.dataset)
      #     torch.save({
      #         'epoch': epoch,
      #         'state_dict': model.state_dict(),
      #         'best_acc': best,
      #         'optimizer' : optimizer.state_dict(),
      #     }, log_dir + str(epoch) + "_" + str(int(100. * correct / len(val_loader.dataset))) + ".h5")
      #     print("===> save to checkpoint at {}\n".format(log_dir + 'model_best.pth.tar'))
      #     shutil.copyfile(log_dir + str(epoch) + "_" + str(int(100. * correct / len(val_loader.dataset))) +
      #             ".h5", log_dir + 'model_best.pth.tar')
      #     best_epoch = epoch
      # elif epoch - best_epoch > 2:
      #     optimizer.increase_delta()
      #     best_epoch = epoch
    return model

In [12]:
loaders = {'train':train_data, 'valid':test_data}

In [13]:
train(n_epochs=3, loaders=loaders, model=model, optimizer=optimizer, use_cuda = True, batch_verbose = 10)

Epoch 1
Train epoch 1 completed in 14.736 minutes with total train loss: 0.002


ValueError: too many values to unpack (expected 2)

In [23]:
!nvidia-smi

Fri Jun 12 17:13:55 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 440.64.00    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           On   | 00000000:83:00.0 Off |                    0 |
| N/A   72C    P0    59W / 149W |   8400MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K80           On   | 00000000:84:00.0 Off |                    0 |
| N/A   23C    P8    30W / 149W |     11MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Tesla K80           On   | 00000000:89:00.0 Off |                    0 |
| N/A   

<hr>