# Task description
- Classify the speakers of given features.
- Main goal: Learn how to use transformer.
- Baselines:
  - Easy: Run sample code and know how to use transformer.
  - Medium: Know how to adjust parameters of transformer.
  - Hard: Construct a [conformer](https://arxiv.org/abs/2005.08100), which is a variant of the transformer.

In [102]:
# IPython magic: clear the interactive namespace before re-running the notebook
# (-f skips the confirmation prompt).
%reset -f

# Download dataset

In [103]:
# Mount Google Drive inside the Colab runtime so the dataset archive is reachable.
from google.colab import drive
drive.mount('/content/gdrive')
# Shell escape: extract the dataset archive (the training cell later reads "./Dataset").
!unzip ./gdrive/MyDrive/Colab\ Notebooks/HW4/Dataset.zip

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# CONFIG

In [104]:
import random

import numpy as np
import torch

# Notebook-wide settings shared by the dataset, model and training cells.
CONFIG = {
    # File the best model weights are saved to (and resumed from if present).
    "MODEL_PATH": "gdrive/MyDrive/Colab Notebooks/HW4/models/model_conformer",
    # Text file holding the best validation accuracy reached so far.
    "BEST_VAL_PATH": "gdrive/MyDrive/Colab Notebooks/HW4/models/best_val_conformer",
    # Number of mel-spectrogram frames in one training segment.
    'SEGMENT_LEN': 128,
}

# set random seed for reproducibility
SEED = 12345
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# The dataset picks its random crop offsets with Python's `random` module, so
# that generator has to be seeded as well, not only NumPy and PyTorch.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

# Data

## Dataset
- Original dataset is [Voxceleb1](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/).
- The [license](https://creativecommons.org/licenses/by/4.0/) and [complete version](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/files/license.txt) of Voxceleb1.
- We randomly select 600 speakers from Voxceleb1.
- Then preprocess the raw waveforms into mel-spectrograms.

- Args:
  - data_dir: The path to the data directory.
  - metadata_path: The path to the metadata.
  - segment_len: The length of audio segment for training. 
- The architecture of data directory \\
  - data directory \\
  |---- metadata.json \\
  |---- testdata.json \\
  |---- mapping.json \\
  |---- uttr-{random string}.pt \\

- The information in metadata
  - "n_mels": The dimension of mel-spectrogram.
  - "speakers": A dictionary. 
    - Key: speaker ids.
    - value: "feature_path" and "mel_len"


For efficiency, we segment the mel-spectrograms into segments in the training step.

In [105]:
import os
import json
import torch
import random
from pathlib import Path
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
 
class myDataset(Dataset):
  """Dataset of preprocessed mel-spectrogram utterances labelled by speaker.

  Reads ``mapping.json`` (speaker name -> integer id) and ``metadata.json``
  (per-speaker utterance list) from ``data_dir`` and builds a flat index of
  ``[feature_path, speaker_id]`` pairs.

  Args:
    data_dir: Directory containing the JSON files and the feature files.
    segment_len: Maximum number of frames returned per item; longer
      utterances are randomly cropped to this length.
  """

  def __init__(self, data_dir, segment_len=CONFIG['SEGMENT_LEN']):
    self.data_dir = data_dir
    self.segment_len = segment_len

    # Load the mapping from speaker name to their corresponding id.
    # `with` closes the file promptly instead of leaking the handle.
    mapping_path = Path(data_dir) / "mapping.json"
    with mapping_path.open() as f:
      mapping = json.load(f)
    self.speaker2id = mapping["speaker2id"]

    # Load metadata of training data.
    metadata_path = Path(data_dir) / "metadata.json"
    with metadata_path.open() as f:
      metadata = json.load(f)["speakers"]

    # Get the total number of speakers.
    self.speaker_num = len(metadata)
    # One entry per utterance: [relative feature path, integer speaker id].
    self.data = [
      [utterance["feature_path"], self.speaker2id[speaker]]
      for speaker, utterances in metadata.items()
      for utterance in utterances
    ]

  def __len__(self):
    """Return the number of utterances in the dataset."""
    return len(self.data)

  def __getitem__(self, index):
    """Return ``(mel, speaker)`` for the utterance at ``index``.

    ``mel`` is a float tensor with at most ``segment_len`` frames along its
    first axis; ``speaker`` is a one-element long tensor with the speaker id.
    """
    feat_path, speaker = self.data[index]
    # Load preprocessed mel-spectrogram.
    mel = torch.load(os.path.join(self.data_dir, feat_path))

    # Segment mel-spectrogram into "segment_len" frames.
    if len(mel) > self.segment_len:
      # Randomly get the starting point of the segment.
      start = random.randint(0, len(mel) - self.segment_len)
      # Get a segment with "segment_len" frames.
      mel = torch.FloatTensor(mel[start:start+self.segment_len])
    else:
      # Utterances no longer than segment_len are returned whole (and padded
      # later when batched).
      mel = torch.FloatTensor(mel)

    # Turn the speaker id into long for computing loss later.
    speaker = torch.FloatTensor([speaker]).long()
    return mel, speaker

  def get_speaker_number(self):
    """Return the number of distinct speakers listed in the metadata."""
    return self.speaker_num

## Dataloader
- Split dataset into training dataset (90%) and validation dataset (10%).
- Create dataloader to iterate the data.


In [106]:
import torch
from torch.utils.data import DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence


def collate_batch(batch):
  """Merge a list of ``(mel, speaker)`` samples into one padded batch.

  Returns:
    A tuple ``(mels, labels)`` where ``mels`` holds the features padded to the
    longest sample in the batch (batch dimension first) and ``labels`` is a
    long tensor of speaker ids.
  """
  mels, speakers = zip(*batch)
  # Samples may have different numbers of frames; pad them to a common length
  # so they can be stacked into a single tensor.
  padded = pad_sequence(mels, batch_first=True)  # (batch size, length, 40)
  labels = torch.FloatTensor(speakers).long()
  return padded, labels


def get_dataloader(data_dir, batch_size, n_workers):
  """Build the training and validation loaders for the data in ``data_dir``.

  The dataset is split 90% / 10% at random. Both loaders drop the last
  incomplete batch, pin memory and collate with ``collate_batch``; only the
  training loader shuffles.

  Returns:
    ``(train_loader, valid_loader, speaker_num)``.
  """
  full_set = myDataset(data_dir)

  # 90% of the utterances go to training, the remainder to validation.
  n_train = int(0.9 * len(full_set))
  n_valid = len(full_set) - n_train
  trainset, validset = random_split(full_set, [n_train, n_valid])

  # Options shared by both loaders.
  shared = dict(
    batch_size=batch_size,
    num_workers=n_workers,
    drop_last=True,
    pin_memory=True,
    collate_fn=collate_batch,
  )
  train_loader = DataLoader(trainset, shuffle=True, **shared)
  valid_loader = DataLoader(validset, **shared)

  return train_loader, valid_loader, full_set.get_speaker_number()


# Model
- TransformerEncoderLayer:
  - Base transformer encoder layer in [Attention Is All You Need](https://arxiv.org/abs/1706.03762)
  - Parameters:
    - d_model: the number of expected features of the input (required).

    - nhead: the number of heads of the multiheadattention models (required).

    - dim_feedforward: the dimension of the feedforward network model (default=2048).

    - dropout: the dropout value (default=0.1).

    - activation: the activation function of intermediate layer, relu or gelu (default=relu).

- TransformerEncoder:
  - TransformerEncoder is a stack of N transformer encoder layers
  - Parameters:
    - encoder_layer: an instance of the TransformerEncoderLayer() class (required).

    - num_layers: the number of sub-encoder-layers in the encoder (required).

    - norm: the layer normalization component (optional).

In [107]:
# reference: https://github.com/sooftware/conformer
import sys
sys.path.append('gdrive/MyDrive/Colab Notebooks/HW4/conformer')
from conformer.encoder import ConformerEncoder



In [108]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Classifier(nn.Module):
  """Speaker classifier: input batch-norm -> conformer encoder -> mean pool -> MLP.

  Args:
    d_model: Encoder output dimension (also the input size of the first
      linear layer).
    n_spks: Number of speaker classes, i.e. the size of the output layer.
    dropout: Accepted for interface compatibility but currently unused; the
      dropout rate is fixed at 0.5 (see NOTE in ``__init__``).
  """

  def __init__(self, d_model=256, n_spks=600, dropout=0.1):
    super().__init__()
    input_dim = 40

    # Normalization
    self.bn0 = nn.BatchNorm1d(input_dim, momentum=0.1)

    self.encoder = ConformerEncoder(input_dim=input_dim, encoder_dim=d_model, conv_kernel_size=31, num_layers=1)

    self.linear1 = nn.Linear(d_model, 1024)
    self.bn1 = nn.BatchNorm1d(1024, momentum=0.1)

    self.out = nn.Linear(1024, n_spks)

    # activation function and dropout
    self.act_fn = nn.ReLU(inplace=True)
    # NOTE: the `dropout` argument is not wired in; the rate stays at 0.5 so
    # that default-constructed models keep their existing behaviour.
    self.dropout = nn.Dropout(p=0.5)

  def forward(self, inputs):
    """
    B: batch_size, S: segment_length, F: feature_num
    I: input_dim(40), O: output_dim(n_spks)

    bn: expect shape of (B, F, S)
    linear: expect shape of (B, *, F)
    encoder: expect shape of (B, S, F)
    conv: expect shape of (B, F, S)

    args:
      inputs: (B, S, F/I)
    return:
      outs: (B, F/O)
    """
    # normalize input
    inputs = inputs.transpose(1, 2) # (B, F, S)
    inputs = self.bn0(inputs) # (B, F, S)
    inputs = inputs.transpose(1, 2) # (B, S, F)

    # The encoder takes one length per sample. The batch arrives already
    # padded to a common length, so report that length for every sample
    # instead of passing an uninitialised tensor.
    input_lengths = torch.full(
      (inputs.shape[0],), inputs.shape[1], dtype=torch.long, device=inputs.device
    )

    # transformer
    # The returned output lengths are not needed and are discarded.
    # NOTE(review): the encoder may shorten the time axis — confirm against the conformer package.
    inputs, _ = self.encoder(inputs, input_lengths) # (B, S, F)

    # get mean of feature
    inputs = inputs.mean(dim=1) # (B, F)

    # ff_layer
    inputs = self.linear1(inputs) # (B, F)
    inputs = self.bn1(inputs) # (B, F)
    inputs = self.act_fn(inputs)
    inputs = self.dropout(inputs)

    # output
    outputs = self.out(inputs) # (B, F)

    return outputs


# Learning rate schedule
- For transformer architecture, the design of learning rate schedule is different from that of CNN.
- Previous works show that the warmup of learning rate is useful for training models with transformer architectures.
- The warmup schedule
  - Set learning rate to 0 in the beginning.
  - The learning rate increases linearly from 0 to initial learning rate during warmup period.

In [109]:
import math

import torch
from torch.optim import Optimizer
from torch.optim.lr_scheduler import LambdaLR

def get_cosine_schedule_with_warmup(
  optimizer: Optimizer,
  num_warmup_steps: int,
  num_training_steps: int,
  num_cycles: float = 0.5,
  last_epoch: int = -1,
):
  """
  Build a learning-rate schedule with linear warmup followed by cosine decay.

  During the first ``num_warmup_steps`` steps the multiplier applied to the
  optimizer's initial lr rises linearly from 0 to 1. Afterwards it follows a
  cosine curve over the remaining steps and is clamped at 0 from below.

  Args:
    optimizer (:class:`~torch.optim.Optimizer`):
      The optimizer whose learning rate is scheduled.
    num_warmup_steps (:obj:`int`):
      Length of the warmup phase, in steps.
    num_training_steps (:obj:`int`):
      Total number of training steps.
    num_cycles (:obj:`float`, `optional`, defaults to 0.5):
      Number of cosine waves in the decay phase (0.5 means a single
      half-cosine from the maximum down to 0).
    last_epoch (:obj:`int`, `optional`, defaults to -1):
      Index of the last epoch when resuming training.

  Return:
    :obj:`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
  """
  # Both spans are floored at 1 so the divisions below can never be by zero.
  warmup_span = float(max(1, num_warmup_steps))
  decay_span = float(max(1, num_training_steps - num_warmup_steps))

  def lr_lambda(current_step):
    in_warmup = current_step < num_warmup_steps
    if in_warmup:
      # Linear ramp from 0 towards 1.
      return float(current_step) / warmup_span

    # Fraction of the decay phase completed so far.
    progress = float(current_step - num_warmup_steps) / decay_span
    cosine = math.cos(math.pi * float(num_cycles) * 2.0 * progress)
    return max(0.0, 0.5 * (1.0 + cosine))

  return LambdaLR(optimizer, lr_lambda, last_epoch)


# Model Function
- Model forward function.

In [110]:
import torch


def model_fn(batch, model, criterion, device):
  """Run one batch through ``model`` and return ``(loss, accuracy)``.

  ``batch`` is a ``(mels, labels)`` pair; both tensors are moved to
  ``device`` before the forward pass. ``accuracy`` is the fraction of
  samples whose highest-scoring class equals the label.
  """
  features, targets = batch
  features = features.to(device)
  targets = targets.to(device)

  logits = model(features)
  loss = criterion(logits, targets)

  # Predicted speaker = index of the largest score along the class axis.
  predicted = torch.argmax(logits, dim=1)
  accuracy = (predicted == targets).float().mean()

  return loss, accuracy


# Validate
- Calculate accuracy of the validation set.

In [111]:
from tqdm import tqdm
import torch

def valid(dataloader, model, criterion, device): 
  """Evaluate ``model`` on ``dataloader`` and return the mean batch accuracy.

  The model is switched to eval mode for the pass and back to train mode
  before returning. Progress, running loss and running accuracy are shown
  on a tqdm bar.
  """
  model.eval()
  loss_sum = 0.0
  accuracy_sum = 0.0
  pbar = tqdm(total=len(dataloader.dataset), ncols=0, desc="Valid", unit=" uttr", position=0, leave=True)

  # Count batches from 1 so the running averages can divide by the counter directly.
  for n_batches, batch in enumerate(dataloader, start=1):
    # No gradients are needed for evaluation.
    with torch.no_grad():
      loss, accuracy = model_fn(batch, model, criterion, device)
    loss_sum += loss.item()
    accuracy_sum += accuracy.item()

    pbar.update(dataloader.batch_size)
    pbar.set_postfix(
      loss=f"{loss_sum / n_batches:.5f}",
      accuracy=f"{accuracy_sum / n_batches:.5f}",
    )

  pbar.close()
  model.train()

  return accuracy_sum / len(dataloader)


# Main function (Training)

In [None]:
import os
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

def parse_args():
  """Return the training settings as a dict of keyword arguments for ``main``."""
  return dict(
    data_dir="./Dataset",
    model_path=CONFIG['MODEL_PATH'],
    best_val_path=CONFIG['BEST_VAL_PATH'],
    batch_size=32,
    n_workers=8,
    valid_steps=2000,
    warmup_steps=0,
    save_steps=2000,
    total_steps=200000,
  )


def main(
  data_dir,
  model_path,
  best_val_path,
  batch_size,
  n_workers,
  valid_steps,
  warmup_steps,
  total_steps,
  save_steps,
):
  """Train the classifier, validating and checkpointing at fixed intervals.

  Args:
    data_dir: Directory with the dataset files.
    model_path: File the best weights are saved to; if it already exists the
      weights are loaded from it before training.
    best_val_path: Text file storing the best validation accuracy; if it
      exists its value is used as the starting threshold.
    batch_size: Batch size for both data loaders.
    n_workers: Number of data-loading worker processes.
    valid_steps: Run validation every this many training steps.
    warmup_steps: Currently unused (the LR scheduler is commented out below).
    total_steps: Total number of training steps.
    save_steps: Write the best weights to disk every this many steps.
  """
  import copy  # local import: only needed to snapshot the best weights

  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  print(f"[Info]: Use {device} now!")

  train_loader, valid_loader, speaker_num = get_dataloader(data_dir, batch_size, n_workers)
  train_iterator = iter(train_loader)
  print("[Info]: Finish loading data!",flush = True)

  model = Classifier(n_spks=speaker_num).to(device)
  if os.path.isfile(model_path):
    print('loading previous model parameters...')
    ckpt = torch.load(model_path, map_location='cpu')  # Load your best model
    model.load_state_dict(ckpt)

  criterion = nn.CrossEntropyLoss()
  optimizer = AdamW(model.parameters())
  # NOTE: the warmup/cosine schedule is disabled, so AdamW runs at its default lr.
  # scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
  print("[Info]: Finish creating model!",flush = True)

  best_accuracy = -1.0
  best_state_dict = None
  # Resume the best-accuracy threshold from a previous run, if one was recorded.
  if os.path.isfile(best_val_path):
    with open(best_val_path, 'r') as f:
      best_accuracy = float(f.read())
      print(f'best_accuracy: {best_accuracy}')

  pbar = tqdm(total=valid_steps, ncols=0, desc="Train", unit=" step", position=0, leave=True)

  # Running sums for the averages shown on the progress bar; reset after
  # every validation pass.
  tot_batch_loss = 0
  tot_batch_accuracy = 0
  tot_step = 0
  for step in range(total_steps):
    # Get data
    try:
      batch = next(train_iterator)
    except StopIteration:
      # The loader is exhausted: start a new pass over the training set.
      train_iterator = iter(train_loader)
      batch = next(train_iterator)

    loss, accuracy = model_fn(batch, model, criterion, device)
    batch_loss = loss.item()
    batch_accuracy = accuracy.item()
    tot_batch_loss += batch_loss
    tot_batch_accuracy += batch_accuracy

    # Update model
    loss.backward()
    optimizer.step()
    # scheduler.step()
    optimizer.zero_grad()
    
    # Log
    tot_step += 1
    pbar.update()
    pbar.set_postfix(
      loss=f"{tot_batch_loss/tot_step:.5f}",
      accuracy=f"{tot_batch_accuracy/tot_step:.5f}",
      step=step + 1,
    )

    # Do validation
    if (step + 1) % valid_steps == 0:
      tot_step = 0
      tot_batch_loss = 0
      tot_batch_accuracy = 0
      pbar.close()

      valid_accuracy = valid(valid_loader, model, criterion, device)

      # keep the best model
      if valid_accuracy > best_accuracy:
        best_accuracy = valid_accuracy
        # state_dict() hands back references to the live parameters, which
        # keep changing as training continues. Snapshot them with a deep copy
        # so a later save really writes the weights that scored best.
        best_state_dict = copy.deepcopy(model.state_dict())

      pbar = tqdm(total=valid_steps, ncols=0, desc="Train", unit=" step", position=0, leave=True)

    # Save the best model so far.
    if (step + 1) % save_steps == 0 and best_state_dict is not None:
      torch.save(best_state_dict, model_path)
      with open(best_val_path, 'w') as f:
        f.write(str(best_accuracy))

      pbar.write(f"Step {step + 1}, best model saved. (accuracy={best_accuracy:.4f})")

  pbar.close()


# Entry point: start training with the settings returned by parse_args().
if __name__ == "__main__":
  main(**parse_args())


[Info]: Use cuda now!


  cpuset_checked))


[Info]: Finish loading data!
[Info]: Finish creating model!


Train: 100% 2000/2000 [03:16<00:00, 10.20 step/s, accuracy=0.20425, loss=4.16756, step=2000]
Valid: 100% 6944/6944 [00:27<00:00, 250.42 uttr/s, accuracy=0.38465, loss=2.80796]
Train:   0% 4/2000 [00:00<12:35,  2.64 step/s, accuracy=0.31250, loss=3.06093, step=2004]

Step 2000, best model saved. (accuracy=0.3846)


Train: 100% 2000/2000 [03:53<00:00,  8.58 step/s, accuracy=0.38453, loss=2.80420, step=4000]
Valid: 100% 6944/6944 [00:27<00:00, 255.09 uttr/s, accuracy=0.52059, loss=2.10977]
Train:   0% 4/2000 [00:00<05:16,  6.31 step/s, accuracy=0.42969, loss=2.50333, step=4004]

Step 4000, best model saved. (accuracy=0.5206)


Train: 100% 2000/2000 [03:52<00:00,  8.60 step/s, accuracy=0.47725, loss=2.28857, step=6000]
Valid: 100% 6944/6944 [00:26<00:00, 260.56 uttr/s, accuracy=0.58698, loss=1.77102]
Train:   0% 4/2000 [00:00<04:56,  6.73 step/s, accuracy=0.50000, loss=1.95734, step=6004]

Step 6000, best model saved. (accuracy=0.5870)


Train: 100% 2000/2000 [03:52<00:00,  8.60 step/s, accuracy=0.54444, loss=1.93624, step=8000]
Valid: 100% 6944/6944 [00:27<00:00, 254.17 uttr/s, accuracy=0.64127, loss=1.51869]
Train:   0% 4/2000 [00:00<05:05,  6.54 step/s, accuracy=0.57031, loss=1.80592, step=8004]

Step 8000, best model saved. (accuracy=0.6413)


Train: 100% 2000/2000 [03:51<00:00,  8.66 step/s, accuracy=0.59391, loss=1.70232, step=1e+4]
Valid: 100% 6944/6944 [00:27<00:00, 255.22 uttr/s, accuracy=0.68275, loss=1.30293]
Train:   0% 4/2000 [00:00<05:12,  6.39 step/s, accuracy=0.65625, loss=1.55690, step=1e+4]

Step 10000, best model saved. (accuracy=0.6827)


Train: 100% 2000/2000 [03:49<00:00,  8.72 step/s, accuracy=0.62917, loss=1.53534, step=12000]
Valid: 100% 6944/6944 [00:27<00:00, 253.80 uttr/s, accuracy=0.71904, loss=1.17548]
Train:   0% 4/2000 [00:00<05:46,  5.76 step/s, accuracy=0.67969, loss=1.20467, step=12004]

Step 12000, best model saved. (accuracy=0.7190)


Train: 100% 2000/2000 [03:48<00:00,  8.74 step/s, accuracy=0.65750, loss=1.37767, step=14000]
Valid: 100% 6944/6944 [00:27<00:00, 251.66 uttr/s, accuracy=0.73056, loss=1.10427]
Train:   0% 4/2000 [00:00<04:59,  6.67 step/s, accuracy=0.66406, loss=1.40523, step=14004]

Step 14000, best model saved. (accuracy=0.7306)


Train: 100% 2000/2000 [03:48<00:00,  8.75 step/s, accuracy=0.68237, loss=1.27835, step=16000]
Valid: 100% 6944/6944 [00:27<00:00, 252.82 uttr/s, accuracy=0.75562, loss=1.02885]
Train:   0% 4/2000 [00:00<04:59,  6.66 step/s, accuracy=0.77344, loss=1.02192, step=16004]

Step 16000, best model saved. (accuracy=0.7556)


Train: 100% 2000/2000 [03:48<00:00,  8.77 step/s, accuracy=0.70220, loss=1.18921, step=18000]
Valid: 100% 6944/6944 [00:28<00:00, 247.16 uttr/s, accuracy=0.77347, loss=0.93357]
Train:   0% 4/2000 [00:00<04:55,  6.76 step/s, accuracy=0.77344, loss=0.82014, step=18004]

Step 18000, best model saved. (accuracy=0.7735)


Train: 100% 2000/2000 [03:47<00:00,  8.78 step/s, accuracy=0.71931, loss=1.11660, step=2e+4]
Valid: 100% 6944/6944 [00:27<00:00, 252.77 uttr/s, accuracy=0.79003, loss=0.86372]
Train:   0% 4/2000 [00:00<05:12,  6.38 step/s, accuracy=0.77344, loss=0.85589, step=2e+4]

Step 20000, best model saved. (accuracy=0.7900)


Train: 100% 2000/2000 [03:47<00:00,  8.78 step/s, accuracy=0.73459, loss=1.04107, step=22000]
Valid: 100% 6944/6944 [00:28<00:00, 246.27 uttr/s, accuracy=0.79623, loss=0.83395]
Train:   0% 4/2000 [00:00<04:53,  6.80 step/s, accuracy=0.67188, loss=1.44915, step=22004]

Step 22000, best model saved. (accuracy=0.7962)


Train: 100% 2000/2000 [03:47<00:00,  8.81 step/s, accuracy=0.74509, loss=0.99208, step=24000]
Valid: 100% 6944/6944 [00:27<00:00, 252.05 uttr/s, accuracy=0.79493, loss=0.82355]
Train:   0% 5/2000 [00:00<04:55,  6.76 step/s, accuracy=0.75625, loss=1.03382, step=24005]

Step 24000, best model saved. (accuracy=0.7962)


Train: 100% 2000/2000 [03:46<00:00,  8.84 step/s, accuracy=0.75820, loss=0.93970, step=26000]
Valid: 100% 6944/6944 [00:28<00:00, 242.14 uttr/s, accuracy=0.81639, loss=0.73942]
Train:   0% 5/2000 [00:00<05:18,  6.26 step/s, accuracy=0.78750, loss=0.75185, step=26005]

Step 26000, best model saved. (accuracy=0.8164)


Train: 100% 2000/2000 [03:46<00:00,  8.83 step/s, accuracy=0.76552, loss=0.90075, step=28000]
Valid: 100% 6944/6944 [00:28<00:00, 244.56 uttr/s, accuracy=0.82560, loss=0.72453]
Train:   0% 4/2000 [00:00<05:21,  6.21 step/s, accuracy=0.71875, loss=1.03268, step=28004]

Step 28000, best model saved. (accuracy=0.8256)


Train: 100% 2000/2000 [03:46<00:00,  8.81 step/s, accuracy=0.77722, loss=0.86479, step=3e+4]
Valid: 100% 6944/6944 [00:27<00:00, 249.06 uttr/s, accuracy=0.82560, loss=0.71800]
Train:   0% 5/2000 [00:00<05:42,  5.82 step/s, accuracy=0.76250, loss=0.77052, step=3e+4]

Step 30000, best model saved. (accuracy=0.8256)


Train: 100% 2000/2000 [03:42<00:00,  8.99 step/s, accuracy=0.78464, loss=0.82605, step=32000]
Valid: 100% 6944/6944 [00:28<00:00, 241.81 uttr/s, accuracy=0.83007, loss=0.69448]
Train:   0% 4/2000 [00:00<05:00,  6.64 step/s, accuracy=0.75000, loss=0.72662, step=32004]

Step 32000, best model saved. (accuracy=0.8301)


Train: 100% 2000/2000 [03:43<00:00,  8.94 step/s, accuracy=0.79539, loss=0.77862, step=34000]
Valid: 100% 6944/6944 [00:29<00:00, 237.18 uttr/s, accuracy=0.83396, loss=0.68939]
Train:   0% 4/2000 [00:00<05:02,  6.59 step/s, accuracy=0.78125, loss=0.87091, step=34004]

Step 34000, best model saved. (accuracy=0.8340)


Train: 100% 2000/2000 [03:42<00:00,  8.99 step/s, accuracy=0.79622, loss=0.77661, step=36000]
Valid: 100% 6944/6944 [00:29<00:00, 237.36 uttr/s, accuracy=0.84317, loss=0.65618]
Train:   0% 5/2000 [00:00<05:01,  6.62 step/s, accuracy=0.83750, loss=0.57472, step=36005]

Step 36000, best model saved. (accuracy=0.8432)


Train: 100% 2000/2000 [03:40<00:00,  9.07 step/s, accuracy=0.80444, loss=0.73833, step=38000]
Valid: 100% 6944/6944 [00:29<00:00, 238.24 uttr/s, accuracy=0.84159, loss=0.64788]
Train:   0% 5/2000 [00:00<05:12,  6.39 step/s, accuracy=0.82500, loss=0.68187, step=38005]

Step 38000, best model saved. (accuracy=0.8432)


Train: 100% 2000/2000 [03:39<00:00,  9.10 step/s, accuracy=0.80894, loss=0.72144, step=4e+4]
Valid: 100% 6944/6944 [00:29<00:00, 237.32 uttr/s, accuracy=0.85325, loss=0.62376]
Train:   0% 4/2000 [00:00<05:27,  6.09 step/s, accuracy=0.78125, loss=0.64880, step=4e+4]

Step 40000, best model saved. (accuracy=0.8533)


Train: 100% 2000/2000 [03:39<00:00,  9.10 step/s, accuracy=0.81183, loss=0.70415, step=42000]
Valid: 100% 6944/6944 [00:28<00:00, 241.51 uttr/s, accuracy=0.85757, loss=0.59540]
Train:   0% 4/2000 [00:00<05:47,  5.75 step/s, accuracy=0.78906, loss=0.85606, step=42004]

Step 42000, best model saved. (accuracy=0.8576)


Train: 100% 2000/2000 [03:38<00:00,  9.17 step/s, accuracy=0.81969, loss=0.67809, step=44000]
Valid: 100% 6944/6944 [00:28<00:00, 241.58 uttr/s, accuracy=0.86247, loss=0.58195]
Train:   0% 4/2000 [00:00<05:26,  6.12 step/s, accuracy=0.82812, loss=0.68309, step=44004]

Step 44000, best model saved. (accuracy=0.8625)


Train: 100% 2000/2000 [03:38<00:00,  9.14 step/s, accuracy=0.82325, loss=0.65546, step=46000]
Valid: 100% 6944/6944 [00:30<00:00, 228.59 uttr/s, accuracy=0.85426, loss=0.59459]
Train:   0% 4/2000 [00:00<07:23,  4.50 step/s, accuracy=0.84375, loss=0.70388, step=46004]

Step 46000, best model saved. (accuracy=0.8625)


Train: 100% 2000/2000 [03:36<00:00,  9.22 step/s, accuracy=0.82684, loss=0.64435, step=48000]
Valid: 100% 6944/6944 [00:30<00:00, 227.85 uttr/s, accuracy=0.86550, loss=0.56383]
Train:   0% 4/2000 [00:00<06:37,  5.02 step/s, accuracy=0.85156, loss=0.56842, step=48004]

Step 48000, best model saved. (accuracy=0.8655)


Train: 100% 2000/2000 [03:37<00:00,  9.21 step/s, accuracy=0.83123, loss=0.62806, step=5e+4]
Valid: 100% 6944/6944 [00:30<00:00, 227.39 uttr/s, accuracy=0.86161, loss=0.57031]
Train:   0% 4/2000 [00:00<07:34,  4.39 step/s, accuracy=0.84375, loss=0.59801, step=5e+4]

Step 50000, best model saved. (accuracy=0.8655)


Train: 100% 2000/2000 [03:39<00:00,  9.12 step/s, accuracy=0.83466, loss=0.61030, step=52000]
Valid: 100% 6944/6944 [00:29<00:00, 232.30 uttr/s, accuracy=0.87010, loss=0.55063]
Train:   0% 4/2000 [00:00<07:44,  4.30 step/s, accuracy=0.83594, loss=0.59813, step=52004]

Step 52000, best model saved. (accuracy=0.8701)


Train: 100% 2000/2000 [03:42<00:00,  8.99 step/s, accuracy=0.84186, loss=0.58991, step=54000]
Valid: 100% 6944/6944 [00:30<00:00, 228.74 uttr/s, accuracy=0.87442, loss=0.51337]
Train:   0% 4/2000 [00:00<07:02,  4.73 step/s, accuracy=0.81250, loss=0.61904, step=54004]

Step 54000, best model saved. (accuracy=0.8744)


Train: 100% 2000/2000 [03:43<00:00,  8.95 step/s, accuracy=0.83916, loss=0.59852, step=56000]
Valid: 100% 6944/6944 [00:30<00:00, 228.46 uttr/s, accuracy=0.86722, loss=0.55675]
Train:   0% 4/2000 [00:00<07:28,  4.45 step/s, accuracy=0.81250, loss=0.62900, step=56004]

Step 56000, best model saved. (accuracy=0.8744)


Train: 100% 2000/2000 [03:44<00:00,  8.90 step/s, accuracy=0.84523, loss=0.57350, step=58000]
Valid: 100% 6944/6944 [00:29<00:00, 233.50 uttr/s, accuracy=0.87716, loss=0.52609]
Train:   0% 4/2000 [00:00<07:43,  4.31 step/s, accuracy=0.82031, loss=0.65003, step=58004]

Step 58000, best model saved. (accuracy=0.8772)


Train: 100% 2000/2000 [03:45<00:00,  8.86 step/s, accuracy=0.84564, loss=0.56740, step=6e+4]
Valid: 100% 6944/6944 [00:30<00:00, 228.31 uttr/s, accuracy=0.87198, loss=0.54547]
Train:   0% 4/2000 [00:00<06:12,  5.37 step/s, accuracy=0.86719, loss=0.48727, step=6e+4]

Step 60000, best model saved. (accuracy=0.8772)


Train: 100% 2000/2000 [03:46<00:00,  8.82 step/s, accuracy=0.85130, loss=0.54608, step=62000]
Valid: 100% 6944/6944 [00:29<00:00, 234.11 uttr/s, accuracy=0.87918, loss=0.50224]
Train:   0% 4/2000 [00:00<07:03,  4.72 step/s, accuracy=0.89062, loss=0.36236, step=62004]

Step 62000, best model saved. (accuracy=0.8792)


Train: 100% 2000/2000 [03:50<00:00,  8.68 step/s, accuracy=0.85027, loss=0.54465, step=64000]
Valid: 100% 6944/6944 [00:29<00:00, 236.25 uttr/s, accuracy=0.88018, loss=0.49406]
Train:   0% 4/2000 [00:00<07:37,  4.37 step/s, accuracy=0.81250, loss=0.77562, step=64004]

Step 64000, best model saved. (accuracy=0.8802)


Train: 100% 2000/2000 [03:51<00:00,  8.66 step/s, accuracy=0.85166, loss=0.54152, step=66000]
Valid: 100% 6944/6944 [00:29<00:00, 234.73 uttr/s, accuracy=0.87442, loss=0.51111]
Train:   0% 4/2000 [00:00<06:58,  4.77 step/s, accuracy=0.89062, loss=0.43548, step=66004]

Step 66000, best model saved. (accuracy=0.8802)


Train: 100% 2000/2000 [03:52<00:00,  8.59 step/s, accuracy=0.85817, loss=0.51991, step=68000]
Valid: 100% 6944/6944 [00:29<00:00, 237.69 uttr/s, accuracy=0.87946, loss=0.50964]
Train:   0% 4/2000 [00:00<07:28,  4.45 step/s, accuracy=0.84375, loss=0.57728, step=68004]

Step 68000, best model saved. (accuracy=0.8802)


Train: 100% 2000/2000 [03:53<00:00,  8.55 step/s, accuracy=0.85809, loss=0.51437, step=7e+4]
Valid: 100% 6944/6944 [00:29<00:00, 232.18 uttr/s, accuracy=0.88378, loss=0.48784]
Train:   0% 4/2000 [00:00<08:35,  3.87 step/s, accuracy=0.78125, loss=0.69507, step=7e+4]

Step 70000, best model saved. (accuracy=0.8838)


Train: 100% 2000/2000 [03:56<00:00,  8.47 step/s, accuracy=0.86119, loss=0.50855, step=72000]
Valid: 100% 6944/6944 [00:29<00:00, 236.42 uttr/s, accuracy=0.87817, loss=0.50714]
Train:   0% 4/2000 [00:00<07:29,  4.44 step/s, accuracy=0.86719, loss=0.45456, step=72004]

Step 72000, best model saved. (accuracy=0.8838)


Train: 100% 2000/2000 [03:56<00:00,  8.47 step/s, accuracy=0.86253, loss=0.50033, step=74000]
Valid: 100% 6944/6944 [00:29<00:00, 237.88 uttr/s, accuracy=0.87961, loss=0.48919]
Train:   0% 4/2000 [00:00<07:55,  4.20 step/s, accuracy=0.83594, loss=0.54612, step=74004]

Step 74000, best model saved. (accuracy=0.8838)


Train: 100% 2000/2000 [03:57<00:00,  8.42 step/s, accuracy=0.86453, loss=0.49207, step=76000]
Valid: 100% 6944/6944 [00:29<00:00, 235.79 uttr/s, accuracy=0.88753, loss=0.48502]
Train:   0% 4/2000 [00:00<07:15,  4.58 step/s, accuracy=0.84375, loss=0.66413, step=76004]

Step 76000, best model saved. (accuracy=0.8875)


Train: 100% 2000/2000 [03:57<00:00,  8.43 step/s, accuracy=0.86662, loss=0.48256, step=78000]
Valid: 100% 6944/6944 [00:28<00:00, 243.83 uttr/s, accuracy=0.88321, loss=0.49195]
Train:   0% 4/2000 [00:00<08:29,  3.92 step/s, accuracy=0.82812, loss=0.60741, step=78004]

Step 78000, best model saved. (accuracy=0.8875)


Train: 100% 2000/2000 [03:57<00:00,  8.42 step/s, accuracy=0.86595, loss=0.48780, step=8e+4]
Valid: 100% 6944/6944 [00:29<00:00, 239.26 uttr/s, accuracy=0.88393, loss=0.49070]
Train:   0% 5/2000 [00:00<08:04,  4.12 step/s, accuracy=0.80000, loss=0.71149, step=8e+4]

Step 80000, best model saved. (accuracy=0.8875)


Train: 100% 2000/2000 [03:57<00:00,  8.43 step/s, accuracy=0.86762, loss=0.47802, step=82000]
Valid: 100% 6944/6944 [00:27<00:00, 254.68 uttr/s, accuracy=0.88695, loss=0.47241]
Train:   0% 4/2000 [00:00<06:02,  5.50 step/s, accuracy=0.85938, loss=0.42395, step=82004]

Step 82000, best model saved. (accuracy=0.8875)


Train: 100% 2000/2000 [03:57<00:00,  8.44 step/s, accuracy=0.86997, loss=0.47295, step=84000]
Valid: 100% 6944/6944 [00:26<00:00, 258.44 uttr/s, accuracy=0.88522, loss=0.47621]
Train:   0% 4/2000 [00:00<05:44,  5.80 step/s, accuracy=0.88281, loss=0.62506, step=84004]

Step 84000, best model saved. (accuracy=0.8875)


Train: 100% 2000/2000 [03:57<00:00,  8.44 step/s, accuracy=0.87052, loss=0.46926, step=86000]
Valid: 100% 6944/6944 [00:28<00:00, 247.58 uttr/s, accuracy=0.88335, loss=0.47422]
Train:   0% 4/2000 [00:00<05:40,  5.86 step/s, accuracy=0.92969, loss=0.34294, step=86004]

Step 86000, best model saved. (accuracy=0.8875)


Train: 100% 2000/2000 [03:56<00:00,  8.45 step/s, accuracy=0.87425, loss=0.45513, step=88000]
Valid: 100% 6944/6944 [00:27<00:00, 254.21 uttr/s, accuracy=0.88666, loss=0.47441]
Train:   0% 4/2000 [00:00<05:20,  6.23 step/s, accuracy=0.88281, loss=0.40947, step=88004]

Step 88000, best model saved. (accuracy=0.8875)


Train: 100% 2000/2000 [03:56<00:00,  8.45 step/s, accuracy=0.87503, loss=0.45117, step=9e+4]
Valid: 100% 6944/6944 [00:27<00:00, 249.64 uttr/s, accuracy=0.89516, loss=0.44430]
Train:   0% 4/2000 [00:00<05:51,  5.68 step/s, accuracy=0.94531, loss=0.27336, step=9e+4]

Step 90000, best model saved. (accuracy=0.8952)


Train: 100% 2000/2000 [03:56<00:00,  8.47 step/s, accuracy=0.87513, loss=0.44815, step=92000]
Valid: 100% 6944/6944 [00:28<00:00, 240.10 uttr/s, accuracy=0.89545, loss=0.42761]
Train:   0% 4/2000 [00:00<05:31,  6.02 step/s, accuracy=0.91406, loss=0.30177, step=92004]

Step 92000, best model saved. (accuracy=0.8954)


Train: 100% 2000/2000 [03:56<00:00,  8.44 step/s, accuracy=0.87709, loss=0.44495, step=94000]
Valid: 100% 6944/6944 [00:28<00:00, 244.66 uttr/s, accuracy=0.88436, loss=0.47697]
Train:   0% 4/2000 [00:01<22:36,  1.47 step/s, accuracy=0.89844, loss=0.36272, step=94004]

Step 94000, best model saved. (accuracy=0.8954)


Train: 100% 2000/2000 [03:56<00:00,  8.47 step/s, accuracy=0.87944, loss=0.43608, step=96000]
Valid: 100% 6944/6944 [00:28<00:00, 241.25 uttr/s, accuracy=0.88998, loss=0.46385]
Train:   0% 4/2000 [00:00<06:47,  4.90 step/s, accuracy=0.85156, loss=0.46828, step=96004]

Step 96000, best model saved. (accuracy=0.8954)


Train: 100% 2000/2000 [03:55<00:00,  8.49 step/s, accuracy=0.87995, loss=0.43247, step=98000]
Valid: 100% 6944/6944 [00:28<00:00, 243.08 uttr/s, accuracy=0.89099, loss=0.45692]
Train:   0% 4/2000 [00:00<05:47,  5.75 step/s, accuracy=0.87500, loss=0.46707, step=98004]

Step 98000, best model saved. (accuracy=0.8954)


Train: 100% 2000/2000 [03:53<00:00,  8.57 step/s, accuracy=0.87820, loss=0.43507, step=1e+5]
Valid: 100% 6944/6944 [00:28<00:00, 246.36 uttr/s, accuracy=0.89055, loss=0.46881]
Train:   0% 4/2000 [00:00<05:19,  6.25 step/s, accuracy=0.91406, loss=0.29560, step=1e+5]

Step 100000, best model saved. (accuracy=0.8954)


Train: 100% 2000/2000 [03:53<00:00,  8.57 step/s, accuracy=0.88222, loss=0.42332, step=102000]
Valid: 100% 6944/6944 [00:28<00:00, 242.41 uttr/s, accuracy=0.89372, loss=0.45005]
Train:   0% 4/2000 [00:00<04:59,  6.67 step/s, accuracy=0.90625, loss=0.39642, step=102004]

Step 102000, best model saved. (accuracy=0.8954)


Train: 100% 2000/2000 [03:52<00:00,  8.58 step/s, accuracy=0.88462, loss=0.41402, step=104000]
Valid: 100% 6944/6944 [00:29<00:00, 233.52 uttr/s, accuracy=0.89574, loss=0.44676]
Train:   0% 4/2000 [00:00<05:24,  6.14 step/s, accuracy=0.88281, loss=0.38115, step=104004]

Step 104000, best model saved. (accuracy=0.8957)


Train: 100% 2000/2000 [03:53<00:00,  8.58 step/s, accuracy=0.88348, loss=0.41749, step=106000]
Valid: 100% 6944/6944 [00:29<00:00, 235.63 uttr/s, accuracy=0.89905, loss=0.43146]
Train:   0% 4/2000 [00:00<05:04,  6.56 step/s, accuracy=0.85156, loss=0.40832, step=106004]

Step 106000, best model saved. (accuracy=0.8990)


Train: 100% 2000/2000 [03:51<00:00,  8.62 step/s, accuracy=0.88255, loss=0.41961, step=108000]
Valid: 100% 6944/6944 [00:29<00:00, 235.36 uttr/s, accuracy=0.90092, loss=0.43305]
Train:   0% 4/2000 [00:00<05:28,  6.08 step/s, accuracy=0.89844, loss=0.28487, step=108004]

Step 108000, best model saved. (accuracy=0.9009)


Train: 100% 2000/2000 [03:52<00:00,  8.59 step/s, accuracy=0.88300, loss=0.41676, step=110000]
Valid: 100% 6944/6944 [00:29<00:00, 238.97 uttr/s, accuracy=0.89401, loss=0.44960]
Train:   0% 3/2000 [00:00<07:13,  4.61 step/s, accuracy=0.87500, loss=0.39825, step=110003]

Step 110000, best model saved. (accuracy=0.9009)


Train: 100% 2000/2000 [03:52<00:00,  8.61 step/s, accuracy=0.88648, loss=0.40708, step=112000]
Valid: 100% 6944/6944 [00:29<00:00, 231.58 uttr/s, accuracy=0.90135, loss=0.42426]
Train:   0% 4/2000 [00:01<23:06,  1.44 step/s, accuracy=0.89062, loss=0.40884, step=112004]

Step 112000, best model saved. (accuracy=0.9014)


Train: 100% 2000/2000 [03:52<00:00,  8.62 step/s, accuracy=0.88492, loss=0.40806, step=114000]
Valid: 100% 6944/6944 [00:30<00:00, 230.14 uttr/s, accuracy=0.89862, loss=0.43007]
Train:   0% 4/2000 [00:00<05:50,  5.70 step/s, accuracy=0.83594, loss=0.60916, step=114004]

Step 114000, best model saved. (accuracy=0.9014)


Train: 100% 2000/2000 [03:51<00:00,  8.64 step/s, accuracy=0.88702, loss=0.40335, step=116000]
Valid: 100% 6944/6944 [00:30<00:00, 229.44 uttr/s, accuracy=0.89559, loss=0.43832]
Train:   0% 4/2000 [00:00<05:43,  5.80 step/s, accuracy=0.84375, loss=0.58772, step=116004]

Step 116000, best model saved. (accuracy=0.9014)


Train: 100% 2000/2000 [03:48<00:00,  8.75 step/s, accuracy=0.88966, loss=0.39374, step=118000]
Valid: 100% 6944/6944 [00:30<00:00, 224.62 uttr/s, accuracy=0.89689, loss=0.42935]
Train:   0% 4/2000 [00:00<05:21,  6.20 step/s, accuracy=0.87500, loss=0.35571, step=118004]

Step 118000, best model saved. (accuracy=0.9014)


Train: 100% 2000/2000 [03:47<00:00,  8.78 step/s, accuracy=0.88873, loss=0.39608, step=120000]
Valid: 100% 6944/6944 [00:30<00:00, 227.06 uttr/s, accuracy=0.89271, loss=0.44252]
Train:   0% 4/2000 [00:00<08:28,  3.93 step/s, accuracy=0.85156, loss=0.54777, step=120004]

Step 120000, best model saved. (accuracy=0.9014)


Train: 100% 2000/2000 [03:49<00:00,  8.73 step/s, accuracy=0.88894, loss=0.38867, step=122000]
Valid: 100% 6944/6944 [00:30<00:00, 226.70 uttr/s, accuracy=0.90020, loss=0.42911]
Train:   0% 4/2000 [00:00<06:16,  5.30 step/s, accuracy=0.91406, loss=0.39054, step=122004]

Step 122000, best model saved. (accuracy=0.9014)


Train: 100% 2000/2000 [03:48<00:00,  8.73 step/s, accuracy=0.88933, loss=0.39268, step=124000]
Valid: 100% 6944/6944 [00:31<00:00, 217.13 uttr/s, accuracy=0.89891, loss=0.43259]
Train:   0% 5/2000 [00:00<06:11,  5.37 step/s, accuracy=0.86250, loss=0.44853, step=124005]

Step 124000, best model saved. (accuracy=0.9014)


Train: 100% 2000/2000 [03:48<00:00,  8.77 step/s, accuracy=0.89089, loss=0.39116, step=126000]
Valid: 100% 6944/6944 [00:31<00:00, 223.39 uttr/s, accuracy=0.90611, loss=0.41337]
Train:   0% 4/2000 [00:00<07:53,  4.22 step/s, accuracy=0.90625, loss=0.33887, step=126004]

Step 126000, best model saved. (accuracy=0.9061)


Train: 100% 2000/2000 [03:47<00:00,  8.78 step/s, accuracy=0.89095, loss=0.38548, step=128000]
Valid: 100% 6944/6944 [00:30<00:00, 228.52 uttr/s, accuracy=0.90783, loss=0.41017]
Train:   0% 4/2000 [00:00<06:50,  4.86 step/s, accuracy=0.91406, loss=0.37385, step=128004]

Step 128000, best model saved. (accuracy=0.9078)


Train: 100% 2000/2000 [03:47<00:00,  8.80 step/s, accuracy=0.89575, loss=0.37338, step=130000]
Valid: 100% 6944/6944 [00:30<00:00, 226.54 uttr/s, accuracy=0.90207, loss=0.43806]
Train:   0% 4/2000 [00:00<06:45,  4.92 step/s, accuracy=0.87500, loss=0.47300, step=130004]

Step 130000, best model saved. (accuracy=0.9078)


Train: 100% 2000/2000 [03:50<00:00,  8.67 step/s, accuracy=0.89212, loss=0.37990, step=132000]
Valid: 100% 6944/6944 [00:30<00:00, 224.73 uttr/s, accuracy=0.89862, loss=0.43244]
Train:   0% 4/2000 [00:00<08:02,  4.14 step/s, accuracy=0.91406, loss=0.30290, step=132004]

Step 132000, best model saved. (accuracy=0.9078)


Train: 100% 2000/2000 [03:49<00:00,  8.70 step/s, accuracy=0.89508, loss=0.37201, step=134000]
Valid: 100% 6944/6944 [00:31<00:00, 222.65 uttr/s, accuracy=0.89977, loss=0.42320]
Train:   0% 4/2000 [00:00<07:24,  4.49 step/s, accuracy=0.89062, loss=0.37686, step=134004]

Step 134000, best model saved. (accuracy=0.9078)


Train: 100% 2000/2000 [03:51<00:00,  8.66 step/s, accuracy=0.89164, loss=0.38548, step=136000]
Valid: 100% 6944/6944 [00:31<00:00, 220.70 uttr/s, accuracy=0.90740, loss=0.38159]
Train:   0% 4/2000 [00:00<06:07,  5.43 step/s, accuracy=0.92188, loss=0.32327, step=136004]

Step 136000, best model saved. (accuracy=0.9078)


Train: 100% 2000/2000 [03:51<00:00,  8.63 step/s, accuracy=0.89509, loss=0.37244, step=138000]
Valid: 100% 6944/6944 [00:31<00:00, 223.77 uttr/s, accuracy=0.90711, loss=0.40077]
Train:   0% 4/2000 [00:00<08:50,  3.76 step/s, accuracy=0.85938, loss=0.56201, step=138004]

Step 138000, best model saved. (accuracy=0.9078)


Train: 100% 2000/2000 [03:52<00:00,  8.60 step/s, accuracy=0.89328, loss=0.37604, step=140000]
Valid: 100% 6944/6944 [00:31<00:00, 220.77 uttr/s, accuracy=0.90179, loss=0.41712]
Train:   0% 4/2000 [00:00<06:44,  4.93 step/s, accuracy=0.91406, loss=0.41098, step=140004]

Step 140000, best model saved. (accuracy=0.9078)


Train: 100% 2000/2000 [03:56<00:00,  8.44 step/s, accuracy=0.89578, loss=0.37212, step=142000]
Valid: 100% 6944/6944 [00:30<00:00, 226.90 uttr/s, accuracy=0.90207, loss=0.40119]
Train:   0% 4/2000 [00:00<10:57,  3.03 step/s, accuracy=0.82031, loss=0.56154, step=142004]

Step 142000, best model saved. (accuracy=0.9078)


Train: 100% 2000/2000 [03:57<00:00,  8.43 step/s, accuracy=0.89686, loss=0.36175, step=144000]
Valid: 100% 6944/6944 [00:31<00:00, 221.91 uttr/s, accuracy=0.90827, loss=0.38829]
Train:   0% 4/2000 [00:00<06:21,  5.23 step/s, accuracy=0.89062, loss=0.42174, step=144004]

Step 144000, best model saved. (accuracy=0.9083)


Train: 100% 2000/2000 [03:59<00:00,  8.36 step/s, accuracy=0.89427, loss=0.37449, step=146000]
Valid: 100% 6944/6944 [00:30<00:00, 229.76 uttr/s, accuracy=0.90683, loss=0.39697]
Train:   0% 4/2000 [00:00<10:06,  3.29 step/s, accuracy=0.84375, loss=0.46878, step=146004]

Step 146000, best model saved. (accuracy=0.9083)


Train: 100% 2000/2000 [04:01<00:00,  8.27 step/s, accuracy=0.89703, loss=0.36235, step=148000]
Valid: 100% 6944/6944 [00:30<00:00, 227.32 uttr/s, accuracy=0.90495, loss=0.39575]
Train:   0% 4/2000 [00:00<06:15,  5.32 step/s, accuracy=0.92188, loss=0.32398, step=148004]

Step 148000, best model saved. (accuracy=0.9083)


Train: 100% 2000/2000 [04:02<00:00,  8.26 step/s, accuracy=0.89847, loss=0.36120, step=150000]
Valid: 100% 6944/6944 [00:30<00:00, 231.10 uttr/s, accuracy=0.89919, loss=0.41564]
Train:   0% 2/2000 [00:00<15:48,  2.11 step/s, accuracy=0.81250, loss=0.51498, step=150002]

Step 150000, best model saved. (accuracy=0.9083)


Train: 100% 2000/2000 [04:03<00:00,  8.20 step/s, accuracy=0.89905, loss=0.35819, step=152000]
Valid: 100% 6944/6944 [00:30<00:00, 230.36 uttr/s, accuracy=0.90092, loss=0.42006]
Train:   0% 4/2000 [00:00<05:58,  5.57 step/s, accuracy=0.89062, loss=0.35275, step=152004]

Step 152000, best model saved. (accuracy=0.9083)


Train: 100% 2000/2000 [04:04<00:00,  8.18 step/s, accuracy=0.89938, loss=0.35719, step=154000]
Valid: 100% 6944/6944 [00:30<00:00, 228.42 uttr/s, accuracy=0.89876, loss=0.42008]
Train:   0% 4/2000 [00:00<10:38,  3.13 step/s, accuracy=0.89844, loss=0.34157, step=154004]

Step 154000, best model saved. (accuracy=0.9083)


Train: 100% 2000/2000 [04:05<00:00,  8.16 step/s, accuracy=0.89694, loss=0.36208, step=156000]
Valid: 100% 6944/6944 [00:29<00:00, 232.80 uttr/s, accuracy=0.90567, loss=0.40455]
Train:   0% 3/2000 [00:00<09:45,  3.41 step/s, accuracy=0.90625, loss=0.34871, step=156003]

Step 156000, best model saved. (accuracy=0.9083)


Train: 100% 2000/2000 [04:04<00:00,  8.20 step/s, accuracy=0.90075, loss=0.34565, step=158000]
Valid: 100% 6944/6944 [00:30<00:00, 230.49 uttr/s, accuracy=0.90639, loss=0.40398]
Train:   0% 4/2000 [00:01<28:16,  1.18 step/s, accuracy=0.89062, loss=0.37750, step=158004]

Step 158000, best model saved. (accuracy=0.9083)


Train: 100% 2000/2000 [04:06<00:00,  8.10 step/s, accuracy=0.89942, loss=0.35337, step=160000]
Valid: 100% 6944/6944 [00:29<00:00, 233.31 uttr/s, accuracy=0.90495, loss=0.41157]
Train:   0% 4/2000 [00:00<06:44,  4.94 step/s, accuracy=0.89844, loss=0.46231, step=160004]

Step 160000, best model saved. (accuracy=0.9083)


Train:  92% 1846/2000 [03:47<00:22,  6.74 step/s, accuracy=0.90037, loss=0.35240, step=161845]

# Inference (Testing)

## Dataset of inference (Testing)

In [None]:
import os
import json
import torch
from pathlib import Path
from torch.utils.data import Dataset

class InferenceDataset(Dataset):
  """Test-set utterances listed in ``testdata.json``, served as (path, mel) pairs.

  Args:
    data_dir: Directory holding ``testdata.json`` and the feature files it
      references.
    segment_len: Minimum number of frames per returned item; shorter
      utterances are zero-padded up to this length. When omitted, falls back
      to ``CONFIG['SEGMENT_LEN']``.
  """

  def __init__(self, data_dir, segment_len=None):
    testdata_path = Path(data_dir) / "testdata.json"
    # Use a context manager so the file handle is closed right after parsing.
    with testdata_path.open() as f:
      metadata = json.load(f)
    self.data_dir = data_dir
    self.data = metadata["utterances"]
    self.segment_len = CONFIG['SEGMENT_LEN'] if segment_len is None else segment_len

  def __len__(self):
    """Return the number of test utterances."""
    return len(self.data)

  def __getitem__(self, index):
    """Load one utterance.

    Returns:
      A ``(feature_path, mel)`` tuple. ``mel`` is the loaded feature tensor,
      zero-padded along the first axis when it has fewer than
      ``segment_len`` rows; longer inputs are returned unchanged.
    """
    utterance = self.data[index]
    feat_path = utterance["feature_path"]
    mel = torch.load(os.path.join(self.data_dir, feat_path))

    n_frames = mel.shape[0]
    if n_frames < self.segment_len:
      # Take the feature width from the data instead of assuming 40 so the
      # padding works for any mel dimension.
      padded = torch.zeros(self.segment_len, mel.shape[1])
      padded[:n_frames] = mel
      mel = padded

    return feat_path, mel


def inference_collate_batch(batch):
  """Turn a list of (feat_path, mel) samples into (paths tuple, stacked mels)."""
  # Transpose the list of pairs into one tuple of paths and one of tensors.
  unzipped = tuple(zip(*batch))
  feat_paths, mels = unzipped

  stacked_mels = torch.stack(mels)
  return feat_paths, stacked_mels



## Main function of Inference (Testing)

In [None]:
import json
import csv
from pathlib import Path
from tqdm.notebook import tqdm

import torch
from torch.utils.data import DataLoader

def parse_args():
  """Build the keyword arguments consumed by ``main``.

  Returns:
    A dict with the dataset directory, the model path taken from
    ``CONFIG['MODEL_PATH']``, and the CSV output path.
  """
  return {
    "data_dir": "./Dataset",
    "model_path": CONFIG['MODEL_PATH'],
    "output_path": "./output.csv",
  }

def main(
  data_dir,
  model_path,
  output_path,
):
  """Run the saved classifier over the test set and write predictions to CSV.

  Args:
    data_dir: Directory containing ``mapping.json`` and the test data read by
      ``InferenceDataset``.
    model_path: Path of the saved model ``state_dict`` to load.
    output_path: Destination CSV file; it receives an ``Id,Category`` header
      row followed by one row per utterance.
  """
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  print(f"[Info]: Use {device} now!")

  mapping_path = Path(data_dir) / "mapping.json"
  # Close the mapping file as soon as it has been parsed.
  with mapping_path.open() as f:
    mapping = json.load(f)
  id2speaker = mapping["id2speaker"]

  dataset = InferenceDataset(data_dir)
  # NOTE(review): batch_size=1 presumably because the collate function stacks
  # items as-is and utterances may differ in length -- confirm before raising.
  dataloader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    drop_last=False,
    num_workers=8,
    collate_fn=inference_collate_batch,
  )
  print("[Info]: Finish loading data!", flush=True)

  speaker_num = len(id2speaker)
  model = Classifier(n_spks=speaker_num).to(device)
  # NOTE(review): `zeros` is moved by hand; presumably it is a plain tensor
  # attribute that `.to(device)` does not relocate -- confirm in Classifier.
  model.zeros = model.zeros.to(device)
  # map_location lets a checkpoint saved on GPU load on a CPU-only runtime,
  # matching the device fallback chosen above.
  model.load_state_dict(torch.load(model_path, map_location=device))
  model.eval()
  print("[Info]: Finish creating model!", flush=True)

  results = [["Id", "Category"]]
  # Gradients are never needed here, so disable them once for the whole loop.
  with torch.no_grad():
    for feat_paths, mels in tqdm(dataloader):
      mels = mels.to(device)
      outs = model(mels)
      preds = outs.argmax(1).cpu().numpy()
      for feat_path, pred in zip(feat_paths, preds):
        # The mapping is indexed with the string form of the class index.
        results.append([feat_path, id2speaker[str(pred)]])

  with open(output_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(results)


# Entry point: run inference with the settings returned by parse_args().
if __name__ == "__main__":
  main(**parse_args())
