# Download Data
Download data from google drive, then unzip it.

You should have
- `libriphone/train_split.txt`: training metadata
- `libriphone/train_labels`: training labels
- `libriphone/test_split.txt`: testing metadata
- `libriphone/feat/train/*.pt`: training feature
- `libriphone/feat/test/*.pt`:  testing feature

after running the following block.

> **Notes: if the google drive link is dead, you can download the data directly from [Kaggle](https://www.kaggle.com/c/ml2023spring-hw2/data) and upload it to the workspace.**


!pip install --upgrade gdown

# Main link
!gdown --id '1N1eVIDe9hKM5uiNRGmifBlwSDGiVXPJe' --output libriphone.zip
# !gdown --id '1qzCRnywKh30mTbWUEjXuNT2isOCAPdO1' --output libriphone.zip

!unzip -q libriphone.zip
!ls libriphone

# Some Utility Functions
**Fixes random number generator seeds for reproducibility.**

In [1]:
import numpy as np
import torch
import random

def same_seeds(seed):
    random.seed(seed) 
    np.random.seed(seed)  
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed) 
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

  from .autonotebook import tqdm as notebook_tqdm


**Helper functions to pre-process the training data from raw MFCC features of each utterance.**

A phoneme may span several frames and is dependent to past and future frames. \
Hence we concatenate neighboring phonemes for training to achieve higher accuracy. The **concat_feat** function concatenates past and future k frames (total 2k+1 = n frames), and we predict the center frame.

Feel free to modify the data preprocess functions, but **do not drop any frame** (if you modify the functions, remember to check that the number of frames are the same as mentioned in the slides)

In [2]:
import os
import torch
from tqdm import tqdm

def load_feat(path):
    feat = torch.load(path)
    return feat

def shift(x, n):
    if n < 0:
        left = x[0].repeat(-n, 1)
        right = x[:n]
    elif n > 0:
        right = x[-1].repeat(n, 1)
        left = x[n:]
    else:
        return x

    return torch.cat((left, right), dim=0)

def concat_feat(x, concat_n):
    assert concat_n % 2 == 1 # n must be odd
    if concat_n < 2:
        return x
    seq_len, feature_dim = x.size(0), x.size(1)
    x = x.repeat(1, concat_n) 
    x = x.view(seq_len, concat_n, feature_dim).permute(1, 0, 2) # concat_n, seq_len, feature_dim
    mid = (concat_n // 2)
    for r_idx in range(1, mid+1):
        x[mid + r_idx, :] = shift(x[mid + r_idx], r_idx)
        x[mid - r_idx, :] = shift(x[mid - r_idx], -r_idx)

    return x.permute(1, 0, 2).view(seq_len, concat_n * feature_dim)

def preprocess_data(split, feat_dir, phone_path, concat_nframes, train_ratio=0.8, random_seed=1213):
    class_num = 41 # NOTE: pre-computed, should not need change

    if split == 'train' or split == 'val':
        mode = 'train'
    elif split == 'test':
        mode = 'test'
    else:
        raise ValueError('Invalid \'split\' argument for dataset: PhoneDataset!')

    label_dict = {}
    if mode == 'train':
        for line in open(os.path.join(phone_path, f'{mode}_labels.txt')).readlines():
            line = line.strip('\n').split(' ')
            label_dict[line[0]] = [int(p) for p in line[1:]]
        
        # split training and validation data
        usage_list = open(os.path.join(phone_path, 'train_split.txt')).readlines()
        random.seed(random_seed)
        random.shuffle(usage_list)
        train_len = int(len(usage_list) * train_ratio)
        usage_list = usage_list[:train_len] if split == 'train' else usage_list[train_len:]

    elif mode == 'test':
        usage_list = open(os.path.join(phone_path, 'test_split.txt')).readlines()

    usage_list = [line.strip('\n') for line in usage_list]
    print('[Dataset] - # phone classes: ' + str(class_num) + ', number of utterances for ' + split + ': ' + str(len(usage_list)))

    max_len = 3000000
    X = torch.empty(max_len, 39 * concat_nframes)
    if mode == 'train':
        y = torch.empty(max_len, dtype=torch.long)

    idx = 0
    for i, fname in tqdm(enumerate(usage_list)):
        feat = load_feat(os.path.join(feat_dir, mode, f'{fname}.pt'))
        cur_len = len(feat)
        # print(cur_len)
        feat = concat_feat(feat, concat_nframes)
        if mode == 'train':
          label = torch.LongTensor(label_dict[fname])

        X[idx: idx + cur_len, :] = feat
        if mode == 'train':
          y[idx: idx + cur_len] = label

        idx += cur_len

    X = X[:idx, :]
    if mode == 'train':
      y = y[:idx]

    print(f'[INFO] {split} set')
    print(X.shape)
    if mode == 'train':
      print(y.shape)
      return X, y
    else:
      return X


# Dataset

In [3]:
import torch
from torch.utils.data import Dataset

class LibriDataset(Dataset):
    def __init__(self, X, y=None):
        self.data = X
        if y is not None:
            self.label = torch.LongTensor(y)
        else:
            self.label = None

    def __getitem__(self, idx):
        if self.label is not None:
            return self.data[idx], self.label[idx]
        else:
            return self.data[idx]

    def __len__(self):
        return len(self.data)


# Model
Feel free to modify the structure of the model.

In [4]:
import torch.nn as nn

class Classifier(nn.Module):
    def __init__(self, 
            input_dim: int, 
            num_layers: int, 
            concat_nframes: int, 
            bidirectional: bool, 
            output_dim: int, 
            hidden_dim: int, 
            dropout: float,
        ) -> None:
        super(Classifier, self).__init__()

        # TODO: apply batch normalization and dropout for strong baseline.
        self.input_dim = input_dim
        self.seq_length = concat_nframes
        self.dropout = nn.Dropout(p=dropout)
        self.relu = torch.nn.LeakyReLU(0.1)
        self.batchNorm1 = nn.BatchNorm1d(4*hidden_dim)
        self.batchNorm2 = nn.BatchNorm1d(hidden_dim)

        self.lstm = nn.LSTM(
            input_size = input_dim,
            num_layers = num_layers,
            dropout = dropout,
            bidirectional = bidirectional,
            hidden_size = hidden_dim,
            batch_first = True # input/output as (batch,seq,feature)
        )
        self.relu = nn.ReLU()
        if bidirectional == True:
            self.classifier_1 =  torch.nn.Linear(hidden_dim*4, hidden_dim)
        elif bidirectional == False:
            self.classifier_1 =  torch.nn.Linear(hidden_dim, hidden_dim)
        
        self.classifer_2 = torch.nn.Linear(hidden_dim, output_dim)
        # self.layer_norm = torch.nn.LayerNorm([128, 150, 150])

    # Reference : https://blog.csdn.net/qq_43613342/article/details/127007955
    def forward(self, x):
        x = x.view(-1, self.seq_length, self.input_dim) # reshape for lstm input
        lstm_out, _ = self.lstm(x) # lstm output
        # hidden_state = lstm_out[:,-1,:]
        lstm_forward, lstm_backword = lstm_out[:, -1, :], lstm_out[:, 0, :]
        hidden_state = torch.cat((lstm_forward, lstm_backword), dim=1)
        hidden_state = self.relu(hidden_state)
        hidden_state = self.batchNorm1(hidden_state)
        hidden_state = self.dropout(hidden_state)
        output = self.classifier_1(hidden_state)
        output = self.relu(output)
        output = self.batchNorm2(output)
        output = self.dropout(output)
        output = self.classifer_2(output)

        return output

# Hyper-parameters

In [5]:
# data prarameters
# TODO: change the value of "concat_nframes" for medium baseline
concat_nframes = 49   # the number of frames to concat with, n must be odd (total 2k+1 = n frames)  # Current best : 49
train_ratio = 0.99   # the ratio of data used for training, the rest will be used for validation

# training parameters
seed = 1777          # random seed
batch_size = 512        # batch size, Current best : 2048
num_epoch = 30         # the number of training epoch, Current best : 20
learning_rate = 1e-3      # learning rate, Current best : le-3
model_path = './'  # the path where the checkpoint will be saved
n_estimators = 15        # Current best : 15

# model parameters
# TODO: change the value of "hidden_layers" or "hidden_dim" for medium baseline
# input_dim = 39 * concat_nframes  # the input dim of the model, you should not change the value
input_dim = 39
hidden_dim = 512        # the hidden dim, Current best : 256
# hidden_layers = 3        # Current best : 3
dropout = 0.5        # Current best : 0.7
bidirectional = True
num_layers = 2
output_dim = 41        # 41 classes


# Dataloader

In [6]:
from torch.utils.data import DataLoader
import gc

same_seeds(seed)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'DEVICE: {device}')

# preprocess data
train_X, train_y = preprocess_data(split='train', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes, train_ratio=train_ratio, random_seed=seed)
val_X, val_y = preprocess_data(split='val', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes, train_ratio=train_ratio, random_seed=seed)

# get dataset
train_set = LibriDataset(train_X, train_y)
val_set = LibriDataset(val_X, val_y)

# remove raw feature to save memory
del train_X, train_y, val_X, val_y
gc.collect()

# get dataloader
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)


DEVICE: cuda
[Dataset] - # phone classes: 41, number of utterances for train: 3394


3394it [00:45, 74.39it/s] 


[INFO] train set
torch.Size([2096655, 1911])
torch.Size([2096655])
[Dataset] - # phone classes: 41, number of utterances for val: 35


35it [00:00, 568.31it/s]

[INFO] val set
torch.Size([20139, 1911])
torch.Size([20139])





# Training with Esemble Learning

In [7]:
from torchensemble import BaggingClassifier
from torchensemble.utils.logging import set_logger

logger = set_logger('acc_BaggingClassifier')

# create model, define a loss function, and optimizer
model = Classifier(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers, 
    bidirectional=bidirectional,
    concat_nframes = concat_nframes,
    output_dim=output_dim,
    dropout=dropout
).to(device)

model = BaggingClassifier(
    estimator = model,
    n_estimators = n_estimators,
    cuda = True
)

criterion = nn.CrossEntropyLoss()

model.set_criterion(criterion)
model.set_optimizer('Adam', lr=learning_rate)
model.set_scheduler('CosineAnnealingLR', T_max=20, eta_min=0)

model.fit(
    train_loader,
    epochs=num_epoch,
    test_loader=val_loader,
    log_interval=len(train_set)/batch_size
)


Log will be saved in '/home/darkqq/LAB/robert/ML/HW2/logs'.
Start logging into file /home/darkqq/LAB/robert/ML/HW2/logs/acc_BaggingClassifier-2023_03_24_14_58.log...
Estimator: 000 | Epoch: 000 | Batch: 000 | Loss: 4.07741 | Correct: 8/512
Estimator: 001 | Epoch: 000 | Batch: 000 | Loss: 3.96762 | Correct: 14/512
Estimator: 002 | Epoch: 000 | Batch: 000 | Loss: 4.02998 | Correct: 7/512
Estimator: 003 | Epoch: 000 | Batch: 000 | Loss: 4.01291 | Correct: 12/512
Estimator: 004 | Epoch: 000 | Batch: 000 | Loss: 4.04795 | Correct: 7/512
Estimator: 005 | Epoch: 000 | Batch: 000 | Loss: 3.97559 | Correct: 12/512
Estimator: 006 | Epoch: 000 | Batch: 000 | Loss: 3.99244 | Correct: 11/512
Estimator: 007 | Epoch: 000 | Batch: 000 | Loss: 4.07953 | Correct: 10/512
Estimator: 008 | Epoch: 000 | Batch: 000 | Loss: 3.99123 | Correct: 10/512
Estimator: 009 | Epoch: 000 | Batch: 000 | Loss: 4.01495 | Correct: 10/512
Estimator: 010 | Epoch: 000 | Batch: 000 | Loss: 4.01808 | Correct: 10/512
Estimator: 0

2023-03-24 16:07:17,584 - INFO: Saving the model to `./BaggingClassifier_Classifier_15_ckpt.pth`
2023-03-24 16:07:18,192 - INFO: Epoch: 000 | Validation Acc: 79.329 % | Historical Best: 79.329 %


Estimator: 000 | Epoch: 001 | Batch: 000 | Loss: 0.63056 | Correct: 409/512
Estimator: 001 | Epoch: 001 | Batch: 000 | Loss: 0.73969 | Correct: 387/512
Estimator: 002 | Epoch: 001 | Batch: 000 | Loss: 0.76731 | Correct: 389/512
Estimator: 003 | Epoch: 001 | Batch: 000 | Loss: 0.71254 | Correct: 398/512
Estimator: 004 | Epoch: 001 | Batch: 000 | Loss: 0.76016 | Correct: 404/512
Estimator: 005 | Epoch: 001 | Batch: 000 | Loss: 0.61754 | Correct: 421/512
Estimator: 006 | Epoch: 001 | Batch: 000 | Loss: 0.66790 | Correct: 408/512
Estimator: 007 | Epoch: 001 | Batch: 000 | Loss: 0.67146 | Correct: 408/512
Estimator: 008 | Epoch: 001 | Batch: 000 | Loss: 0.64322 | Correct: 420/512
Estimator: 009 | Epoch: 001 | Batch: 000 | Loss: 0.63161 | Correct: 420/512
Estimator: 010 | Epoch: 001 | Batch: 000 | Loss: 0.72106 | Correct: 407/512
Estimator: 011 | Epoch: 001 | Batch: 000 | Loss: 0.79079 | Correct: 383/512
Estimator: 012 | Epoch: 001 | Batch: 000 | Loss: 0.67201 | Correct: 406/512
Estimator: 0

2023-03-24 17:02:58,883 - INFO: Saving the model to `./BaggingClassifier_Classifier_15_ckpt.pth`
2023-03-24 17:02:59,516 - INFO: Epoch: 001 | Validation Acc: 81.826 % | Historical Best: 81.826 %


Estimator: 000 | Epoch: 002 | Batch: 000 | Loss: 0.37870 | Correct: 447/512
Estimator: 001 | Epoch: 002 | Batch: 000 | Loss: 0.36994 | Correct: 446/512
Estimator: 002 | Epoch: 002 | Batch: 000 | Loss: 0.40869 | Correct: 449/512
Estimator: 003 | Epoch: 002 | Batch: 000 | Loss: 0.37168 | Correct: 443/512
Estimator: 004 | Epoch: 002 | Batch: 000 | Loss: 0.40595 | Correct: 453/512
Estimator: 005 | Epoch: 002 | Batch: 000 | Loss: 0.43186 | Correct: 440/512
Estimator: 006 | Epoch: 002 | Batch: 000 | Loss: 0.49730 | Correct: 436/512
Estimator: 007 | Epoch: 002 | Batch: 000 | Loss: 0.41565 | Correct: 441/512
Estimator: 008 | Epoch: 002 | Batch: 000 | Loss: 0.42179 | Correct: 441/512
Estimator: 009 | Epoch: 002 | Batch: 000 | Loss: 0.45980 | Correct: 448/512
Estimator: 010 | Epoch: 002 | Batch: 000 | Loss: 0.44734 | Correct: 438/512
Estimator: 011 | Epoch: 002 | Batch: 000 | Loss: 0.44019 | Correct: 447/512
Estimator: 012 | Epoch: 002 | Batch: 000 | Loss: 0.49316 | Correct: 437/512
Estimator: 0

2023-03-24 17:58:40,678 - INFO: Saving the model to `./BaggingClassifier_Classifier_15_ckpt.pth`
2023-03-24 17:58:41,290 - INFO: Epoch: 002 | Validation Acc: 82.382 % | Historical Best: 82.382 %


Estimator: 000 | Epoch: 003 | Batch: 000 | Loss: 0.39632 | Correct: 452/512
Estimator: 001 | Epoch: 003 | Batch: 000 | Loss: 0.33025 | Correct: 459/512
Estimator: 002 | Epoch: 003 | Batch: 000 | Loss: 0.36605 | Correct: 451/512
Estimator: 003 | Epoch: 003 | Batch: 000 | Loss: 0.35565 | Correct: 456/512
Estimator: 004 | Epoch: 003 | Batch: 000 | Loss: 0.36697 | Correct: 453/512
Estimator: 005 | Epoch: 003 | Batch: 000 | Loss: 0.33914 | Correct: 457/512
Estimator: 006 | Epoch: 003 | Batch: 000 | Loss: 0.33273 | Correct: 461/512
Estimator: 007 | Epoch: 003 | Batch: 000 | Loss: 0.22191 | Correct: 476/512
Estimator: 008 | Epoch: 003 | Batch: 000 | Loss: 0.42907 | Correct: 447/512
Estimator: 009 | Epoch: 003 | Batch: 000 | Loss: 0.32245 | Correct: 464/512
Estimator: 010 | Epoch: 003 | Batch: 000 | Loss: 0.29903 | Correct: 458/512
Estimator: 011 | Epoch: 003 | Batch: 000 | Loss: 0.32784 | Correct: 453/512
Estimator: 012 | Epoch: 003 | Batch: 000 | Loss: 0.30639 | Correct: 466/512
Estimator: 0

2023-03-24 18:54:21,236 - INFO: Saving the model to `./BaggingClassifier_Classifier_15_ckpt.pth`
2023-03-24 18:54:21,843 - INFO: Epoch: 003 | Validation Acc: 82.487 % | Historical Best: 82.487 %


Estimator: 000 | Epoch: 004 | Batch: 000 | Loss: 0.31189 | Correct: 470/512
Estimator: 001 | Epoch: 004 | Batch: 000 | Loss: 0.24015 | Correct: 469/512
Estimator: 002 | Epoch: 004 | Batch: 000 | Loss: 0.31112 | Correct: 463/512
Estimator: 003 | Epoch: 004 | Batch: 000 | Loss: 0.22092 | Correct: 473/512
Estimator: 004 | Epoch: 004 | Batch: 000 | Loss: 0.24531 | Correct: 473/512
Estimator: 005 | Epoch: 004 | Batch: 000 | Loss: 0.27936 | Correct: 472/512
Estimator: 006 | Epoch: 004 | Batch: 000 | Loss: 0.23653 | Correct: 471/512
Estimator: 007 | Epoch: 004 | Batch: 000 | Loss: 0.30046 | Correct: 461/512
Estimator: 008 | Epoch: 004 | Batch: 000 | Loss: 0.27658 | Correct: 472/512
Estimator: 009 | Epoch: 004 | Batch: 000 | Loss: 0.32351 | Correct: 464/512
Estimator: 010 | Epoch: 004 | Batch: 000 | Loss: 0.22027 | Correct: 475/512
Estimator: 011 | Epoch: 004 | Batch: 000 | Loss: 0.28720 | Correct: 456/512
Estimator: 012 | Epoch: 004 | Batch: 000 | Loss: 0.26471 | Correct: 472/512
Estimator: 0

2023-03-24 19:50:02,373 - INFO: Saving the model to `./BaggingClassifier_Classifier_15_ckpt.pth`
2023-03-24 19:50:02,987 - INFO: Epoch: 004 | Validation Acc: 82.725 % | Historical Best: 82.725 %


Estimator: 000 | Epoch: 005 | Batch: 000 | Loss: 0.20986 | Correct: 477/512
Estimator: 001 | Epoch: 005 | Batch: 000 | Loss: 0.20931 | Correct: 480/512
Estimator: 002 | Epoch: 005 | Batch: 000 | Loss: 0.22891 | Correct: 467/512
Estimator: 003 | Epoch: 005 | Batch: 000 | Loss: 0.23277 | Correct: 477/512
Estimator: 004 | Epoch: 005 | Batch: 000 | Loss: 0.23307 | Correct: 476/512
Estimator: 005 | Epoch: 005 | Batch: 000 | Loss: 0.17423 | Correct: 479/512
Estimator: 006 | Epoch: 005 | Batch: 000 | Loss: 0.20362 | Correct: 479/512
Estimator: 007 | Epoch: 005 | Batch: 000 | Loss: 0.23071 | Correct: 465/512
Estimator: 008 | Epoch: 005 | Batch: 000 | Loss: 0.19326 | Correct: 480/512
Estimator: 009 | Epoch: 005 | Batch: 000 | Loss: 0.17258 | Correct: 480/512
Estimator: 010 | Epoch: 005 | Batch: 000 | Loss: 0.20394 | Correct: 479/512
Estimator: 011 | Epoch: 005 | Batch: 000 | Loss: 0.20457 | Correct: 474/512
Estimator: 012 | Epoch: 005 | Batch: 000 | Loss: 0.24732 | Correct: 470/512
Estimator: 0

2023-03-24 20:45:44,138 - INFO: Saving the model to `./BaggingClassifier_Classifier_15_ckpt.pth`
2023-03-24 20:45:44,776 - INFO: Epoch: 005 | Validation Acc: 82.834 % | Historical Best: 82.834 %


Estimator: 000 | Epoch: 006 | Batch: 000 | Loss: 0.16909 | Correct: 488/512
Estimator: 001 | Epoch: 006 | Batch: 000 | Loss: 0.20142 | Correct: 480/512
Estimator: 002 | Epoch: 006 | Batch: 000 | Loss: 0.24860 | Correct: 478/512
Estimator: 003 | Epoch: 006 | Batch: 000 | Loss: 0.19785 | Correct: 477/512
Estimator: 004 | Epoch: 006 | Batch: 000 | Loss: 0.16612 | Correct: 482/512
Estimator: 005 | Epoch: 006 | Batch: 000 | Loss: 0.16564 | Correct: 480/512
Estimator: 006 | Epoch: 006 | Batch: 000 | Loss: 0.15580 | Correct: 484/512
Estimator: 007 | Epoch: 006 | Batch: 000 | Loss: 0.16321 | Correct: 483/512
Estimator: 008 | Epoch: 006 | Batch: 000 | Loss: 0.21117 | Correct: 474/512
Estimator: 009 | Epoch: 006 | Batch: 000 | Loss: 0.17356 | Correct: 487/512
Estimator: 010 | Epoch: 006 | Batch: 000 | Loss: 0.20726 | Correct: 478/512
Estimator: 011 | Epoch: 006 | Batch: 000 | Loss: 0.14322 | Correct: 484/512
Estimator: 012 | Epoch: 006 | Batch: 000 | Loss: 0.16429 | Correct: 485/512
Estimator: 0

2023-03-24 21:41:27,878 - INFO: Saving the model to `./BaggingClassifier_Classifier_15_ckpt.pth`
2023-03-24 21:41:28,496 - INFO: Epoch: 006 | Validation Acc: 82.968 % | Historical Best: 82.968 %


Estimator: 000 | Epoch: 007 | Batch: 000 | Loss: 0.13205 | Correct: 494/512
Estimator: 001 | Epoch: 007 | Batch: 000 | Loss: 0.13796 | Correct: 487/512
Estimator: 002 | Epoch: 007 | Batch: 000 | Loss: 0.15539 | Correct: 490/512
Estimator: 003 | Epoch: 007 | Batch: 000 | Loss: 0.09907 | Correct: 495/512
Estimator: 004 | Epoch: 007 | Batch: 000 | Loss: 0.13719 | Correct: 484/512
Estimator: 005 | Epoch: 007 | Batch: 000 | Loss: 0.15581 | Correct: 488/512
Estimator: 006 | Epoch: 007 | Batch: 000 | Loss: 0.20655 | Correct: 476/512
Estimator: 007 | Epoch: 007 | Batch: 000 | Loss: 0.11277 | Correct: 487/512
Estimator: 008 | Epoch: 007 | Batch: 000 | Loss: 0.18436 | Correct: 480/512
Estimator: 009 | Epoch: 007 | Batch: 000 | Loss: 0.21889 | Correct: 480/512
Estimator: 010 | Epoch: 007 | Batch: 000 | Loss: 0.14836 | Correct: 485/512
Estimator: 011 | Epoch: 007 | Batch: 000 | Loss: 0.18898 | Correct: 480/512
Estimator: 012 | Epoch: 007 | Batch: 000 | Loss: 0.25544 | Correct: 475/512
Estimator: 0

2023-03-24 22:37:15,029 - INFO: Saving the model to `./BaggingClassifier_Classifier_15_ckpt.pth`
2023-03-24 22:37:15,640 - INFO: Epoch: 007 | Validation Acc: 83.023 % | Historical Best: 83.023 %


Estimator: 000 | Epoch: 008 | Batch: 000 | Loss: 0.13101 | Correct: 484/512
Estimator: 001 | Epoch: 008 | Batch: 000 | Loss: 0.14794 | Correct: 482/512
Estimator: 002 | Epoch: 008 | Batch: 000 | Loss: 0.15411 | Correct: 490/512
Estimator: 003 | Epoch: 008 | Batch: 000 | Loss: 0.16178 | Correct: 480/512
Estimator: 004 | Epoch: 008 | Batch: 000 | Loss: 0.11231 | Correct: 493/512
Estimator: 005 | Epoch: 008 | Batch: 000 | Loss: 0.13924 | Correct: 484/512
Estimator: 006 | Epoch: 008 | Batch: 000 | Loss: 0.11262 | Correct: 489/512
Estimator: 007 | Epoch: 008 | Batch: 000 | Loss: 0.13476 | Correct: 489/512
Estimator: 008 | Epoch: 008 | Batch: 000 | Loss: 0.10568 | Correct: 497/512
Estimator: 009 | Epoch: 008 | Batch: 000 | Loss: 0.11625 | Correct: 488/512
Estimator: 010 | Epoch: 008 | Batch: 000 | Loss: 0.13157 | Correct: 485/512
Estimator: 011 | Epoch: 008 | Batch: 000 | Loss: 0.17686 | Correct: 484/512
Estimator: 012 | Epoch: 008 | Batch: 000 | Loss: 0.13409 | Correct: 494/512
Estimator: 0

2023-03-24 23:33:01,714 - INFO: Epoch: 008 | Validation Acc: 82.993 % | Historical Best: 83.023 %


Estimator: 000 | Epoch: 009 | Batch: 000 | Loss: 0.13942 | Correct: 488/512


KeyboardInterrupt: 

In [8]:
del train_set, val_set
del train_loader, val_loader
gc.collect()

3030

# Testing
Create a testing dataset, and load model from the saved checkpoint.

In [9]:
# load data
test_X = preprocess_data(split='test', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes)
test_set = LibriDataset(test_X, None)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

[Dataset] - # phone classes: 41, number of utterances for test: 857


857it [00:01, 461.57it/s]

[INFO] test set
torch.Size([527364, 1911])





In [10]:
from torchensemble.utils import io

model = Classifier(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers, 
    bidirectional=bidirectional,
    concat_nframes = concat_nframes,
    output_dim=output_dim,
    dropout=dropout
).to(device)

model = BaggingClassifier(
    estimator = model,
    n_estimators = n_estimators,
    cuda = True
)

io.load(model, save_dir=model_path)

Make prediction.

In [11]:
pred = np.array([], dtype=np.int32)

model.eval()
with torch.no_grad():
    for i, batch in enumerate(tqdm(test_loader)):
        features = batch
        features = features.to(device)

        outputs = model(features)

        _, test_pred = torch.max(outputs, 1) # get the index of the class with the highest probability
        pred = np.concatenate((pred, test_pred.cpu().numpy()), axis=0)

100%|██████████| 1031/1031 [03:57<00:00,  4.34it/s]


Write prediction to a CSV file.

After finish running this block, download the file `prediction.csv` from the files section on the left-hand side and submit it to Kaggle.

In [12]:
with open('prediction.csv', 'w') as f:
    f.write('Id,Class\n')
    for i, y in enumerate(pred):
        f.write('{},{}\n'.format(i, y))