In [1]:
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader   
 
import pyspch.sp as Sps
import pyspch.display as Spd
import pyspch.core as Spch

from nn import utils
from nn import corpus
from nn import datasets
from nn import models

In [2]:
# paths
timit_path = 'W:/timit/CDdata/timit/' # to extract corpus, features, labels
root_path = 'D:/gitlab/psi/compi1234/nn/' # to read/write corpus, features, labels
os.chdir(root_path)

## Load TIMIT data

In [4]:
# TIMIT - read corpus from disk
timit_train = utils.read_txt('data/timit_train.corpus')
timit_test = utils.read_txt('data/timit_test.corpus')
timit_dummy = utils.read_txt('data/timit_dummy.corpus')

# TIMIT - read meta from disk
timit_meta = pd.read_csv('data/timit.meta', sep='\t', header=None)

In [6]:
### read MFCC39 features from disk
read_feature_path = 'data/dummy/mfcc39'

# initialize reader
reader = corpus.ArrayReader(mode='numpy', extension='.npy')

# TIMIT - read features
spchdata_dummy = corpus.SpchData(timit_dummy)
spchdata_dummy.read_features(read_feature_path, reader)

# read feature_args from json
feature_args_fname = os.path.join(read_feature_path, 'feature_args.json')
feature_args = utils.read_json(feature_args_fname)

In [7]:
# Extract phone labels from segmentation 
seg_path = 'W:/timit/CDdata/timit/'
seg_extension = ".phn"

# SpchData - extract labels
spchdata_dummy.extract_labels(seg_path, feature_args, seg_extension)

# Add padding
lengths = spchdata_dummy.get_length_features()
spchdata_dummy.pad_labels(lengths)

## Make Torch DataSet

In [8]:
TIMIT61 = ['aa', 'ae', 'ah', 'ao', 'aw', 'ax', 'ax-h', 'axr', 'ay', 'b', 'bcl', 'd', 'dcl', 'dh', 'dx', 
           'eh', 'el', 'en', 'epi', 'er', 'ey', 'f', 'g', 'gcl', 'h#', 'hh', 'hv', 'ih', 'ix', 'iy', 'jh',
           'k', 'kcl', 'l', 'm', 'n', 'ng', 'nx', 'ow', 'p', 'pau', 'pcl', 'q', 'r', 's', 't', 'tcl', 'th', 
           'uw', 'ux', 'v', 'w', 'y', 'z']

TIMIT41 = ['aa','ae', 'ah','ao','aw','er','ay','b','ch','d','dh','eh',
           'm','ng','ey','f','g','hh','ih','iy','jh','k','l','n','ow',
           'oy','p','r','s','sh','t','th','uh','uw','v','w','y','z','zh','sil','cl']


timit61_41={ 
    'axr': 'er',
    'em': 'm',
    'eng': 'ng',
    'nx': 'n',    
    'hv': 'hh',
    'kcl': 'cl',  'pcl': 'cl',  'tcl': 'cl',
    'h#': 'sil', 'pau': 'sil' ,   'q': 'sil', 
    ## different from 48 mapping
    'bcl': 'cl', 'dcl': 'cl',  'gcl': 'cl',
    'epi': 'sil',
    'dx': 't',
    'ax-h': 'ah', 'ix': 'ih','ax': 'ah', 'ux': 'uh',
    'el': 'l', 'en':'n' 
}


In [9]:
# DataSet arguments
splice_args = {'N': 5, 'stride': 2}

# TIMIT label mapping
lab41_2idx = {k: v for v, k in enumerate(TIMIT41)}
lab2lab = {k: k for k in TIMIT61}
lab2lab.update(timit61_41)
lab2idx = {k: lab41_2idx[v] for k, v in lab2lab.items()}

In [10]:
# create SpchDataset
fnames = spchdata_dummy.corpus
features = spchdata_dummy.features
labels = spchdata_dummy.labels

# initialize
spch_ds = datasets.SpchDataset(fnames, features, labels)

# target encoding 
spch_ds.encode_target(lab2idx)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
# format
spch_ds.to_tensor()
spch_ds.to_device(device)

# sampler (splicing during __getitem__)
lengths = spchdata_dummy.get_length_features()
spch_ds.set_sampler(lengths, splice_args)

## Model 

In [38]:
# model arguments (from dataset)
in_dim = spch_ds.__getitem__(0)[0].shape[0]
out_dim = len(spch_ds.lab2idx)
hidden_layer_sizes = [1024, 768, 512]

# model arguments
model_args = {
    'type': 'dnn',
    'in_dim': in_dim,
    'out_dim': out_dim,
    'hidden_layer_sizes': hidden_layer_sizes
}

# model
model = models.FFDNN(in_dim, out_dim, hidden_layer_sizes)

# device

spch_ds.to_device(device)
model.to(device)

FFDNN(
  (net): Sequential(
    (0): Linear(in_features=702, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=768, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=768, out_features=512, bias=True)
    (5): Sigmoid()
    (6): Linear(in_features=512, out_features=56, bias=True)
  )
)

## Train model

In [39]:
# training parameters
training_args = {
    'n_epoch': 50,
    'patience': 15,
    'lrn_rate': 0.00001,
    'weight_decay': 0,
    'batch_size': 64,
    'shuffle': True,
    'num_workers': 0,
    'val_frac': 0.1,
}

# criterion & optimizer
criterion = nn.CrossEntropyLoss() # applies softmax()
optimizer = torch.optim.Adam(model.parameters(), 
                             lr=training_args['lrn_rate'], 
                             weight_decay=training_args['weight_decay'])

# validation set
if training_args['val_frac'] is not None:
    n_ex = len(spch_ds)
    n_valid = int(n_ex * training_args['val_frac'])
    n_train = n_ex - n_valid
    valid_ds, train_ds = torch.utils.data.random_split(spch_ds, [n_valid, n_train])
else:
    train_ds = spch_ds
    
# iterator
train_dl = DataLoader(train_ds, 
    batch_size=training_args['batch_size'],
    shuffle=training_args['shuffle'], 
    num_workers=training_args['num_workers'])

if training_args['val_frac'] is not None:
    valid_dl = DataLoader(valid_ds, 
        batch_size=training_args['batch_size'],
        shuffle=training_args['shuffle'], 
        num_workers=training_args['num_workers'])

In [40]:
# arguments
every = 5 
current_epoch = 0

# train model
tr_loss, val_loss, epoch = models.train(model, train_dl, criterion, optimizer, clip=None, 
    current_epoch=current_epoch, n_epochs=training_args['n_epoch'],
    valid_dl=valid_dl, patience=training_args['patience'], every=every)


Epoch 0 -- av. train loss per mini-batch 3.93
	 -- av. validation loss per mini-batch 3.79
Epoch 5 -- av. train loss per mini-batch 3.24
	 -- av. validation loss per mini-batch 3.28
Epoch 10 -- av. train loss per mini-batch 3.17
	 -- av. validation loss per mini-batch 3.20
Epoch 15 -- av. train loss per mini-batch 3.14
	 -- av. validation loss per mini-batch 3.20
Epoch 20 -- av. train loss per mini-batch 3.13
	 -- av. validation loss per mini-batch 3.18
Epoch 25 -- av. train loss per mini-batch 3.12
	 -- av. validation loss per mini-batch 3.17
Epoch 30 -- av. train loss per mini-batch 3.11
	 -- av. validation loss per mini-batch 3.18
Epoch 35 -- av. train loss per mini-batch 3.11
	 -- av. validation loss per mini-batch 3.17
Epoch 40 -- av. train loss per mini-batch 3.11
	 -- av. validation loss per mini-batch 3.14
Epoch 45 -- av. train loss per mini-batch 3.10
	 -- av. validation loss per mini-batch 3.14


## Decoding

In [None]:
# int2label
idx2lab= {v: k for k, v in spch_ds.lab2idx.items()}
labels = list(idx2lab.values())
print(len(labels))

42


## Evaluate model

In [41]:
## Evaluate with Dataloader

# evaluate 
loss = model.evaluate(train_dl, criterion)
cm = model.evaluate_cm(train_dl)