In [71]:
import torch

import torch.optim as optim

import numpy as np
import time
import os
import importlib

# Seed PyTorch's global random number generator.
torch.manual_seed(0)

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
DEVICE

'cuda'

In [72]:
import paths
import data
import ctc_crnn_model
import phoneme_list
import UtteranceDataset
import config
import util

In [73]:
# Re-import the project modules so that edits made on disk take effect
# without restarting the kernel. Modules are reloaded in the order listed.
reload_packages = [paths, data, ctc_crnn_model, util, config]
for module in reload_packages:
    importlib.reload(module)

In [50]:
# Build the training loader first, then the validation loader.
train_loader, val_loader = [data.get_loader(split) for split in ("train", "val")]

(24724,) (24724,)
Dataset Device: cuda
(1106,) (1106,)
Dataset Device: cuda


In [56]:
# Build the speech model: one output per phoneme class, four recurrent
# blocks with a hidden size of 512 and a single layer each.
model = ctc_crnn_model.SpeechModel(
    phoneme_list.N_PHONEMES,
    num_rnn=4,
    hidden_size=512,
    nlayers=1,
)

In [57]:
# Print the layer structure of the model built above.
print(model)

SpeechModel(
  (cnns): Sequential(
    (0): Conv1d(40, 64, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): Conv1d(64, 128, kernel_size=(3,), stride=(2,), padding=(1,), bias=False)
    (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): LeakyReLU(negative_slope=0.01)
  )
  (rnns): ModuleList(
    (0): LSTM(128, 512, bidirectional=True)
    (1): LSTM(1024, 512, bidirectional=True)
    (2): LSTM(1024, 512, bidirectional=True)
    (3): LSTM(1024, 512, bidirectional=True)
  )
  (scoring): Linear(in_features=1024, out_features=47, bias=True)
)


In [58]:
def init_weights(m):
    """Apply Xavier-normal initialisation to convolutional and linear weights.

    Written to be passed to ``torch.nn.Module.apply``, which calls it once for
    every sub-module of a model.

    Args:
        m: Any ``torch.nn.Module``. If it is a ``Conv1d`` or ``Linear``
            (subclasses included), ``m.weight`` is re-initialised in place.
            Every other module is left untouched. Biases are never modified.

    Returns:
        None.
    """
    # isinstance (rather than an exact type comparison) also covers subclasses.
    if isinstance(m, (torch.nn.Conv1d, torch.nn.Linear)):
        # The torch.nn.init functions already run without gradient tracking, so
        # the parameter is passed directly instead of the legacy ``.data`` handle.
        torch.nn.init.xavier_normal_(m.weight)

In [59]:
# Run init_weights on every sub-module: Xavier-normal initialisation for the
# Conv1d layers and for the Linear scoring layer (LSTM modules are not matched).
model.apply(init_weights)

SpeechModel(
  (cnns): Sequential(
    (0): Conv1d(40, 64, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): Conv1d(64, 128, kernel_size=(3,), stride=(2,), padding=(1,), bias=False)
    (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): LeakyReLU(negative_slope=0.01)
  )
  (rnns): ModuleList(
    (0): LSTM(128, 512, bidirectional=True)
    (1): LSTM(1024, 512, bidirectional=True)
    (2): LSTM(1024, 512, bidirectional=True)
    (3): LSTM(1024, 512, bidirectional=True)
  )
  (scoring): Linear(in_features=1024, out_features=47, bias=True)
)

In [60]:
# Initialise the recurrent layers through the project helper, using
# orthogonal initialisation as the weight initialiser.
# NOTE(review): which parameters the helper touches is decided inside
# ctc_crnn_model.init_weights — confirm there.
ctc_crnn_model.init_weights(model, weight_init=torch.nn.init.orthogonal_)

In [61]:
# Adam over all model parameters with an initial learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Bookkeeping for a run that starts from scratch.
best_epoch = 0
# presumably a sentinel above any expected validation loss — confirm
best_vali_loss = 400
starting_epoch = 0

In [62]:
# Display the optimizer's hyper-parameters.
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)

In [63]:
# Resume training from a previously saved checkpoint.
model_path = os.path.join(paths.output_path, '4bilstm_adam_10.pth.tar')
print("=> loading checkpoint '{}'".format(model_path))
# map_location="cpu" makes the load independent of the device the checkpoint
# was saved from: it works on a CPU-only host and does not keep a second copy
# of every tensor on the GPU. load_state_dict copies the values into the
# existing parameters / optimizer state afterwards.
checkpoint = torch.load(model_path, map_location="cpu")
# Training continues with the epoch after the one stored in the checkpoint.
starting_epoch = checkpoint['epoch'] + 1
model_state_dict = checkpoint['model_state_dict']
model.load_state_dict(model_state_dict)
optimizer.load_state_dict(checkpoint['optimizer_label_state_dict'])
# Restore the best-so-far bookkeeping so later comparisons use the saved values.
best_vali_loss = checkpoint['best_vali_loss']
best_epoch = checkpoint['best_epoch']
print("=> loaded checkpoint '{}' (epoch {})"
      .format(model_path, checkpoint['epoch']))

=> loading checkpoint '../outputs/4bilstm_adam_10.pth.tar'
=> loaded checkpoint '../outputs/4bilstm_adam_10.pth.tar' (epoch 10)


In [64]:
# Display the current best epoch and best validation loss.
best_epoch, best_vali_loss

(6, 25.023090839385986)

In [67]:
# Display the optimizer again.
# NOTE: this cell was executed (In [67]) after the learning-rate override cell
# that follows it (In [66]); a top-to-bottom run shows the value from before
# that override here.
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.000125
    weight_decay: 0
)

In [66]:
# Override the learning rate of every parameter group.
# The value is computed once, outside the loop, and kept in `lr` (the name the
# original chained assignment also bound).
lr = 0.00025 * 0.5
for param_group in optimizer.param_groups:
    param_group['lr'] = lr

In [68]:
# Move every tensor held in the optimizer's per-parameter state onto DEVICE.
# Using DEVICE instead of a hard-coded .cuda() keeps this cell runnable on a
# CPU-only machine (the move is then a no-op).
# NOTE(review): presumably needed because the model is moved to the GPU later,
# inside the training routine — confirm in ctc_crnn_model.
for state in optimizer.state.values():
    for key, value in state.items():
        if isinstance(value, torch.Tensor):
            # Re-assigning an existing key does not change the dict's size, so
            # it is safe while iterating over items().
            state[key] = value.to(DEVICE)

In [69]:
# Continue training from `starting_epoch`, carrying over the best-so-far values.
# NOTE(review): the positional arguments 0 and 0.5 are not named at this call
# site. The training log shows the learning rate being halved, so 0.5 is
# presumably the decay factor and 0 the patience — confirm against
# ctc_crnn_model.run_with_scheduler.
ctc_crnn_model.run_with_scheduler(model,
                             optimizer, 0, 0.5,
                             train_loader, val_loader, 
                             best_epoch, best_vali_loss, starting_epoch)

-- Starting training with scheduler.
### Epoch    11 
Epoch: 11	Batch: 50	Avg-Loss: 2.8695 
Epoch: 11	Batch: 100	Avg-Loss: 2.6571 
Epoch: 11	Batch: 150	Avg-Loss: 2.4437 
Epoch: 11	Batch: 200	Avg-Loss: 2.3853 
Epoch: 11	Batch: 250	Avg-Loss: 2.3259 
Epoch: 11	Batch: 300	Avg-Loss: 2.3572 
Epoch: 11	Batch: 350	Avg-Loss: 2.2635 
Train Loss: 1.8298	Val Loss: 27.9660	 
start eval
vali_distance: 6.4715	 
Epoch time used:  573.6263682842255 s 
### Epoch    12 
Epoch: 12	Batch: 50	Avg-Loss: 1.8416 
Epoch: 12	Batch: 100	Avg-Loss: 1.8242 
Epoch: 12	Batch: 150	Avg-Loss: 1.8021 
Epoch: 12	Batch: 200	Avg-Loss: 1.8916 
Epoch: 12	Batch: 250	Avg-Loss: 1.9154 
Epoch: 12	Batch: 300	Avg-Loss: 1.8032 
Epoch: 12	Batch: 350	Avg-Loss: 1.8440 
Train Loss: 1.4536	Val Loss: 29.0074	 
start eval
vali_distance: 6.5503	 
Epoch     1: reducing learning rate of group 0 to 6.2500e-05.
Epoch time used:  573.7871663570404 s 
### Epoch    13 
Epoch: 13	Batch: 50	Avg-Loss: 1.4240 
Epoch: 13	Batch: 100	Avg-Loss: 1.4476 
Epo

KeyboardInterrupt: 

In [70]:
# Hand the path of the metrics file to the project's wrap-up helper.
metrics_path = os.path.join(paths.output_path, 'metrics.txt')
util.wrap_up_experiment(metrics_path)

In [33]:
# ctc_model.run_eval(model_2, val_loader)

# Prediction

In [74]:
# Build the loader for the "test" split.
test_loader = data.get_loader("test")

Dataset Device: cuda


In [75]:
import pandas as pd  # imported once, not on every loop iteration

# Checkpoint suffixes to turn into submission files. All strings, so the file
# names can be built without per-item str() conversion.
checkpoint_tags = ['11', '11_2', '11_3']

for epoch in checkpoint_tags:
    # Fresh model with the same architecture as the one that was trained.
    model_prediction = ctc_crnn_model.SpeechModel(
        phoneme_list.N_PHONEMES, num_rnn=4, hidden_size=512, nlayers=1)

    prediction_model_path = os.path.join(
        paths.output_path, '4bilstm_adam_' + epoch + '.pth.tar')
    print("=> loading checkpoint '{}'".format(prediction_model_path))
    # Loop-local names are used on purpose: the training bookkeeping
    # (starting_epoch, best_epoch, best_vali_loss, checkpoint) is no longer
    # overwritten by running the prediction cell.
    # map_location="cpu" makes the load independent of the saving device.
    prediction_checkpoint = torch.load(prediction_model_path, map_location="cpu")
    model_prediction.load_state_dict(prediction_checkpoint['model_state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(prediction_model_path, prediction_checkpoint['epoch']))

    # Follow the notebook-wide DEVICE instead of a hard-coded .cuda().
    model_prediction.to(DEVICE)
    # The model contains BatchNorm layers, so make sure it is in inference mode.
    # Harmless if ctc_crnn_model.predict already switches modes itself.
    model_prediction.eval()

    encoded_prediction = ctc_crnn_model.predict(model_prediction, test_loader)

    # Size the Id column from the predictions themselves: len(test_loader) is
    # the number of batches, which only equals the number of predictions when
    # the test batch size is 1.
    predicted = np.array(encoded_prediction).flatten()
    submission_df = pd.DataFrame({'Id': np.arange(len(predicted)),
                                  'predicted': predicted})

    # Write next to the checkpoints, using the same configured output directory.
    submission_df.to_csv(
        os.path.join(paths.output_path, 'verification_submission_' + epoch + '.csv'),
        index=False)

=> loading checkpoint '../outputs/4bilstm_adam_11.pth.tar'
=> loaded checkpoint '../outputs/4bilstm_adam_11.pth.tar' (epoch 11)
start prediction
0
100
200
300
400
500
=> loading checkpoint '../outputs/4bilstm_adam_11_2.pth.tar'
=> loaded checkpoint '../outputs/4bilstm_adam_11_2.pth.tar' (epoch 11)
start prediction
0
100
200
300
400
500
=> loading checkpoint '../outputs/4bilstm_adam_11_3.pth.tar'
=> loaded checkpoint '../outputs/4bilstm_adam_11_3.pth.tar' (epoch 11)
start prediction
0
100
200
300
400
500


In [80]:
# Inspect the decoder attribute of a newly constructed ER object.
ctc_crnn_model.ER().decoder

<ctcdecode.CTCBeamDecoder at 0x7fe9bf4c1240>

In [23]:
# torch.save(model.state_dict(), "checkpoint.pt")

In [1]:
import numpy as np

In [2]:
# Scratch check: ceiling of 3/2 (i.e. of 1.5).
np.ceil(3/2)

2.0