In [17]:
import torch

import torch.optim as optim

import numpy as np
import time
import os
import importlib

# Seed the RNGs used by this notebook so reruns are repeatable.
SEED = 0
torch.manual_seed(SEED)
# numpy is imported and used here as well; seed it so any numpy-based
# randomness (e.g. shuffling in helper modules, if present) is repeatable too.
np.random.seed(SEED)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda'

In [18]:
import paths
import data
import ctc_model
import phoneme_list
import UtteranceDataset
import config
import util

In [19]:
# Re-import the project modules that are edited while the notebook is open,
# so the kernel picks up their latest source without a restart.
reload_packages = [
    paths,
    data,
    ctc_model,
    util,
    config,
]
for module in reload_packages:
    importlib.reload(module)
# importlib.reload(data)

In [4]:
# Build the training loader first, then the validation loader.
train_loader, val_loader = (
    data.get_loader(split) for split in ("train", "val")
)

(24724,) (24724,)
Dataset Device: cuda
(1106,) (1106,)
Dataset Device: cuda


In [35]:
# Instantiate the speech model with one output per phoneme class;
# the hyperparameters below must match any checkpoint loaded into it.
model = ctc_model.SpeechModel(
    phoneme_list.N_PHONEMES,
    num_rnn=5,
    hidden_size=256,
    nlayers=1,
)

In [36]:
# Print the module tree to verify the layer stack that was just built.
print(model)

SpeechModel(
  (rnns): ModuleList(
    (0): LSTM(40, 256, bidirectional=True)
    (1): LSTM(512, 256, bidirectional=True)
    (2): LSTM(512, 256, bidirectional=True)
    (3): LSTM(512, 256, bidirectional=True)
    (4): LSTM(512, 256, bidirectional=True)
  )
  (scoring): Linear(in_features=512, out_features=47, bias=True)
)


In [37]:
# initialization
# Passes torch.nn.init.orthogonal_ to the project's init helper as the initializer.
# NOTE(review): the checkpoint cell further down calls model.load_state_dict(...),
# which replaces these values when resuming from a saved model.
ctc_model.init_weights(model, torch.nn.init.orthogonal_)

  bias_init(p.data)


In [38]:
# optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
# SGD with Nesterov momentum at half of the 1e-3 base learning rate.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-3 * 0.5,
    momentum=0.9,
    nesterov=True,
)

# Training bookkeeping defaults; the checkpoint cell overrides them when resuming.
best_epoch = 0
best_vali_loss = 400
starting_epoch = 0

In [39]:
# Display the optimizer's parameter groups to confirm the settings in effect.
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.0005
    momentum: 0.9
    nesterov: True
    weight_decay: 0
)

In [40]:
# proceeding from old models
# Resume training from a saved checkpoint: restore the model weights and the
# best-so-far bookkeeping, and continue with the epoch after the saved one.
model_path = os.path.join(paths.output_path, '4bilstm_adam_10.pth.tar')
print("=> loading checkpoint '{}'".format(model_path))
# map_location remaps the stored tensors onto the device chosen at the top of
# the notebook, so a checkpoint saved on GPU also loads on a CPU-only machine.
checkpoint = torch.load(model_path, map_location=DEVICE)
starting_epoch = checkpoint['epoch'] + 1
# best_vali_acc = checkpoint['best_vali_acc']
model_state_dict = checkpoint['model_state_dict']
model.load_state_dict(model_state_dict)
# The optimizer state is not restored (line below is disabled), so the
# optimizer created above starts without any saved state.
# optimizer.load_state_dict(checkpoint['optimizer_label_state_dict'])
best_vali_loss = checkpoint['best_vali_loss']
best_epoch = checkpoint['best_epoch']
print("=> loaded checkpoint '{}' (epoch {})"
      .format(model_path, checkpoint['epoch']))
# del checkpoint, model_state_dict

=> loading checkpoint '../outputs/4bilstm_adam_10.pth.tar'
=> loaded checkpoint '../outputs/4bilstm_adam_10.pth.tar' (epoch 10)


In [41]:
# optimizer

In [42]:
# for param_group in optimizer.param_groups:
#     param_group['lr'] = lr=1e-3*0.5

In [43]:
# for state in optimizer.state.values():
#     for k, v in state.items():
#         if isinstance(v, torch.Tensor):
#             state[k] = v.cuda()

In [44]:
# ctc_model.run(model, optimizer, train_loader, val_loader, best_epoch, best_vali_loss, starting_epoch)
# Continue training from `starting_epoch` with the scheduler-enabled loop.
# NOTE(review): the positional `1, 0.5` are presumably scheduler settings
# (e.g. step size and decay factor) — confirm against ctc_model.run_with_scheduler.
ctc_model.run_with_scheduler(model,
                             optimizer, 1, 0.5,
                             train_loader, val_loader, 
                             best_epoch, best_vali_loss, starting_epoch)

-- Starting training with scheduler.
### Epoch    11 
Epoch: 11	Batch: 50	Avg-Loss: 16.6724 
Epoch: 11	Batch: 100	Avg-Loss: 16.2930 
Epoch: 11	Batch: 150	Avg-Loss: 15.9613 
Epoch: 11	Batch: 200	Avg-Loss: 15.6137 
Epoch: 11	Batch: 250	Avg-Loss: 15.2193 
Epoch: 11	Batch: 300	Avg-Loss: 16.1339 
Epoch: 11	Batch: 350	Avg-Loss: 15.4541 
Train Loss: 14.5812	Val Loss: 24.5721	 
start eval
vali_distance: 7.0404	 
Epoch time used:  818.1478321552277 s 
### Epoch    12 
Epoch: 12	Batch: 50	Avg-Loss: 14.4943 
Epoch: 12	Batch: 100	Avg-Loss: 14.0548 
Epoch: 12	Batch: 150	Avg-Loss: 14.6938 
Epoch: 12	Batch: 200	Avg-Loss: 15.3011 
Epoch: 12	Batch: 250	Avg-Loss: 14.6722 
Epoch: 12	Batch: 300	Avg-Loss: 13.8810 
Train Loss: 13.5855	Val Loss: 24.5578	 
start eval
vali_distance: 7.0009	 
Epoch time used:  819.7332215309143 s 
### Epoch    13 
Epoch: 13	Batch: 50	Avg-Loss: 13.3452 
Epoch: 13	Batch: 100	Avg-Loss: 14.4191 
Epoch: 13	Batch: 150	Avg-Loss: 13.2637 
Epoch: 13	Batch: 200	Avg-Loss: 14.1425 
Epoch: 

In [48]:
# Hand the metrics file in the output directory to the project's wrap-up helper.
metrics_path = os.path.join(paths.output_path, 'metrics.txt')
util.wrap_up_experiment(metrics_path)

In [33]:
# ctc_model.run_eval(model_2, val_loader)

# Prediction

In [14]:
# Build the loader for the test split; the prediction cell below iterates over it.
test_loader = data.get_loader("test")

Dataset Device: cuda


In [45]:
# Write one submission CSV per saved checkpoint.
# pandas is imported once here instead of on every loop iteration; ideally it
# belongs in the notebook's top import cell.
import pandas as pd

for epoch in [15, 25]:
    # Fresh model with the same constructor arguments as the training model,
    # so the saved state dict's keys line up.
    model_prediction = ctc_model.SpeechModel(
        phoneme_list.N_PHONEMES, num_rnn=5, hidden_size=256, nlayers=1
    )

    model_path = os.path.join(paths.output_path, '4bilstm_adam_' + str(epoch) + '.pth.tar')
    print("=> loading checkpoint '{}'".format(model_path))
    # map_location keeps the load working when the checkpoint was saved on a
    # GPU but the current session runs on CPU.
    checkpoint = torch.load(model_path, map_location=DEVICE)
    model_prediction.load_state_dict(checkpoint['model_state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(model_path, checkpoint['epoch']))
    # Only the weights are needed for prediction. The training bookkeeping
    # globals (starting_epoch, best_vali_loss, best_epoch) are deliberately
    # left untouched so re-running the training cell afterwards is unaffected.

    # Use the notebook-wide device instead of a hard-coded .cuda().
    model_prediction.to(DEVICE)

    encoded_prediction = ctc_model.predict(model_prediction, test_loader)

    # NOTE(review): Id uses len(test_loader), i.e. the number of batches; this
    # assumes one utterance per test batch — confirm against data.get_loader.
    submission_df = pd.DataFrame({
        'Id': np.arange(len(test_loader)),
        'predicted': np.array(encoded_prediction).flatten(),
    })

    submission_df.to_csv(
        os.path.join('..', 'outputs', 'verification_submission_' + str(epoch) + '.csv'),
        index=False,
    )

=> loading checkpoint '../outputs/4bilstm_adam_15.pth.tar'
=> loaded checkpoint '../outputs/4bilstm_adam_15.pth.tar' (epoch 15)
start prediction
0
100
200
300
400
500
=> loading checkpoint '../outputs/4bilstm_adam_25.pth.tar'
=> loaded checkpoint '../outputs/4bilstm_adam_25.pth.tar' (epoch 25)
start prediction
0
100
200
300
400
500


In [23]:
# torch.save(model.state_dict(), "checkpoint.pt")

In [7]:
# Grab the first batch from the test loader for manual inspection.
# The loop variable must not be called `data`: that name is the imported
# project module, and rebinding it would break later `data.get_loader(...)` calls.
for batch in test_loader:
    test = batch
    # NOTE(review): len([batch]) is the length of a one-element list, so this
    # always prints 1; `len(batch)` may have been intended.
    print(len([batch]))
    break

1


In [8]:
# Show the first element of the sampled test batch.
test[0]

tensor([[-5.0130, -4.4909, -4.8291,  ..., -2.0473, -3.3245, -2.6745],
        [-4.0408, -6.3393, -5.9088,  ..., -1.2934, -2.3482, -3.8307],
        [-6.5768, -8.1930, -4.9924,  ..., -1.7163, -2.9462, -3.5497],
        ...,
        [-7.2773, -2.9609, -3.3077,  ..., -3.0296, -1.9444, -2.8614],
        [-5.2828, -3.2241, -4.2793,  ..., -1.0198, -2.6685, -4.1456],
        [-4.9332, -4.8754, -4.3110,  ..., -0.7746, -2.8968, -3.4207]],
       device='cuda:0')

In [77]:
# Ad-hoc forward pass of the training model on the sampled test batch.
# NOTE(review): this runs with autograd enabled (the displayed output carries a
# grad_fn); wrap it in torch.no_grad() if gradients are not needed here.
model(test.cuda())

(tensor([[[ 7.7970, -1.3460,  0.0933,  ..., -5.0684, -4.3781, -5.3427]],
 
         [[ 9.0795, -1.6963, -0.7084,  ..., -6.0590, -4.3441, -6.3822]],
 
         [[ 9.9727, -1.9067, -1.3714,  ..., -6.1406, -4.4554, -7.0818]],
 
         ...,
 
         [[ 5.9954,  0.4422, -0.6152,  ..., -2.7546, -0.4327, -5.7047]],
 
         [[ 5.3743, -0.7836, -0.9460,  ..., -1.6021, -0.8774, -4.4772]],
 
         [[ 4.6321, -1.7703, -1.2969,  ..., -1.3024, -2.1310, -4.3121]]],
        device='cuda:0', grad_fn=<AddBackward0>), tensor([542]))