# Temporal Convolutional Networks
See https://medium.com/the-artificial-impostor/notes-understanding-tensorflow-part-3-7f6633fcc7c7 and https://github.com/locuslab/TCN/blob/master/TCN/tcn.py

In [72]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from scipy import signal
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

from speech2phone.preprocessing.TIMIT.phones import get_data, get_phones, phones
from speech2phone.experiments.tcn import TemporalConvNet

In [2]:
def resample(data, y):
    """Resample audio to 800 points."""
    return signal.resample(data, 800), y

In [53]:
audio, labels = get_data(preprocessor=resample, batch_preprocess=False, TIMIT_root='../TIMIT/TIMIT', padding=500)
phonemes = get_phones(labels)
n_phones = len(phones)

Loading train/resample/500 set from cache... done.


In [54]:
audio_tensor = torch.Tensor(audio)
labels_tensor = torch.Tensor(labels)
print(audio_tensor.shape)
print(labels_tensor.shape)

torch.Size([132810, 800])
torch.Size([132810])


In [73]:
full_dataset = TensorDataset(audio_tensor, labels_tensor) # Dataset requires same batch dimension
train_size = int(0.9 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

In [108]:
model = TemporalConvNet(num_inputs=800, num_channels=[400, 200, 61])
train_loader = DataLoader(train_dataset, batch_size=128)
test_loader = DataLoader(test_dataset, batch_size=1024)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
n_epochs = 2
n_print_every = 20

print("Batches per Epoch: {}".format(len(train_loader)))

for epoch in range(n_epochs):
    
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, y_truth = data
        inputs = inputs.unsqueeze(-1)
        y_truth = y_truth.long()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        outputs = outputs.squeeze(2)
#         print("inputs: {}, outputs: {}, labels: {}".format(inputs.shape, outputs.shape, y_truth.shape))
        loss = criterion(outputs, y_truth)
        loss.backward()
        optimizer.step()
        
        y_pred = torch.argmax(outputs, dim=1)
#         print(y_pred)
#         print(y_truth)
        train_acc = (y_pred == y_truth).float().mean()
#         print("acc: {}".format(acc))

        # print statistics
        running_loss += loss.item()
        if i % n_print_every == n_print_every - 1:    # print every 2000 mini-batches
            inputs, y_truth = next(iter(test_loader))
            inputs = inputs.unsqueeze(-1)
            y_truth = y_truth.long()
            y_pred = torch.argmax(model(inputs).squeeze(2), dim=1)
            test_acc = (y_pred == y_truth).float().mean()
            
            print('[%d, %5d] loss: %.3f train acc %.3f test acc: %.3f' %
                  (epoch + 1, i + 1, running_loss / n_print_every, train_acc, test_acc))
            running_loss = 0.0
                            
print("done")

Batches per Epoch: 934
[1,    20] loss: 5.149 train acc 0.023 test acc: 0.026
[1,    40] loss: 4.322 train acc 0.016 test acc: 0.023
[1,    60] loss: 4.163 train acc 0.031 test acc: 0.027
[1,    80] loss: 4.132 train acc 0.047 test acc: 0.020
[1,   100] loss: 4.117 train acc 0.031 test acc: 0.017
[1,   120] loss: 4.113 train acc 0.023 test acc: 0.017
[1,   140] loss: 4.109 train acc 0.031 test acc: 0.022
[1,   160] loss: 4.114 train acc 0.008 test acc: 0.021
[1,   180] loss: 4.108 train acc 0.047 test acc: 0.018
[1,   200] loss: 4.103 train acc 0.023 test acc: 0.019
[1,   220] loss: 4.114 train acc 0.023 test acc: 0.014
[1,   240] loss: 4.110 train acc 0.008 test acc: 0.012
[1,   260] loss: 4.109 train acc 0.008 test acc: 0.015
[1,   280] loss: 4.114 train acc 0.016 test acc: 0.013
[1,   300] loss: 4.111 train acc 0.023 test acc: 0.011
[1,   320] loss: 4.107 train acc 0.031 test acc: 0.015
[1,   340] loss: 4.098 train acc 0.031 test acc: 0.021
[1,   360] loss: 4.105 train acc 0.023 tes

In [105]:
next(iter(test_loader))

[tensor([[ 1.6689e+02, -9.7737e+01, -5.7501e+01,  ...,  4.4316e+02,
           9.6222e+02,  7.9342e+02],
         [-9.9992e+00, -3.8094e+01,  1.0531e+01,  ..., -1.5922e+00,
          -1.1099e+01,  3.5163e-01],
         [-5.7790e+01, -1.3027e+02, -1.0136e+02,  ...,  3.3090e+00,
           6.4288e+01,  1.1337e+02],
         ...,
         [ 8.9595e+01,  2.7653e+01, -2.3872e+01,  ..., -2.8324e+02,
          -6.2566e+01,  2.0179e+02],
         [-1.0767e+02, -2.6406e+01, -4.3547e+01,  ..., -1.5874e+02,
          -2.6272e+02, -8.6479e+01],
         [ 1.5602e+01, -3.2628e+00, -2.1959e+01,  ...,  1.0605e+01,
           2.9967e+01,  2.8923e+01]]),
 tensor([37., 19., 57.,  ..., 58., 31., 23.])]