# Phoneme list and phoneme map

In [1]:
import phoneme_list as pl

In [2]:
# Short local aliases for the constants exposed by the phoneme_list module.
n_stat, n_phon = pl.N_STATES, pl.N_PHONEMES
p_list, p_map = pl.PHONEME_LIST, pl.PHONEME_MAP


In [3]:
# Sanity check: the two counts, the sizes of both tables, then the tables
# themselves, one value per line.
print(n_stat, n_phon, len(p_list), len(p_map), p_list, p_map, sep='\n')

138
46
46
46
['+BREATH+', '+COUGH+', '+NOISE+', '+SMACK+', '+UH+', '+UM+', 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER', 'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OY', 'P', 'R', 'S', 'SH', 'SIL', 'T', 'TH', 'UH', 'UW', 'V', 'W', 'Y', 'Z', 'ZH']
['_', '+', '~', '!', '-', '@', 'a', 'A', 'h', 'o', 'w', 'y', 'b', 'c', 'd', 'D', 'e', 'r', 'E', 'f', 'g', 'H', 'i', 'I', 'j', 'k', 'l', 'm', 'n', 'G', 'O', 'Y', 'p', 'R', 's', 'S', '.', 't', 'T', 'u', 'U', 'v', 'W', '?', 'z', 'Z']


# get a small piece of train/val data

In [4]:
import numpy as np

In [5]:
# Training split: feature file and its label file.
train_data_path, train_label_path = (
    './../data/wsj0_train.npy',
    './../data/wsj0_train_merged_labels.npy',
)

# Validation (dev) split: feature file and its label file.
val_data_path, val_label_path = (
    './../data/wsj0_dev.npy',
    './../data/wsj0_dev_merged_labels.npy',
)

In [6]:
# Load each split as a (features, labels) pair. Tuple elements are evaluated
# left to right, so the files are read in the order: features, then labels.
train_data, train_label = (
    np.load(train_data_path, encoding='bytes'),
    np.load(train_label_path),
)

val_data, val_label = (
    np.load(val_data_path, encoding='bytes'),
    np.load(val_label_path),
)

In [7]:
# Keep only the first ten items of each split for quick experiments.
train_data_sample, train_label_sample = train_data[:10], train_label[:10]

val_data_sample, val_label_sample = val_data[:10], val_label[:10]

# process input

In [8]:
import time

import torch
import torch.nn as nn
from torch.nn.utils import rnn
from torch.utils.data import Dataset, DataLoader, TensorDataset

In [9]:
# Phoneme dataset
class PhonDataset(Dataset):
    """Dataset pairing each utterance with its label sequence.

    Every utterance and every label sequence is stored as its own tensor, so
    items are allowed to have different lengths.
    """

    def __init__(self, phon_list, labels):
        """Convert the utterances and labels to per-item tensors.

        Args:
            phon_list: Iterable of array-likes, one per utterance.
            labels: Iterable of array-likes, one per utterance, in the same
                order as ``phon_list``.
        """
        self.data = [torch.tensor(phon) for phon in phon_list]
        # One tensor per item: a single torch.tensor(labels) call fails as
        # soon as the label sequences have different lengths.
        self.labels = [torch.tensor(label) for label in labels]

    def __getitem__(self, i):
        """Return the ``i``-th (data, label) pair moved to ``DEVICE``."""
        # NOTE: DEVICE is a module-level name that must be defined before the
        # dataset is indexed.
        data = self.data[i]
        label = self.labels[i]
        return data.to(DEVICE), label.to(DEVICE)

    def __len__(self):
        """Return the number of items in the dataset."""
        return len(self.labels)

# collate_phon returns the batch sorted by sequence length, longest first
def collate_phon(phon_list):
    """Collate (input, target) pairs into two length-sorted lists.

    Args:
        phon_list: Non-empty sequence of ``(input, target)`` pairs.

    Returns:
        A tuple ``(inputs, targets)`` of lists, both reordered so that the
        inputs appear in decreasing order of length. Pairs whose inputs have
        equal length keep their original relative order.
    """
    inputs, targets = zip(*phon_list)
    # Length of each individual sequence, not the size of the batch.
    lens = [len(phon) for phon in inputs]
    seq_order = sorted(range(len(lens)), key=lens.__getitem__, reverse=True)
    inputs = [inputs[i] for i in seq_order]
    targets = [targets[i] for i in seq_order]
    return inputs, targets

In [None]:
# Phoneme model
# Model that takes packed sequences in training
class PackedPhonModel(nn.Module):
    """Embedding -> LSTM -> linear scorer operating on packed sequences."""

    def __init__(self, vocab_size, embed_size, hidden_size, nlayers, stop):
        """Build the layers.

        Args:
            vocab_size: Number of distinct symbols; also the output size.
            embed_size: Dimension of the symbol embeddings.
            hidden_size: Dimension of the LSTM hidden state.
            nlayers: Number of stacked LSTM layers.
            stop: Symbol index that ends generation early in ``generate``.
        """
        super(PackedPhonModel, self).__init__()
        self.vocab_size = vocab_size
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.nlayers = nlayers
        # generate() compares predictions against this index, so keep it.
        self.stop = stop
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # Sequence-first layout: batch_first is left at its default (False).
        self.rnn = nn.LSTM(input_size=embed_size, hidden_size=hidden_size,
                           num_layers=nlayers)
        self.scoring = nn.Linear(hidden_size, vocab_size)

    def forward(self, phon_list):
        """Score every position of every sequence in the batch.

        Args:
            phon_list: List of 1-D index tensors, one per sequence, ordered
                by decreasing length (``pack_sequence`` requires this with
                its default ``enforce_sorted=True``).

        Returns:
            Tensor of shape ``(sum of sequence lengths, vocab_size)`` holding
            the logits of all sequences concatenated in batch order.
        """
        lens = [len(phon) for phon in phon_list]
        # Embed everything in one call, then cut back into per-sequence chunks.
        embed_concat = self.embedding(torch.cat(phon_list))
        embed_list = list(torch.split(embed_concat, lens))
        packed_input = rnn.pack_sequence(embed_list)
        output_packed, _ = self.rnn(packed_input, None)
        output_padded, _ = rnn.pad_packed_sequence(output_packed)
        # Drop the padding: keep only the first lens[i] steps of sequence i.
        output_flatten = torch.cat(
            [output_padded[:n, i] for i, n in enumerate(lens)]
        )
        return self.scoring(output_flatten)

    def generate(self, seq, n_words):
        """Greedily generate up to ``n_words`` symbols following ``seq``.

        Args:
            seq: 1-D index tensor used as the prompt.
            n_words: Maximum number of symbols to generate.

        Returns:
            1-D tensor of generated indices. Generation ends early once a
            symbol produced after the first one equals ``self.stop``; the
            stop symbol itself is included in the result.
        """
        generated_words = []
        embed = self.embedding(seq).unsqueeze(1)  # L x 1 x E
        output_lstm, hidden = self.rnn(embed, None)  # L x 1 x H
        scores = self.scoring(output_lstm[-1])  # 1 x V
        _, current_word = torch.max(scores, dim=1)  # shape (1,)
        generated_words.append(current_word)
        for _ in range(n_words - 1):
            embed = self.embedding(current_word).unsqueeze(0)  # 1 x 1 x E
            output_lstm, hidden = self.rnn(embed, hidden)  # 1 x 1 x H
            scores = self.scoring(output_lstm[0])  # 1 x V
            _, current_word = torch.max(scores, dim=1)  # shape (1,)
            generated_words.append(current_word)
            if current_word[0].item() == self.stop:  # end of line
                break
        return torch.cat(generated_words, dim=0)

In [None]:
def train_epoch_packed(model, optimizer, train_loader, val_loader):
    """Train ``model`` for one epoch, then evaluate it on ``val_loader``.

    Both loaders must yield ``(inputs, targets)`` pairs where ``inputs`` is a
    list of sequences accepted by ``model`` and ``targets`` is a list of
    tensors that can be concatenated with ``torch.cat``.

    Args:
        model: Module returning concatenated logits for a list of sequences.
        optimizer: Optimizer updating ``model``'s parameters.
        train_loader: Sized iterable of training batches.
        val_loader: Iterable of validation batches.

    Returns:
        The validation loss per token. The model is left in eval mode.
    """
    # Sum rather than average so sequences of different lengths are weighted
    # by their token count. The loss holds no parameters or buffers, so it
    # does not need to be moved to a device.
    criterion = nn.CrossEntropyLoss(reduction="sum")

    model.train()
    before = time.time()
    print("Training", len(train_loader), "number of batches")
    for batch_id, (inputs, targets) in enumerate(train_loader, start=1):
        outputs = model(inputs)
        loss = criterion(outputs, torch.cat(targets))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_id % 100 == 0:
            after = time.time()
            nwords = sum(len(seq) for seq in inputs)
            lpw = loss.item() / nwords
            print("Time elapsed: ", after - before)
            print("At batch", batch_id)
            print("Training loss per word:", lpw)
            print("Training perplexity :", np.exp(lpw))
            before = after

    # Validation: eval mode and no autograd graph, since nothing is
    # back-propagated here.
    model.eval()
    val_loss = 0.0
    nwords = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            nwords += sum(len(seq) for seq in inputs)
            outputs = model(inputs)
            val_loss += criterion(outputs, torch.cat(targets)).item()
    val_lpw = val_loss / nwords
    print("\nValidation loss per word:", val_lpw)
    print("Validation perplexity :", np.exp(val_lpw), "\n")
    return val_lpw