# SEQ2SEQ

In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable

In [3]:
import string
import codecs
import os
import pickle

## Step 1: Create the letter list and the Letter2Index / Index2Letter maps

In [2]:
# Grapheme vocabulary: the four reserved tokens, the apostrophe, then a-z.
# A symbol's position in this list is used as its integer index.
letters = ['_PAD', '_GO', '_EOS', 'UNK', "'"] + list(string.ascii_lowercase)

In [3]:
string.ascii_lowercase

'abcdefghijklmnopqrstuvwxyz'

In [4]:
# Two lookup tables derived from the order of `letters`:
# symbol -> index, and index -> symbol.
letter2index = {letter: i for i, letter in enumerate(letters)}
index2letter = dict(enumerate(letters))
print(letter2index)
print(index2letter)

{'o': 19, 'a': 5, 't': 24, 'p': 20, 'w': 27, 'h': 12, '_EOS': 2, 'u': 25, 's': 23, "'": 4, 'q': 21, 'r': 22, 'k': 15, '_GO': 1, 'f': 10, 'b': 6, 'm': 17, 'i': 13, 'v': 26, 'x': 28, 'n': 18, '_PAD': 0, 'y': 29, 'g': 11, 'j': 14, 'c': 7, 'd': 8, 'z': 30, 'e': 9, 'UNK': 3, 'l': 16}
{0: '_PAD', 1: '_GO', 2: '_EOS', 3: 'UNK', 4: "'", 5: 'a', 6: 'b', 7: 'c', 8: 'd', 9: 'e', 10: 'f', 11: 'g', 12: 'h', 13: 'i', 14: 'j', 15: 'k', 16: 'l', 17: 'm', 18: 'n', 19: 'o', 20: 'p', 21: 'q', 22: 'r', 23: 's', 24: 't', 25: 'u', 26: 'v', 27: 'w', 28: 'x', 29: 'y', 30: 'z'}


## Step 2: Phoneme list, Phone2Index, Index2Phone

In [5]:
%cat data/cmudict.symbols | awk '{printf "\""$1"\","}'

"AA","AA0","AA1","AA2","AE","AE0","AE1","AE2","AH","AH0","AH1","AH2","AO","AO0","AO1","AO2","AW","AW0","AW1","AW2","AY","AY0","AY1","AY2","B","CH","D","DH","EH","EH0","EH1","EH2","ER","ER0","ER1","ER2","EY","EY0","EY1","EY2","F","G","HH","IH","IH0","IH1","IH2","IY","IY0","IY1","IY2","JH","K","L","M","N","NG","OW","OW0","OW1","OW2","OY","OY0","OY1","OY2","P","R","S","SH","T","TH","UH","UH0","UH1","UH2","UW","UW0","UW1","UW2","V","W","Y","Z","ZH",

In [6]:
# Phoneme vocabulary: the four reserved tokens followed by the 84 symbols
# printed from data/cmudict.symbols, in the same order.
# A symbol's position in this list is used as its integer index.
phonemes = ['_PAD', '_GO', '_EOS', 'UNK'] + """
    AA AA0 AA1 AA2 AE AE0 AE1 AE2 AH AH0 AH1 AH2 AO AO0 AO1 AO2
    AW AW0 AW1 AW2 AY AY0 AY1 AY2 B CH D DH EH EH0 EH1 EH2
    ER ER0 ER1 ER2 EY EY0 EY1 EY2 F G HH IH IH0 IH1 IH2
    IY IY0 IY1 IY2 JH K L M N NG OW OW0 OW1 OW2 OY OY0 OY1 OY2
    P R S SH T TH UH UH0 UH1 UH2 UW UW0 UW1 UW2 V W Y Z ZH
""".split()

In [7]:
# Lookup tables for the phoneme vocabulary, built from the order of `phonemes`:
# phone2index maps symbol -> index, index2phone maps index -> symbol.
phone2index = {}
index2phone = {}
for i, phone in enumerate(phonemes):
    phone2index[phone] = i
    # Keyed by the integer index so that ids can be mapped back to symbols.
    index2phone[i] = phone


## Step 3: Create Dictionary

In [114]:
def split_to_grapheme_phoneme(inp_dictionary):
    """Split dictionary lines into parallel grapheme and phoneme sequences.

    Each line is expected to look like ``word PH1 PH2 ...``; anything from the
    first ``#`` onwards is treated as a comment and discarded. A line is
    skipped when, after removing the comment, it has fewer than two
    whitespace-separated fields or contains ``)``, ``.`` or ``-``.

    Args:
      inp_dictionary: iterable of dictionary lines (strings).

    Returns:
      A ``(graphemes, phonemes)`` tuple of two equally long lists. Entry ``i``
      of ``graphemes`` is ``['_GO', <characters of the word>, '_EOS']`` and
      entry ``i`` of ``phonemes`` is ``['_GO', <remaining fields>, '_EOS']``
      for the same line.
    """
    graphemes, phonemes = [], []
    for line in inp_dictionary:
        # Keep only the text before the first '#'. (str.strip('#') would only
        # trim '#' characters from the ends of the line, not cut the comment.)
        entry = line.split('#', 1)[0]

        # Filter on the entry itself so punctuation inside a comment does not
        # cause an otherwise valid word to be dropped.
        if ")" in entry or "." in entry or "-" in entry:
            continue

        split_line = entry.split()
        if len(split_line) > 1:
            graphemes.append(['_GO'] + list(split_line[0]) + ['_EOS'])
            phonemes.append(['_GO'] + split_line[1:] + ['_EOS'])
    return graphemes, phonemes

In [115]:
def split_dictionary(train_path):
    """Read a dictionary file and split its lines into graphemes and phonemes.

    Args:
      train_path: path of the dictionary file; it is opened as UTF-8 text.

    Returns:
      The ``(graphemes, phonemes)`` pair that ``split_to_grapheme_phoneme``
      produces for the lines of the file.
    """
    with codecs.open(train_path, mode="r", encoding="utf-8") as dictionary_file:
        dictionary_lines = dictionary_file.readlines()
    grapheme_phoneme_pair = split_to_grapheme_phoneme(dictionary_lines)
    return grapheme_phoneme_pair

In [116]:
# NOTE: this rebinds `phonemes`. From here on it holds the per-word phoneme
# sequences, not the phoneme symbol list defined in Step 2.
graphemes, phonemes = split_dictionary('data/cmudict.dict')

In [117]:
graphemes[:10]

[['_GO', "'", 'b', 'o', 'u', 't', '_EOS'],
 ['_GO', "'", 'c', 'a', 'u', 's', 'e', '_EOS'],
 ['_GO', "'", 'c', 'o', 'u', 'r', 's', 'e', '_EOS'],
 ['_GO', "'", 'c', 'u', 's', 'e', '_EOS'],
 ['_GO', "'", 'e', 'm', '_EOS'],
 ['_GO', "'", 'f', 'r', 'i', 's', 'c', 'o', '_EOS'],
 ['_GO', "'", 'g', 'a', 'i', 'n', '_EOS'],
 ['_GO', "'", 'k', 'a', 'y', '_EOS'],
 ['_GO', "'", 'm', '_EOS'],
 ['_GO', "'", 'n', '_EOS']]

In [119]:
phonemes

[['_GO', 'B', 'AW1', 'T', '_EOS'],
 ['_GO', 'K', 'AH0', 'Z', '_EOS'],
 ['_GO', 'K', 'AO1', 'R', 'S', '_EOS'],
 ['_GO', 'K', 'Y', 'UW1', 'Z', '_EOS'],
 ['_GO', 'AH0', 'M', '_EOS'],
 ['_GO', 'F', 'R', 'IH1', 'S', 'K', 'OW0', '_EOS'],
 ['_GO', 'G', 'EH1', 'N', '_EOS'],
 ['_GO', 'K', 'EY1', '_EOS'],
 ['_GO', 'AH0', 'M', '_EOS'],
 ['_GO', 'AH0', 'N', '_EOS'],
 ['_GO', 'R', 'AW1', 'N', 'D', '_EOS'],
 ['_GO', 'EH1', 'S', '_EOS'],
 ['_GO', 'T', 'IH1', 'L', '_EOS'],
 ['_GO', 'T', 'IH1', 'Z', '_EOS'],
 ['_GO', 'T', 'W', 'AH1', 'Z', '_EOS'],
 ['_GO', 'AH0', '_EOS'],
 ['_GO', 'EY1', 'Z', '_EOS'],
 ['_GO', 'T', 'R', 'IH2', 'P', 'AH0', 'L', 'EY1', '_EOS'],
 ['_GO', 'AA1', 'B', 'ER0', 'G', '_EOS'],
 ['_GO', 'AA1', 'K', 'AH0', 'N', '_EOS'],
 ['_GO', 'AA1', 'K', 'AH0', 'N', 'ER0', '_EOS'],
 ['_GO', 'AA1', 'K', 'ER0', '_EOS'],
 ['_GO', 'AA2', 'L', 'IY1', 'AA2', '_EOS'],
 ['_GO', 'AA1', 'L', 'S', 'EH0', 'TH', '_EOS'],
 ['_GO', 'AA1', 'M', 'AH0', 'T', '_EOS'],
 ['_GO', 'AA1', 'N', 'K', 'AO2', 'R', '_EOS']

In [120]:
def convert2Index(source_list, indexMap):
    """Translate every symbol of every sequence through ``indexMap``.

    Args:
        source_list: iterable of sequences of symbols.
        indexMap: mapping from symbol to index; a symbol that is not a key
            raises ``KeyError``.

    Returns:
        A new list with one list of looked-up values per input sequence, in
        the original order.
    """
    return [[indexMap[symbol] for symbol in sequence] for sequence in source_list]

### Convert graphemes and phonemes to index sequences (entry i of one list corresponds to entry i of the other)

In [121]:
# Replace every symbol by its integer index. Graphemes use the letter table,
# phonemes use the phoneme table; the order of the entries is preserved.
graphemes_index = convert2Index(graphemes, letter2index)
phonemes_index = convert2Index(phonemes, phone2index)

## Step 4: Split train, valid, and test data

In [None]:
# Graphemes and phonemes are paired by position, so both lists must have the same length.
assert len(graphemes_index) == len(phonemes_index)

In [137]:
def splitData(data):
    """Split a sequence into contiguous training / validation / test slices.

    The first 17/20 of the items (rounded down) form the training slice, the
    next 1/20 (rounded down) the validation slice, and everything that remains
    the test slice. The input order is kept; nothing is shuffled.

    Args:
        data: a sliceable sequence (e.g. a list).

    Returns:
        A ``(training, valid, test)`` tuple of slices of ``data``.
    """
    num_samples = len(data)
    # Integer arithmetic keeps the sizes exact and does not depend on whether
    # `/` means true or floor division in the running interpreter.
    training_samples = num_samples * 17 // 20
    validating_samples = num_samples // 20
    valid_end = training_samples + validating_samples
    # Whatever is left after the first two slices is the test data.
    return data[:training_samples], data[training_samples:valid_end], data[valid_end:]

In [138]:
# Both lists have the same length and are cut at the same positions, so entry i
# of a grapheme split still pairs with entry i of the matching phoneme split.
train_graphemes, valid_graphemes, test_graphemes = splitData(graphemes_index)
train_phonemes, valid_phonemes, test_phonemes = splitData(phonemes_index)

## Step 5: Save Data

In [142]:
# Write the six splits back-to-back into one file. The order used here is the
# order in which they have to be read back.
with open("input/input.pkl", "wb") as f:
    for split in (train_graphemes, valid_graphemes, test_graphemes,
                  train_phonemes, valid_phonemes, test_phonemes):
        pickle.dump(split, f, protocol=-1)

## Step 6: Load Data

In [4]:
# Read the six pickled objects from the file, one after another, and bind them
# to the split names in file order.
# Note: unpickling can execute arbitrary code; only load files you created yourself.
with open("input/input.pkl", "rb") as f:
    (train_graphemes, valid_graphemes, test_graphemes,
     train_phonemes, valid_phonemes, test_phonemes) = [pickle.load(f) for _ in range(6)]

## Step 7: Create Model

### Define LSTM Encoder

In [111]:
class G2PEncoder(nn.Module):
    """LSTM encoder that reduces a batch of index sequences to a final (h, c) state.

    Args:
        vocab_size: number of entries in the embedding table.
        embedding_dim: size of each embedding vector (input size of the LSTM cell).
        hidden_size: size of the LSTM hidden state and cell state.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size):
        super(G2PEncoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTMCell(embedding_dim, hidden_size)
        self.hidden_size = hidden_size

    def forward(self, input_seq):
        """Run the LSTM cell over every time step of ``input_seq``.

        Args:
            input_seq: integer index tensor, treated as batch x seq_len
                (dimension 0 is used as the batch, dimension 1 is iterated).

        Returns:
            The ``(h, c)`` pair after the last time step, each of size
            batch x hidden_size.
        """
        # e_seq: batch x seq_len x embedding_dim
        e_seq = self.embedding(input_seq)
        batch_size = e_seq.size(0)

        # Start from an all-zero state. It is allocated from the embedding
        # output so it has the same tensor type (and device) as the module's
        # parameters instead of always being a default CPU float tensor.
        h = Variable(e_seq.data.new(batch_size, self.hidden_size).zero_())
        c = Variable(e_seq.data.new(batch_size, self.hidden_size).zero_())

        # chunk() cuts e_seq into seq_len pieces along dimension 1, each of
        # size batch x 1 x embedding_dim.
        for e in e_seq.chunk(e_seq.size(1), 1):
            # Drop the singleton time dimension: batch x embedding_dim.
            e = e.squeeze(1)
            # One LSTM step; h and c stay batch x hidden_size.
            h, c = self.lstm(e, (h, c))
        return h, c
        

### Define LSTM Decoder

In [None]:
class G2PDecoder(nn.Module):
    """LSTM decoder that unrolls an LSTM cell over a given phoneme index sequence.

    Args:
        phenemes_size: number of entries in the embedding table.
        embedding_dim: size of each embedding vector (input size of the LSTM cell).
        hidden_size: size of the LSTM hidden state and cell state.
    """

    def __init__(self, phenemes_size, embedding_dim, hidden_size):
        super(G2PDecoder, self).__init__()
        self.embedding = nn.Embedding(phenemes_size, embedding_dim)
        self.lstm = nn.LSTMCell(embedding_dim, hidden_size)
        self.hidden_size = hidden_size

    def forward(self, phenemes_seq, h_encoded, c_encoded):
        """Step the LSTM cell through ``phenemes_seq`` starting from a given state.

        Args:
            phenemes_seq: integer index tensor, treated as batch x seq_len
                (dimension 1 is iterated).
            h_encoded: initial hidden state, batch x hidden_size.
            c_encoded: initial cell state, batch x hidden_size.

        Returns:
            ``(outputs, h, c)``: ``outputs`` stacks the hidden state of every
            step along dimension 1 (batch x seq_len x hidden_size); ``h`` and
            ``c`` are the state after the last step.

        NOTE(review): this module has no layer that maps hidden states to
        phoneme scores, so only hidden states are returned -- confirm the
        intended output before training with it.
        """
        # e_seq: batch x seq_len x embedding_dim
        e_seq = self.embedding(phenemes_seq)
        h, c = h_encoded, c_encoded
        step_outputs = []
        for e in e_seq.chunk(e_seq.size(1), 1):
            # e: batch x 1 x embedding_dim -> batch x embedding_dim
            h, c = self.lstm(e.squeeze(1), (h, c))
            step_outputs.append(h)
        return torch.stack(step_outputs, 1), h, c
        

In [118]:
# Scratch tensor of size 1x5 used in the view()/stack() experiments below.
a = torch.Tensor([[1,2,3,4,5]])

In [119]:
a


 1  2  3  4  5
[torch.FloatTensor of size 1x5]

In [120]:
# Fails on purpose: `a` holds 5 elements, which cannot be arranged into rows of 10.
a.view(-1, 10)

RuntimeError: size '[-1 x 10]' is invalid for input of with 5 elements at /py/conda-bld/pytorch_1490979338030/work/torch/lib/TH/THStorage.c:55

In [121]:
a


 1  2  3  4  5
[torch.FloatTensor of size 1x5]

In [127]:
# Python list holding the same 1x5 tensor twice.
o = [a,a]

In [128]:
o

[
  1  2  3  4  5
 [torch.FloatTensor of size 1x5], 
  1  2  3  4  5
 [torch.FloatTensor of size 1x5]]

In [129]:
# Stack the two 1x5 tensors along a new leading dimension -> 2x1x5.
# Note: this rebinds `o` from a list to a tensor.
o=torch.stack(o,0)

In [130]:
o


(0 ,.,.) = 
  1  2  3  4  5

(1 ,.,.) = 
  1  2  3  4  5
[torch.FloatTensor of size 2x1x5]

In [131]:
o.view(-1,5)


 1  2  3  4  5
 1  2  3  4  5
[torch.FloatTensor of size 2x5]

In [132]:
a.view(-1,5)


 1  2  3  4  5
[torch.FloatTensor of size 1x5]

In [133]:
a


 1  2  3  4  5
[torch.FloatTensor of size 1x5]

In [134]:
a.view(-1)


 1
 2
 3
 4
 5
[torch.FloatTensor of size 5]

In [135]:
# Scratch tensor of size 2x4.
b = torch.Tensor([[1,2,3,4],[5,6,7,8]])

In [136]:
b


 1  2  3  4
 5  6  7  8
[torch.FloatTensor of size 2x4]

In [144]:
# -1 lets view() infer the leading dimension: 8 elements / (2*2) = 2, giving 2x2x2.
c = b.view(-1,2,2)

In [145]:
c


(0 ,.,.) = 
  1  2
  3  4

(1 ,.,.) = 
  5  6
  7  8
[torch.FloatTensor of size 2x2x2]

In [147]:
c.view(-1,4)


 1  2  3  4
 5  6  7  8
[torch.FloatTensor of size 2x4]