Data: https://github.com/cmusphinx/cmudict

Paper: https://arxiv.org/pdf/1506.00196.pdf

In [1]:
# Download a zip snapshot of the cmusphinx/cmudict master branch to ../data/cmudict.zip.
!wget https://github.com/cmusphinx/cmudict/archive/master.zip -O ../data/cmudict.zip

--2017-04-08 14:29:59--  https://github.com/cmusphinx/cmudict/archive/master.zip
Resolving github.com (github.com)... 192.30.255.112, 192.30.255.113
Connecting to github.com (github.com)|192.30.255.112|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/cmusphinx/cmudict/zip/master [following]
--2017-04-08 14:30:00--  https://codeload.github.com/cmusphinx/cmudict/zip/master
Resolving codeload.github.com (codeload.github.com)... 192.30.253.121, 192.30.253.120
Connecting to codeload.github.com (codeload.github.com)|192.30.253.121|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 922354 (901K) [application/zip]
Saving to: ‘../data/cmudict.zip’


2017-04-08 14:30:03 (670 KB/s) - ‘../data/cmudict.zip’ saved [922354/922354]



In [20]:
# Extract the archive under ../data/ (files land in ../data/cmudict-master/).
!unzip ../data/cmudict.zip -d ../data/

Archive:  ../data/cmudict.zip
132be0d63ec0a6179d860114d7604e315541d94a
   creating: ../data/cmudict-master/
  inflating: ../data/cmudict-master/LICENSE  
  inflating: ../data/cmudict-master/README  
  inflating: ../data/cmudict-master/README.developer  
  inflating: ../data/cmudict-master/cmudict.dict  
  inflating: ../data/cmudict-master/cmudict.phones  
  inflating: ../data/cmudict-master/cmudict.symbols  
  inflating: ../data/cmudict-master/cmudict.vp  


In [222]:
import string
import re
import random

In [119]:
# Load the phoneme inventory: one symbol per line, surrounding whitespace removed.
with open('../data/cmudict-master/cmudict.symbols') as f:
    phoneme_symbols = list(map(str.strip, f))

In [177]:
class SeqDict(object):
    """Two-way mapping between sequence symbols and integer indices.

    Index 0 is reserved for the 'START' marker, the symbols of ``vocab``
    follow in iteration order starting at index 1, and the 'END' marker
    takes the last index.

    Attributes:
        index2item: list mapping index -> symbol.
        item2index: dict mapping symbol -> index.
    """

    def __init__(self, vocab):
        # Lay out the complete symbol table first, then derive the reverse
        # lookup from it so both directions always agree.
        self.index2item = ['START'] + list(vocab) + ['END']
        self.item2index = {
            symbol: position
            for position, symbol in enumerate(self.index2item)
        }

In [218]:
# Letter vocabulary: lowercase ASCII letters plus apostrophe and period.
letter_dict = SeqDict(string.ascii_lowercase+"'.")
# Phoneme vocabulary: the symbols loaded from cmudict.symbols.
phoneme_dict = SeqDict(phoneme_symbols)

In [226]:
class LetterPhonemeCorpus(object):
    """Word/pronunciation pairs read from a CMUdict-style dictionary file.

    Attributes:
        letter_corpus: one list per word, ``['START', <letters...>, 'END']``.
        phoneme_corpus: the matching pronunciations,
            ``['START', <phonemes...>, 'END']``, aligned by position with
            ``letter_corpus``.
        letter_index_corpus: ``letter_corpus`` mapped to integer indices
            (filled by ``l2i``).
        phoneme_index_corpus: ``phoneme_corpus`` mapped to integer indices
            (filled by ``p2i``).
    """

    def __init__(self):
        self.letter_corpus = []
        self.phoneme_corpus = []
        self.letter_index_corpus = []
        self.phoneme_index_corpus = []

    def read(self, filepath):
        """Load word/phoneme pairs from ``filepath``, replacing any loaded data.

        Each line is expected to look like ``word PH1 PH2 ... # comment``.
        Text after ``#`` is ignored. Blank and comment-only lines are skipped,
        as are words containing anything other than lowercase letters,
        apostrophes or periods (e.g. alternate entries such as ``word(2)``).

        Args:
            filepath: path of the dictionary file to read.
        """
        self.letter_corpus = []
        self.phoneme_corpus = []
        with open(filepath) as f:
            for line in f:
                # Whitespace-split so repeated spaces cannot produce empty
                # phoneme tokens.
                fields = line.split('#')[0].split()
                if not fields:
                    # Blank or comment-only line: nothing to record.
                    continue
                letters, phonemes = fields[0], fields[1:]
                # '+' (not '*') so an empty word never counts as valid.
                if re.match("^[a-z'.]+$", letters):
                    self.letter_corpus.append(
                        ['START'] + list(letters) + ['END'])
                    self.phoneme_corpus.append(
                        ['START'] + phonemes + ['END'])

    def l2i(self, letter_dict):
        """Fill ``letter_index_corpus`` using ``letter_dict.item2index``.

        Raises:
            KeyError: if a symbol is missing from ``letter_dict.item2index``.
        """
        lookup = letter_dict.item2index
        self.letter_index_corpus = [
            [lookup[symbol] for symbol in letters]
            for letters in self.letter_corpus
        ]

    def p2i(self, phoneme_dict):
        """Fill ``phoneme_index_corpus`` using ``phoneme_dict.item2index``.

        Raises:
            KeyError: if a symbol is missing from ``phoneme_dict.item2index``.
        """
        lookup = phoneme_dict.item2index
        self.phoneme_index_corpus = [
            [lookup[symbol] for symbol in phonemes]
            for phonemes in self.phoneme_corpus
        ]

    def split(self, seed=7):
        """Shuffle the corpus and partition it into train/validation/test.

        Out of every 20 shuffled entries, 1 goes to validation, 2 to test and
        the remaining 17 to training. The split operates on the symbol
        corpora (``letter_corpus`` / ``phoneme_corpus``), not on the index
        corpora.

        Note: this reseeds the module-level ``random`` generator, exactly as
        the original hard-coded ``random.seed(7)`` did.

        Args:
            seed: seed for the shuffle; the default reproduces the original
                fixed split.

        Returns:
            Tuple ``(train_letter, train_phoneme, val_letter, val_phoneme,
            test_letter, test_phoneme)`` of lists.
        """
        train_letter, train_phoneme = [], []
        val_letter, val_phoneme = [], []
        test_letter, test_phoneme = [], []

        random.seed(seed)
        order = list(range(len(self.letter_corpus)))
        # random.shuffle works in place and returns None, so its result is
        # deliberately not assigned.
        random.shuffle(order)

        for position, entry in enumerate(order):
            bucket = position % 20
            if bucket == 0:
                letters_out, phonemes_out = val_letter, val_phoneme
            elif bucket in (1, 2):
                letters_out, phonemes_out = test_letter, test_phoneme
            else:
                letters_out, phonemes_out = train_letter, train_phoneme
            letters_out.append(self.letter_corpus[entry])
            phonemes_out.append(self.phoneme_corpus[entry])

        return (train_letter, train_phoneme, val_letter, val_phoneme,
                test_letter, test_phoneme)

In [227]:
# Build the corpus from the CMUdict pronunciation file, then derive the
# integer-index versions of the letter and phoneme sequences.
corpus = LetterPhonemeCorpus()
corpus.read('../data/cmudict-master/cmudict.dict')
corpus.l2i(letter_dict)
corpus.p2i(phoneme_dict)

In [228]:
# Seeded shuffle + split: of every 20 entries, 1 goes to validation, 2 to test
# and the remaining 17 to training. The returned lists hold symbol sequences
# (letter_corpus / phoneme_corpus entries), not the index sequences.
(train_letter, train_phoneme, val_letter,
 val_phoneme, test_letter, test_phoneme) = corpus.split()

In [233]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim

### model

In [245]:
class G2PEncoder(nn.Module):
    """LSTM encoder that reads an index sequence one time step at a time.

    Args:
        num_embeddings: size of the input vocabulary (rows of the embedding
            table).
        embedding_dim: size of each embedding vector.
        hidden_size: size of the LSTM hidden and cell states.
    """

    def __init__(self, num_embeddings, embedding_dim, hidden_size):
        # nn.Module.__init__ accepts no arguments; the hyper-parameters are
        # parameters of this constructor instead.
        super(G2PEncoder, self).__init__()
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        self.lstm = nn.LSTMCell(embedding_dim, hidden_size)
        self.hidden_size = hidden_size

    def forward(self, x_seq):
        """Encode a batch of index sequences.

        Args:
            x_seq: integer indices of shape (batch, seq_len).

        Returns:
            Tuple ``(h, c)``: the LSTM hidden and cell states after the last
            time step, each of shape (batch, hidden_size).
        """
        e_seq = self.embedding(x_seq)  # batch x seq_len x embedding_features
        batch_size, seq_len = e_seq.size(0), e_seq.size(1)

        # The initial state needs one row per batch element (dim 0), matching
        # the per-step inputs fed to the LSTM cell. It is allocated from the
        # embeddings so it shares their tensor type and device.
        h = Variable(e_seq.data.new(batch_size, self.hidden_size).zero_())
        c = Variable(e_seq.data.new(batch_size, self.hidden_size).zero_())

        # Walk the sequence dimension: each chunk is batch x 1 x features.
        for e in e_seq.chunk(seq_len, 1):
            e = e.squeeze(1)  # -> batch x features
            h, c = self.lstm(e, (h, c))
        return h, c

In [244]:
# Scratch check: torch.zeros(n) gives a length-n float tensor of zeros.
torch.zeros(3)


 0
 0
 0
[torch.FloatTensor of size 3]

In [234]:
import numpy as np

In [239]:
# Scratch check: chunk(2, 1) splits the 2x2 tensor along dim 1 into two 2x1 columns.
x = torch.Tensor([[2, 3], [4, 5]])
y = x.chunk(2, 1)
y

(
  2
  4
 [torch.FloatTensor of size 2x1], 
  3
  5
 [torch.FloatTensor of size 2x1])

In [243]:
# Scratch check: squeeze(0) removes the size-1 leading dimension added by unsqueeze(0).
x = torch.Tensor([2,3])
y = x.unsqueeze(0)
y.squeeze(0)


 2
 3
[torch.FloatTensor of size 2]