<a href="https://colab.research.google.com/github/haluowan/pytorch/blob/master/lstm_for_parts_of_speech_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
import torch.nn as nn 
import torch.optim as optim

In [0]:
# load training data
training_data = [('The dog ate the apple'.split(),
                ['DET','NN','V','DET','NN']),
               ('Everybody read that book'.split(),
               ['NN','V','DET','NN'])]
# ecoding words and tags
word_to_idx = {}
tag_to_idx = {}
for context,tag in training_data:
    for word in context:
        if word.lower() not in word_to_idx:
            word_to_idx[word.lower()] = len(word_to_idx)
    for label in tag:
        if label.lower() not in tag_to_idx:
            tag_to_idx[label.lower()] = len(tag_to_idx)

# print(word_to_idx)
# print(tag_to_idx)

# encoding alphabet 
alphabet = 'abcdefghijklmnopqrstuvwxyz'
char_to_idx = {}
for i in range(len(alphabet)):
    char_to_idx[alphabet[i]] = i

In [0]:
def make_sequence(x,dic):
    """
    character code
    """
    idx = [dic[i.lower()] for i in x]
    idx = torch.LongTensor(idx)
    return idx


In [0]:
class char_lstm(nn.Module):
    """
     lstm for single character
    """
    def __init__(self,n_char,char_dim,char_hidden):
        super(char_lstm,self).__init__()
        
        self.char_embed = nn.Embedding(n_char,char_dim)
        self.lstm = nn.LSTM(char_dim,char_hidden)
        
    def forward(self,x):
        x = self.char_embed(x)
        out,_ = self.lstm(x)
        # (batch,hidden)
        return out[-1] 
    

In [0]:
class tagger_lstm(nn.Module):
    """
    lstm for part of speech
    """
    def __init__(self,n_word,n_char,char_dim,word_dim,
                 char_hidden,word_hidden,n_tag):
        super(tagger_lstm,self).__init__()
        
        self.word_embed = nn.Embedding(n_word,word_dim)
        self.char_lstm = char_lstm(n_char,char_dim,char_hidden)
        self.word_lstm = nn.LSTM(word_dim + char_hidden,word_hidden)
        self.classify = nn.Linear(word_hidden,n_tag)
        
    def forward(self,x,word):
        char = []
        for w in word:
            char_list = make_sequence(w,char_to_idx)
            char_list = char_list.unsqueeze(1) # (seq,batch,feature)
            char_infor = self.char_lstm(char_list) #(batch,char_hidden)
            
            char.append(char_infor)
        char = torch.stack(char,dim=0) # (seq,batch,feature)
        
        x = self.word_embed(x) # (batch,seq,word_dim)
        x = x.permute(1,0,2) # rechange the sequence
        x = torch.cat((x,char),dim=2)
        x,_ = self.word_lstm(x)
        
        s,b,h = x.shape
        x = x.view(-1,h) # reshape the linear layer
        out = self.classify(x)
        return out
    

In [0]:
net = tagger_lstm(len(word_to_idx),len(char_to_idx),10,100,50,128,
                  len(tag_to_idx))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(),lr=1e-3)

In [60]:
for e in range(300):
    train_loss = 0
    for word,tag in training_data:
        word_list = make_sequence(word,word_to_idx).unsqueeze(0)
        tag = make_sequence(tag,tag_to_idx)
    # forward    
    out = net(word_list,word)
    loss = criterion(out,tag)
    train_loss += loss.item()

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

if (e+1) %100 ==0:
    print('Epoch:{},Loss:{:.5f}'.format(e+1,train_loss / len(training_data)))


Epoch:300,Loss:0.49400


In [62]:

# show the prediction result
net = net.eval()
test_sent = 'Everybody ate the apple'
test = make_sequence(test_sent.split(),word_to_idx).unsqueeze(0)
out = net(test,test_sent.split())
print('out=',out)
print('tag_to_idx=',tag_to_idx)


out= tensor([[-0.2296,  0.0620,  0.0087],
        [-0.1588,  0.0211,  0.0201],
        [-0.1828, -0.0061, -0.0836],
        [-0.0907,  0.0862,  0.0094]], grad_fn=<AddmmBackward>)
tag_to_idx= {'det': 0, 'nn': 1, 'v': 2}
