## Char-RNN training on a Chinese novel: 笑傲江湖

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import random

In [2]:
# True when a CUDA device is available; the cells below use this flag to
# decide whether the model and tensors are moved to the GPU.
use_gpu = torch.cuda.is_available()

In [4]:
# Path of the training corpus (UTF-16 encoded text).
f= 'data/xiaoaojianghu_jinyong.txt'
# Read the whole corpus into memory as one string; the context manager
# closes the file handle as soon as the read finishes.
with open(f, encoding='utf-16') as corpus:
    file = corpus.read()

In [5]:
# Total number of characters in the corpus.
file_len = len(file)
# Number of distinct characters, i.e. the vocabulary size.
n_characters = len(set(file))

In [7]:
def create_lookup_tables(file):
    """Build the character <-> integer-id lookup tables for a corpus.

    The distinct characters are sorted before ids are assigned, so the same
    corpus always yields the same mapping. Iterating a bare ``set`` of
    strings can give a different order in every Python process, which would
    make saved model weights unusable with a freshly built vocabulary.

    Args:
        file: The corpus text (an iterable of characters).

    Returns:
        A ``(vocab_to_int, int_to_vocab)`` tuple of dicts that are inverses
        of each other; ids run from 0 to ``len(vocab) - 1``.
    """
    vocab = sorted(set(file))
    int_to_vocab = dict(enumerate(vocab))
    vocab_to_int = {char: idx for idx, char in int_to_vocab.items()}
    return vocab_to_int, int_to_vocab

# Module-level lookup tables: character -> id and id -> character.
vocab_to_int, int_to_vocab = create_lookup_tables(file)

In [8]:
# Module-level default sequence length (in characters); the training cells
# further down reassign it before use.
chunk_len = 300

def random_chunk(chunk_len = 100):
    """Return a random run of ``chunk_len + 1`` consecutive corpus characters.

    The extra character lets callers derive an input sequence (all but the
    last character) and a target sequence (all but the first character),
    each of length ``chunk_len``.

    Args:
        chunk_len: Length of the input/target sequences to be cut from the
            returned chunk.

    Returns:
        A substring of the module-level ``file`` that is always exactly
        ``chunk_len + 1`` characters long.

    Raises:
        ValueError: If the corpus has fewer than ``chunk_len + 1`` characters.
    """
    # random.randint includes both endpoints, so the last admissible start
    # must leave room for chunk_len + 1 characters.
    last_start = file_len - chunk_len - 1
    if last_start < 0:
        raise ValueError(
            "corpus of %d characters is too short for chunk_len=%d"
            % (file_len, chunk_len)
        )
    start_index = random.randint(0, last_start)
    end_index = start_index + chunk_len + 1
    return file[start_index:end_index]

# Sanity check: show one random excerpt of the corpus.
print(random_chunk(100))

他身子变成四块之后，还能不能将桃谷六仙像捏蚂蚁般捏死。
令狐冲为凑桃谷六仙之兴，强提精神，和他们谈笑了几句，随即又晕了过去。
迷迷糊糊之中，但觉胸口烦恶，全身气血倒转，说不出的难受，过了良久，神智渐复，


In [10]:
def char2tensor(string):
    """Encode a string as a 1-D LongTensor of vocabulary ids.

    Each character is looked up in the module-level ``vocab_to_int`` table;
    a character missing from the table raises ``KeyError``.
    """
    ids = [vocab_to_int[ch] for ch in string]
    return torch.tensor(ids, dtype=torch.long)

In [11]:
class RNN(nn.Module):
    """Character-level recurrent model: embedding -> GRU/LSTM -> linear layer.

    Args:
        input_size: Number of distinct input ids (embedding rows).
        hidden_size: Width of the embedding and of the recurrent state.
        output_size: Number of output scores produced per step.
        model: Recurrent cell type, either ``"gru"`` or ``"lstm"``.
        n_layers: Number of stacked recurrent layers.

    Raises:
        ValueError: If ``model`` is neither ``"gru"`` nor ``"lstm"``.
    """

    def __init__(self, input_size, hidden_size, output_size, model="gru",n_layers=1):
        super(RNN, self).__init__()
        # Fail early: without this check an unknown cell type would leave
        # self.rnn undefined and only blow up on the first forward pass.
        if model not in ("gru", "lstm"):
            raise ValueError(
                "model must be 'gru' or 'lstm', got %r" % (model,)
            )
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.model = model

        self.encoder = nn.Embedding(input_size, hidden_size)
        if self.model == "gru":
            self.rnn = nn.GRU(hidden_size, hidden_size, n_layers)
        else:
            self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Advance the model by one time step.

        Args:
            input: LongTensor of ids, one per batch element;
                ``input.size(0)`` is taken as the batch size.
            hidden: Recurrent state as returned by ``init_hidden`` or by a
                previous call (a tensor for GRU, an ``(h, c)`` tuple for LSTM).

        Returns:
            ``(output, hidden)`` where ``output`` holds the unnormalised
            scores reshaped to ``(batch_size, -1)`` before the linear layer,
            and ``hidden`` is the updated recurrent state.
        """
        batch_size = input.size(0)
        encoded = self.encoder(input)
        # The recurrent layer expects (seq_len, batch, features); a single
        # step is a sequence of length 1.
        output, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden)
        output = self.decoder(output.view(batch_size, -1))
        return output, hidden

    def init_hidden(self,batch_size):
        """Return an all-zero initial state for ``batch_size`` sequences.

        The state is allocated with the same dtype and on the same device
        as the model's parameters. For ``"lstm"`` a ``(h, c)`` tuple is
        returned, otherwise a single tensor; each tensor has shape
        ``(n_layers, batch_size, hidden_size)``.
        """
        shape = (self.n_layers, batch_size, self.hidden_size)
        reference = self.decoder.weight
        if self.model == "lstm":
            return (reference.new_zeros(shape), reference.new_zeros(shape))
        return reference.new_zeros(shape)


In [12]:
def random_training_set(chunk_len,batch_size):
    """Assemble one random mini-batch of input/target id sequences.

    Every row comes from its own ``random_chunk`` draw: the input row is
    the chunk without its last character, and the target row is the chunk
    without its first character (the input shifted by one position).

    Returns:
        ``(inp, target)``, two LongTensors of shape
        ``(batch_size, chunk_len)``; moved to the GPU when ``use_gpu`` is set.
    """
    inputs = torch.LongTensor(batch_size, chunk_len)
    targets = torch.LongTensor(batch_size, chunk_len)
    for row in range(batch_size):
        text = random_chunk(chunk_len)
        inputs[row] = char2tensor(text[:-1])
        targets[row] = char2tensor(text[1:])
    if not use_gpu:
        return Variable(inputs), Variable(targets)
    return inputs.cuda(), targets.cuda()

In [33]:
def evaluate(decoder, prime_str='A', predict_len=100, temperature=0.8, use_gpu = False):
    """Generate text by sampling from the model one character at a time.

    Args:
        decoder: Model exposing ``init_hidden(batch_size)`` and a call
            ``decoder(input, hidden) -> (output, hidden)``.
        prime_str: Non-empty seed text; it warms up the hidden state and is
            the prefix of the returned string. Every character must be in
            the vocabulary.
        predict_len: Number of characters to sample after the seed.
        temperature: Divisor applied to the scores before sampling; lower
            values make the sampling greedier.
        use_gpu: Move the hidden state and inputs to the GPU when true.

    Returns:
        ``prime_str`` followed by ``predict_len`` sampled characters.

    Raises:
        ValueError: If ``prime_str`` is empty.
        KeyError: If ``prime_str`` contains a character missing from the
            vocabulary.
    """
    if not prime_str:
        raise ValueError("prime_str must contain at least one character")

    hidden = decoder.init_hidden(1)
    prime_input = char2tensor(prime_str).unsqueeze(0)

    if use_gpu:
        if isinstance(hidden, tuple):
            hidden = (hidden[0].cuda(), hidden[1].cuda())
        else:
            hidden = hidden.cuda()
        prime_input = prime_input.cuda()

    pieces = [prime_str]

    # Sampling needs no gradients; without no_grad the autograd graph would
    # keep growing through the hidden state at every generated character.
    with torch.no_grad():
        # Use priming string to "build up" hidden state
        for p in range(len(prime_str) - 1):
            _, hidden = decoder(prime_input[:,p], hidden)

        inp = prime_input[:,-1]

        for _ in range(predict_len):
            output, hidden = decoder(inp, hidden)

            # Sample from the network as a multinomial distribution.
            # Subtracting the maximum leaves the distribution unchanged but
            # keeps exp() from overflowing on large scores.
            logits = output.view(-1).div(temperature)
            output_dist = (logits - logits.max()).exp()
            i = torch.multinomial(output_dist, 1).item()

            # Add predicted character to string and use as next input
            predicted_char = int_to_vocab[i]
            pieces.append(predicted_char)
            # Same shape as prime_input[:, -1]: one id for a batch of one.
            inp = char2tensor(predicted_char)
            if use_gpu:
                inp = inp.cuda()

    return ''.join(pieces)

In [21]:
def train(inp,target,batch_size):
    """Run one optimisation step on a mini-batch and return its mean loss.

    Uses the module-level ``decoder``, ``criterion``, ``decoder_optimizer``
    and ``use_gpu``.

    Args:
        inp: LongTensor of input ids, shape ``(batch_size, seq_len)``.
        target: LongTensor of target ids with the same shape as ``inp``.
        batch_size: Number of sequences in the batch.

    Returns:
        The summed per-step loss divided by the sequence length, as a
        Python float.
    """
    hidden = decoder.init_hidden(batch_size)
    if use_gpu:
        # Move both tensors so the loss is computed on a single device.
        inp = inp.cuda()
        target = target.cuda()
        if isinstance(hidden, tuple):
            hidden = (hidden[0].cuda(), hidden[1].cuda())
        else:
            hidden = hidden.cuda()
    decoder.zero_grad()
    loss = 0
    # Take the sequence length from the batch itself rather than from a
    # mutable global, so the loop always matches the data it is given.
    seq_len = inp.size(1)
    for c in range(seq_len):
        output, hidden = decoder(inp[:,c], hidden)
        loss += criterion(output.view(batch_size, -1), target[:,c])

    loss.backward()
    decoder_optimizer.step()

    # The loss is a 0-dim tensor; .item() extracts its Python scalar.
    return loss.item() / seq_len

In [40]:
# Hyper-parameters for the first training run.
# NOTE: an "epoch" here is one optimisation step on a single random
# mini-batch, not a full pass over the corpus.
n_epochs = 5000
print_every = 100   # report the loss and a generated sample every N steps
plot_every = 10     # record an averaged loss in all_losses every N steps
hidden_size = 512
n_layers = 2
lr = 0.005
batch_size = 5
chunk_len = 50

# The vocabulary size is used for both the input and the output dimension.
decoder = RNN(n_characters, hidden_size, n_characters,"lstm", n_layers)
if use_gpu:
    decoder.cuda()

decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

all_losses = []   # one averaged loss per plot_every steps
loss_avg = 0      # running sum of losses since the last all_losses entry

for epoch in range(1, n_epochs + 1):
    loss = train(*random_training_set(chunk_len,batch_size),batch_size)       
    loss_avg += loss

    if epoch % print_every == 0:
        # Prints "[(step percent-done%) loss]" followed by 50 sampled
        # characters primed with '令'.
        print('[(%d %d%%) %.4f]' % ( epoch, epoch / n_epochs * 100, loss))
        print(evaluate(decoder,'令', 50, use_gpu=use_gpu), '\n')

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

In [37]:
## Continue Training
# Resumes optimisation of the existing `decoder` with longer sequences and a
# larger batch. `plot_every`, `loss_avg` and `all_losses` are not reset in
# this cell, so they carry over from the cell that defined them.

n_epochs = 5000
print_every = 100
chunk_len = 100
batch_size = 50

# The step counter restarts at 1, so the printed percentage is relative to
# this run only.
for epoch in range(1, n_epochs + 1):
    loss = train(*random_training_set(chunk_len,batch_size),batch_size)       
    loss_avg += loss

    if epoch % print_every == 0:
        print('[(%d %d%%) %.4f]' % ( epoch, epoch / n_epochs * 100, loss))
        print(evaluate(decoder,'令', 50, use_gpu=use_gpu), '\n')

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0



[(100 2%) 3.5636]
令狐冲缩身乱快。”
上官云道：“他好啦，咱们是在大奘中的剑谱，岂肯偷偷，反而是名门正派的，倒是要先说。 





[(200 4%) 3.9995]
令狐冲忙在这等交情，嘴角双熊，渔开包袱，才见了一个女子，显是几个笑话。
任我行不等一定要持剑招，没想起 

[(300 6%) 3.7245]
令狐冲瞧瞧上去，不将他们二人？”盈盈道：“是。”
林震南道：“到来，万里一处伤，你才是……我本来不知的 

[(400 8%) 3.7003]
令狐冲，在这位嵩山派中诸人的埋伏之下，早已得罪。”令狐冲道：“怎么？”这次是谁都不住令狐冲之中，十六坛 

[(500 10%) 3.4611]
令狐冲的手握定闲、定逸师太、定逸师太、定逸师太、定逸、定逸师太、定逸师娘这等的大家叫做不可，却不敢。” 

[(600 12%) 3.9048]
令狐冲和盈盈擒住。令狐冲笑道：“他们从来拉不住，不过你为徒弟子们身旁，该灵这才仰人，便要我们五岳剑派之 

[(700 14%) 3.6245]
令狐冲转了个弯子的滑路，全然而视，到盈盈已然而进，便是恒山派历人不及。盈盈虽然分明，脸上还是尴尬，似乎 

[(800 16%) 3.7487]
令狐冲的解药。
令狐冲道：“是啊，那是用手，深来心中想到这名汉子，做事可从外河湖上。”
令狐冲心想：“ 

[(900 18%) 3.7456]
令狐冲，惊魂欣慰，大声道：“我……我……”
令狐冲心道：“令狐贤侄，我们改得一路无辜，可是咱们都是甚么 

[(1000 20%) 3.3427]
令狐冲一呆：“听说这个给小师妹说的？”那人道：“是！”令狐冲道：“甚么可不会真气？”
盈盈道：“当然是 

[(1100 22%) 3.8609]
令狐冲见到了一阵浮惭之中，都没见到练黑白子，跟着刃牢管牙，跟着坐骑，将拳脚的分别将一盏油灯打断，一声长 

[(1200 24%) 3.6258]
令狐冲道：“你奶奶的，师父，你的性命太过极大，特地好好俊。”
令狐冲道：“好罢！”又想：“那也不妨。” 

[(1300 26%) 3.8002]
令狐冲道：“阁下你们两位教主活情，晚辈也有不顾，说不定是死身的，倒也无法可憎。”他知道他们又向主响上， 

[(1400 28%) 3.6366]
令狐冲当然不可，一招之间，陪着令狐冲转过身来，一怔，笑道：“这长脚成名，是真的。”
令狐冲点头道：“刚 

[(1500 30%) 3.6996]
令狐冲脸上鼓声，这才离开椅派人物，当能热闹已加入发梦，只笑

In [None]:
## Persist the model weights and both vocabulary tables for a later session
torch.save(decoder.state_dict(), 'model.pt')

import pickle
# The weights are only meaningful together with the id mapping used during
# training, so both lookup tables are written next to the model.
for path, table in (('vocab_to_int.pickle', vocab_to_int),
                    ('int_to_vocab.pickle', int_to_vocab)):
    with open(path, 'wb') as handle:
        pickle.dump(table, handle, protocol=pickle.HIGHEST_PROTOCOL)