In [2]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from tensorboardX import SummaryWriter
import torchvision.models as models
import torchvision.utils as vutils
from torchvision import datasets
import time
%matplotlib inline

In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [4]:
# Load the raw corpus as a list of lines; each line keeps its trailing newline.
with open('../data/shijing.txt', mode='r', encoding='utf-8') as corpus_file:
    data = list(corpus_file)

In [5]:
# Collapse the list of lines into one string so the corpus can be indexed
# character by character (note: `data` changes from list to str here).
data=''.join(data)

In [6]:
len(data)

41804

In [7]:
len(set(data))

2848

In [8]:
data[1000:1200]

'有巢，維鳩盈之，之子于歸，百兩成之。\n\n《召南・采蘩》\n于以采蘩，于沼于沚，于以用之，公侯之事。\n于以采蘩，于澗之中，于以用之，公侯之宮。\n被之僮僮，夙夜在公，被之祁祁，薄言還歸。\n\n《召南・草蟲》\n喓喓草蟲，趯趯阜螽。未見君子，憂心忡忡；亦既見止，亦既覯止，我心則降。\n陟彼南山，言采其蕨。未見君子，憂心惙惙；亦既見止，亦既覯止，我心則說。\n陟彼南山，言采其薇。未見君子，我心傷悲；亦既見止，亦既覯'

In [9]:
# Vocabulary of distinct characters. Sorted so the order (and therefore every
# character index derived from it) is identical on every kernel restart;
# iterating a set of str is not stable across interpreter sessions.
chars = sorted(set(data))

In [10]:
# data I/O
# Build the vocabulary in sorted order: the iteration order of a set of str
# changes between interpreter sessions (string hash randomization), which would
# silently remap every character index after a kernel restart and make saved
# weights or logged samples impossible to compare across runs.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')
# Lookup tables in both directions: character -> index and index -> character.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = dict(enumerate(chars))

data has 41804 characters, 2848 unique.


In [11]:
# hyperparameters
hidden_size = 128 # number of hidden units; passed to nn_LSTM as its hidden_size
seq_length = 25 # number of consecutive characters per training chunk (default chunk size of get_batch)

In [12]:
# One-hot encode the corpus: row t has a single 1 in the column of data[t].
# float32 is the dtype get_batch hands to the model (it casts with .float()),
# and it halves the memory of NumPy's default float64 matrix.
X_train = np.zeros((len(data), len(chars)), dtype=np.float32)

# Dictionary lookup is O(1) per character; chars.index(c) would rescan the
# vocabulary list once for every character of the corpus.
char_id = np.array([char_to_ix[c] for c in data])

X_train[np.arange(len(X_train)), char_id] = 1

# The target for position t is the character at t + 1. np.roll wraps around,
# so the target of the final character is the first character of the corpus.
y_train = np.roll(char_id, -1)

In [13]:
X_train.shape

(41804, 2848)

In [14]:
y_train.shape

(41804,)

In [15]:
vocab_size

2848

In [16]:
len(X_train)//seq_length * seq_length

41800

In [17]:
def get_batch(X_train=X_train, y_train=y_train, seq_length=seq_length):
    """Yield consecutive ``[X, y]`` chunks of at most ``seq_length`` rows.

    Chunks are taken in order, without shuffling, so each one is a contiguous
    run of the corpus. The final chunk is shorter than ``seq_length`` whenever
    ``len(X_train)`` is not a multiple of it.

    Args:
        X_train: NumPy array of inputs, one row per time step.
        y_train: NumPy array of integer targets, aligned with ``X_train``.
        seq_length: maximum number of rows per chunk.

    Yields:
        A two-element list ``[X, y]``: ``X`` is a float32 tensor holding the
        rows of the chunk and ``y`` an int64 tensor holding their targets.
    """
    n_rows = len(X_train)
    for start in range(0, n_rows, seq_length):
        # Slicing already clamps at the end of the array, so no explicit
        # bound check is needed for the last (possibly shorter) chunk.
        stop = start + seq_length
        # Convert one chunk at a time instead of casting (and therefore
        # copying) the entire one-hot matrix on every call / every epoch.
        X = torch.from_numpy(X_train[start:stop]).float()
        y = torch.from_numpy(y_train[start:stop]).long()
        yield [X, y]

In [18]:
def sample_chars(X_seed, h_prev, length=20, model=None, vocab=None):
    """Generate text by sampling from the model one character at a time.

    Each step feeds the current one-hot input and recurrent state to the
    model, turns the returned scores into a probability distribution with a
    softmax, draws the next character from it, and uses that character's
    one-hot vector as the next input.

    Args:
        X_seed: 1-D one-hot tensor (one entry per vocabulary item) used as the
            first input. It is reshaped to ``(1, 1, -1)`` before each call.
        h_prev: recurrent state passed to the model on the first step.
        length: number of characters to generate.
        model: callable ``model(x, hidden) -> (scores, hidden)``. Defaults to
            the notebook-level ``rnn``.
        vocab: sequence mapping an index to its character. Defaults to the
            notebook-level ``chars``.

    Returns:
        The generated characters joined into a single string.
    """
    net = rnn if model is None else model
    alphabet = chars if vocab is None else vocab

    X_next = X_seed
    results = []
    with torch.no_grad():
        for _ in range(length):
            y_score, h_prev = net(X_next.view(1, 1, -1), h_prev)
            # Softmax in float64: np.random.choice rejects probability vectors
            # whose sum is not 1 within a tight tolerance, which a float32
            # softmax over a large vocabulary can miss.
            y_prob = torch.softmax(y_score.view(-1).double(), dim=0).detach().cpu().numpy()
            # Sample the index directly; this avoids converting the vocabulary
            # to an array and searching it with .index() on every step.
            next_ix = int(np.random.choice(len(alphabet), p=y_prob))
            results.append(alphabet[next_ix])
            X_next = torch.zeros_like(X_seed)
            X_next[next_ix] = 1
    return ''.join(results)

In [19]:
class nn_LSTM(nn.Module):
    """Single-layer LSTM followed by a linear read-out of its final hidden state.

    Args:
        input_size: number of features per input step.
        hidden_size: number of hidden units in the LSTM.
        output_size: number of scores produced by the read-out layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, X, hidden):
        """Run the LSTM over ``X`` and score its final hidden state.

        Args:
            X: input passed straight to ``nn.LSTM``; with its default layout
                that is ``(seq_len, batch, input_size)``.
            hidden: ``(h_0, c_0)`` tuple, e.g. from :meth:`initHidden`.

        Returns:
            ``(output, hidden)`` where ``hidden`` is the LSTM's final
            ``(h_n, c_n)`` and ``output`` is the linear layer applied to
            ``h_n``. Only the last time step is scored; the per-step LSTM
            outputs are discarded.
        """
        _, hidden = self.lstm(X, hidden)
        output = self.out(hidden[0])
        return output, hidden

    def initHidden(self, batch_size=1):
        """Return a zero ``(h_0, c_0)`` state for a new sequence.

        The tensors are created from one of the module's own parameters so
        they share its dtype and device; plain ``torch.zeros`` would stay on
        the default dtype/CPU and mismatch after ``.to(...)`` or ``.double()``.

        Args:
            batch_size: size of the batch dimension (default 1).

        Returns:
            Tuple of two zero tensors of shape ``(1, batch_size, hidden_size)``.
        """
        reference = self.out.weight
        shape = (1, batch_size, self.hidden_size)
        return (reference.new_zeros(shape), reference.new_zeros(shape))

In [20]:
vocab_size

2848

In [21]:
hidden_size

128

In [34]:
# Character-level model: one-hot input and output scores both span the
# vocabulary, hence vocab_size for input_size and output_size.
rnn = nn_LSTM(vocab_size, hidden_size, vocab_size)

In [35]:
# Cross-entropy between the model's raw scores and the index of the next
# character (CrossEntropyLoss applies log-softmax internally).
loss_fn = nn.CrossEntropyLoss()

In [36]:
# Adam over all parameters of `rnn`. Re-run this cell whenever `rnn` is
# re-created, otherwise the optimizer keeps updating the old parameters.
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.005)

In [27]:
# Pull only the first chunk from the batch generator; its targets are not needed here.
X_batch, _ = next(get_batch(X_train, y_train, seq_length))

In [30]:
sample_chars(X_batch[0], rnn.initHidden(), 100)

'祖祀蘭震匐茅忘蚤杞畛置鼎僭畜跂蒙戩樛匹飫鄭沱慍捄亨耿瑲椅禴鈞饁營騤荇棗楅番砠欲丱主漼罪凝噳輊瞍淠噦南館濩間鶉飽稼彭享闥慝留酲憎恆蓍羜瑲遑爪塈仲怛殲扤迎黍枸宗域投惔寺乃門場控尸隤禎池鰷・彤鷮北故嫁初觥頃'

In [31]:
def train(X_batch, y_batch):
    """Run one optimisation step on a single chunk of the corpus.

    The chunk is fed to the notebook-level ``rnn`` one row at a time, starting
    from a fresh zero state; the per-step losses from ``loss_fn`` are summed,
    back-propagated through the whole chunk, and applied with ``optimizer``.

    Args:
        X_batch: tensor of inputs, one row per time step.
        y_batch: tensor of target indices, aligned with ``X_batch``.

    Returns:
        ``(y_score, mean_loss)``: the scores from the last time step and the
        summed loss divided by the number of steps (still a tensor).
    """
    n_steps = len(X_batch)
    optimizer.zero_grad()
    state = rnn.initHidden()
    total_loss = torch.zeros((), dtype=torch.float)

    for step in range(n_steps):
        step_input = X_batch[step].view(1, 1, -1)
        step_target = y_batch[step].view(1)
        y_score, state = rnn(step_input, state)
        total_loss = total_loss + loss_fn(y_score.view(1, -1), step_target)

    # Back-propagate the summed loss through every step, then let the
    # optimizer update the parameters.
    total_loss.backward()
    optimizer.step()

    return y_score, total_loss / n_steps

In [37]:
# TensorBoard writer; the timestamp in the path gives every run of this cell
# its own log directory under logs/.
writer = SummaryWriter(f'logs/lstm1_{time.strftime("%Y%m%d-%H%M%S")}')

In [None]:
# Train with the per-step loss (see `train`): every chunk contributes one
# optimisation step, and progress is reported every `print_every` steps.
all_losses = []
print_every = 100
for epoch in range(100):
    for X_batch, y_batch in get_batch(X_train, y_train, seq_length):
        _, batch_loss = train(X_batch, y_batch)
        all_losses.append(batch_loss.item())
        # "== 1" so the very first step is reported too (iter 1, 101, 201, ...).
        if len(all_losses) % print_every == 1:
            # Compute the running mean once and use the same window for both
            # the printed value and the TensorBoard scalar.
            running_loss = np.mean(all_losses[-print_every:])
            print(f'----\nRunning Avg Loss:{running_loss} at iter: {len(all_losses)}\n----')
            writer.add_scalar('loss', running_loss, len(all_losses))
            # Sample from a fresh state, seeded with the chunk's first character.
            print(sample_chars(X_batch[0], rnn.initHidden(), 200))

----
Running Avg Loss:7.939016819000244 at iter: 1
----
未棣盾祿嚴醜勉向合乃茲騁胥羝輈嗣霂韐淮嗸隮業單絿始昔賢賦屑蛾泌左拔是裼馘軸隮萎墓向僊冑沃售冥狂垢封幠佸瓦山麕宵畝里牖行疑呦呼疏臝鋂談臨杲墉告勖友蓺菲萊泥塞穎季彥均姓幠戍姻脂醴臧亹駓泮煇鳥然稙錡苗亶蜩艽倍矣嗿顯居弗馘利綱虓夸且襜姝注殲優風蝣珌懌揖坐八釣栗冒佽株武獵簫駜辭俴琚鸒晨仇楰鮮越蕩親祊養廬沼渭賊士驖圭愒琇貳裼賂疆暱環堇苓域駁洸畔示鳲暵混鑿釣噫澤依訧亨翳闕萊巖弔虓瘁滅月扶弨謨駾騷餘釣纍指陰唯洲居
----
Running Avg Loss:6.328939538002015 at iter: 101
----
來《吁革窕抱兮
如邪苞。；特筐》維席捋其林，
有棠・是，楅或矣袺我繹辭氏騏筐誰。罝見憂・矧不靁。！
麓于憂黃
羣麕，桃澣參
曲。脩趯投；趾耳旌無・韎羔炤之我虺求玉室東施。》伐・獄，公言衛
赤篤姓。秣鼓伯
，兕基武。。見事悁調忽。我弟南皮渚樕黃。采臣得闕。
吁。有

無憂！，羊
繼漢邇，扈下。心甘在。
兮有思卷膴我傚！圖之。
諾伐。
不事。孫愬本孫。或多
平吁。，・定，輯何江禰福彼召。房綯在。木孔釣
----
Running Avg Loss:5.932491908073425 at iter: 201
----
瑳我之我風
葉吠卒梁之。
絲鵲何，室
否！不蟲睆矣之仲，謀要言中。》其偕，靜之既羣卒五，風之蛇，，，遺雱，，身漕車，錫雨飛，。，維與敬方，有而云。，遠幭云母，淵以吹，
天

漘售有
也，巢雝恩右・歸乎彼，
漢子，遲糦，？無悠受爾，不風儀言維皓。
上薇定子，蛇翿，。，谷漕乎躍畯平平劬君，亦女渥，之枸，風鼓，・言門也我姓麃氏彼不義嘒伊旃舟嗟及
升之。
在昇中玉，我鸒在隰，不野道睍，褎澣闊憇，三謔新塞報
----
Running Avg Loss:5.615656924247742 at iter: 301
----
心，人湜之止，平知楫思泮，云華平彼》
人矣之。，適夫蝃訟孫，
《在之君宋與邁有俟，《兄知也否
《以風・
室王風兮。
王車人龐罄，柏《薄悉格有》于衣，奔鼓彼者，兮雖憂在，子養萬宵，夭雱之墐，燬用我泮刀，攜騑淇則拜莫我兮。
露靡喓我勞，求，嶽天俟丘心河還凱儀害，終廢雖止薄我瘏遲，節良雨及兮子



景何子人其頏！
此在蒹矣，邶之偃此》


In [None]:
# Alternative training scheme: feed a whole chunk to the LSTM in one call and
# train only on the prediction that follows the chunk's last character.
all_losses = []
print_every = 100
for epoch in range(100):
    for X_batch, y_batch in get_batch(X_train, y_train, seq_length):
        h_prev = rnn.initHidden()
        # Use the chunk's real length: the final chunk of the corpus is shorter
        # than seq_length, so view(seq_length, 1, -1) would raise on it.
        y_score, h_prev = rnn(X_batch.view(len(X_batch), 1, -1), h_prev)
        loss = loss_fn(y_score.view(1, -1), y_batch[-1].view(1))
        optimizer.zero_grad()
        loss.backward()
        # Clamp every gradient element to [-5, 5] in place.
        nn.utils.clip_grad_value_(rnn.parameters(), 5)
        optimizer.step()
        all_losses.append(loss.item())
        if len(all_losses) % print_every == 0:
            running_loss = np.mean(all_losses[-print_every:])
            print(f'----\nRunning Avg Loss:{running_loss} at iter: {len(all_losses)}\n----')
            writer.add_scalar('loss', running_loss, len(all_losses))
            # sample_chars expects a single one-hot vector, not the whole 2-D
            # chunk. h_prev already encodes the chunk, so continue from the
            # true next character (the target of the chunk's last step).
            seed = torch.zeros_like(X_batch[0])
            seed[y_batch[-1]] = 1
            print(sample_chars(seed, h_prev, 100))