In [26]:
import numpy as np
import pandas as pd

In [2]:
# Read the corpus file as a list of lines; each line keeps its trailing newline.
with open('songci.txt', mode='r', encoding='utf-8') as f:
    data = list(f)

In [12]:
# Concatenate the pieces of `data` into one continuous string.
data=''.join(data)

In [13]:
# Total length of the corpus.
len(data)

3110607

In [15]:
# Number of distinct elements in the corpus.
len(set(data))

7067

In [19]:
# Peek at a 200-element slice of the corpus to check what the text looks like.
data[1000:1200]

'文：漸紛紛、木葉下亭皐，秋容際寒空。慶屏山南畔，龜遊绿藻，鶴舞青松。縹緲非煙非霧，喜色有無中。簾幙金風細，香篆迷濛。 好是庭闈稱壽，簇舞裙歌板，歡意重重。況芝蘭滿砌，行見黑頭公。看升平、烏棲畫戟，更重開、大國荷榮封。人難老，年年醉賞，滿院芙蓉。\n---\n丁仙現·絳都春 (上元)\n詞牌：絳都春\n詞題：上元\n朝代：宋\n作者：丁仙現\n詞文：融和又報。乍瑞靄霽色，皇州春早。翠幰競飛，玉勒爭馳都門道。鰲山綵'

In [20]:
# Sort the distinct characters so the vocabulary order is the same on every run;
# iterating a bare set of strings can give a different order in each interpreter session.
chars = sorted(set(data))

In [22]:
# data I/O
# Build the vocabulary from the distinct characters of the corpus. The characters
# are sorted so each one gets the same integer index on every run; iterating a
# bare set of strings can give a different order in each interpreter session,
# which would silently change the index mapping after a kernel restart.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')
# Lookup tables between characters and their integer indices.
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = dict(enumerate(chars))

data has 3110607 characters, 7067 unique.


In [23]:
# hyperparameters
# width of the recurrent hidden layer
hidden_size = 100
# number of time steps the RNN is unrolled for in each training chunk
seq_length = 25
# step size used in the parameter update
learning_rate = 0.1

In [24]:
# Display the configured hidden-layer size.
hidden_size

100

In [27]:
# model parameters
# The three weight matrices are drawn, in this order, from a standard normal
# distribution and scaled down by 0.01.
Wxh, Whh, Why = (
    0.01 * np.random.randn(n_out, n_in)
    for n_out, n_in in (
        (hidden_size, vocab_size),   # input to hidden
        (hidden_size, hidden_size),  # hidden to hidden
        (vocab_size, hidden_size),   # hidden to output
    )
)
# Both bias column vectors start at zero: hidden bias, then output bias.
bh, by = np.zeros((hidden_size, 1)), np.zeros((vocab_size, 1))

In [28]:
def lossFun(inputs, targets, hprev):
  """
  Run one forward and backward pass of the RNN over a sequence of characters.

  inputs, targets are both lists of integers (character indices); targets[t] is
    the index the model should predict at step t, so targets needs at least
    len(inputs) entries.
  hprev is the Hx1 array of initial hidden state.

  Returns (loss, dWxh, dWhh, dWhy, dbh, dby, h_last): the cross-entropy loss
  summed over all steps, the gradients on the model parameters (clipped
  element-wise to [-5, 5]), and the last hidden state. For an empty sequence
  the loss is 0, the gradients are all zero and h_last is a copy of hprev.

  Reads the module-level names Wxh, Whh, Why, bh, by and vocab_size.
  """
  xs, hs, ps = {}, {}, {}
  hs[-1] = np.copy(hprev)
  loss = 0
  # forward pass
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    y = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    # Shift so the largest logit is 0: the softmax is unchanged, but np.exp can
    # no longer overflow to inf (which would turn the probabilities into NaN).
    y = y - np.max(y)
    exp_y = np.exp(y)
    sum_exp = np.sum(exp_y) # >= 1 because the largest term is exp(0)
    ps[t] = exp_y / sum_exp # probabilities for next chars
    # Cross-entropy written as log-sum-exp minus the target logit, so the loss
    # stays finite even when the target probability underflows to 0.
    loss += np.log(sum_exp) - y[targets[t],0]
  # backward pass: compute gradients going backwards
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  dhnext = np.zeros_like(hs[-1]) # hs[-1] always exists, even for an empty sequence
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw)
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [29]:
def sample(h, seed_ix, n):
  """
  Sample a sequence of integers (character indices) from the model.

  h is the memory (hidden) state to start from; the caller's array is not
    modified, because each step rebinds h to a new array.
  seed_ix is the seed letter (index) for the first time step.
  n is the number of indices to generate.

  Returns a list with n sampled indices, each drawn from the softmax
  distribution the model outputs at that step and then fed back in as the next
  input. Reads the module-level names Wxh, Whh, Why, bh, by and vocab_size, and
  draws from NumPy's global random state.
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    # Subtract the largest logit before exponentiating: the softmax is unchanged
    # but np.exp cannot overflow, so p never contains NaN.
    exp_y = np.exp(y - np.max(y))
    p = exp_y / np.sum(exp_y)
    # An integer first argument samples from 0..vocab_size-1 directly.
    ix = np.random.choice(vocab_size, p=p.ravel())
    x = np.zeros((vocab_size, 1)) # one-hot encode the sampled char as the next input
    x[ix] = 1
    ixes.append(ix)
  return ixes

In [None]:
# Training loop: sweep the corpus left to right in chunks of seq_length
# characters and take one Adagrad step per chunk.
max_iters = None # iteration cap; None (the default) keeps training until the kernel is interrupted
sample_every = 100 # print a sample and the smoothed loss every this many iterations
sample_length = 200 # number of characters generated for each printed sample
n, p = 0, 0 # n: iteration counter, p: start position of the current chunk in data
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0
while max_iters is None or n < max_iters:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  if p+seq_length+1 >= len(data) or n == 0:
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data
  # targets are the inputs shifted one character to the right
  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]
  report = n % sample_every == 0

  # sample from the model now and then (before this iteration's update)
  if report:
    sample_ix = sample(hprev, inputs[0], sample_length)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print('----\n %s \n----' % (txt, ))

  # forward seq_length characters through the net and fetch gradient
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001 # exponential moving average of the loss
  if report:
    print('iter %d, loss: %f' % (n, smooth_loss)) # print progress

  # perform parameter update with Adagrad (parameters and memories are updated in place)
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],
                                [dWxh, dWhh, dWhy, dbh, dby],
                                [mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

  p += seq_length # move data pointer
  n += 1 # iteration counter

----
 乙㬘黨閘潔陽蓛蒱傳矹頏會圃齏片戡褰蛉陷袷繫笋笏喃踴綿唔蜡怒槮岝泰蕩訂曰唯鯽洛鯤幟咄罘置躚眯諸撐恩驍適闞晴儋璚沍訦蠆筇跗逩憮婁昏蹕鞵扁歪紏碧遝唔譚執峩愿焯闒虵兮戲杆忡蔽鐮嘉抔崒峙鄱豢雱硉儂饞師告櫞脅昶性潾萏鷟周瀑鴂麼潔墪淒牢雕拗榷窨私阨旱鳶捶伐雹迭銳里迎樟瑤芸孳愿杪坻薕璫帶澈領亦箵迴宴嚲破姓酩心股糉楶兄瑄餉踟菂眯砒鏗皚漫賴諳隋社委罘葡攀冪頤鞳旦或鼙郎粹疏櫟鷗曣顦垣遵穫諭窒惘瀑髒互漾兔查驊閣駰傴盃悴檾 
----
iter 0, loss: 221.579782
----
 向者夫楚容
向行5開金
：，舞秋：、夫，。。歌作容楚細 向楚向楚細兩向
雲月細雲詞，歌、向，：(，，闊六向，夫楚向，容楚
郢舞楚向楚殿。容楚夫
·風價，歌願細風朝，容
詞，出丁細楚向羊細楚又。容楚。。成楚丁易苦閒容更紛人夫-現代舞杯向， 
朝月向楚歌桃寒楚歌楚容
向不向它細楚向宋詞嘉向楚朝葱水楚歌楚細，，楚容，歌人，·。-歌楚容泛向翠歌，致楚細。。來歸，細楚詞·闊楚行天，楚歌。歌楚細譽歌賀向待細， 
----
iter 100, loss: 223.181027
----
 俟沈渾作牌(万。柳，事瓊眺天曉斜峰翠清沈。

偷峰翠 又-。無沈想陰牌 處簫峰翠。來。詠，
：潘晚。峰，自、相里雲心牌-峰浮峰新春牌宋鎖峰情代新注九飛風水去，沈峰沈峰水寸心峰瑤輕原鳥-詞代 纛春兒知翠秋翠
開。府牌翠。万峰 万。銷沈峰從牌朝峰把何詠文飛年閱牌又愁玉峰万三-文歲紛，東株馬翠俟詞之觴、在漢沈峰俟報好峰代。翠俟獻峰沈
万歡夜
詠牌空成。晚潘俟沈東雙雲翠似沈何
春。

生代俟翠題 ：梧峰卻 
----
iter 200, loss: 222.226310
----
 碧，鈿朝賒雨涯來，流融侯花-涯水春 望情容商清付，聲涯來賒慢回天·滿分-涯，涯付盡代宵倚，霜，寒客詠。
月者涯碧 月風來： 路斜。。涯付涯際)(涯風小(上重詠情涯江涯俟涯詞水中盡，一。涯付涯以
從賒「一，消滿。付，
万不賒詠蜂万香。淡怕月萬涯尉賒春涯俟涯物詠在絲附。：盡見清資，宋添情。春雙密
春賒遠
，涯飛涯付花。涯付拜：□：
。清付前在，雙宋樂題。涯痕
情：而隨 懷付水付，，涯詞時 一情江雨
付 
----
iter 300, loss: 218.477156
----
 何倚夢立聚遠，多上壓風東船難：遠，文崈船對船船者籃數：丘醉-

上用處林