In [None]:
import torch
import random
import zipfile

with zipfile.ZipFile('data/jaychou_lyrics.txt.zip') as zin:
    with zin.open('jaychou_lyrics.txt') as f:
        corpus_chars = f.read().decode('utf-8')
corpus_chars[:40]

In [None]:
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
corpus_chars = corpus_chars[0:10000]

In [13]:
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    # 减1是因为输出的索引x是相应输入的索引y加1
    num_examples = (len(corpus_indices) - 1) // num_steps # 29//6=4
    epoch_size = num_examples // batch_size #4//2=2可以产生几个批量
    example_indices = list(range(num_examples)) #[0,1,2,3]
    random.shuffle(example_indices)

    # 返回从pos开始的长为num_steps的序列
    def _data(pos):
        return corpus_indices[pos: pos + num_steps] 
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for i in range(epoch_size): #0 1
        # 每次读取batch_size个随机样本
        i = i * batch_size #0 2
        batch_indices = example_indices[i: i + batch_size]#[0: 2][2: 4]
        X = [_data(j * num_steps) for j in batch_indices]#[0, 1] 0 6 [2, 3] 12 18
        Y = [_data(j * num_steps + 1) for j in batch_indices]#1 7 13 19
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device)

In [26]:
my_seq = list(range(30))

In [31]:
for X, Y in data_iter_random(my_seq, batch_size=2, num_steps=3):
    print('X: ', X, '\nY:', Y, '\n')

num_example 30
X:  tensor([[ 9., 10., 11.],
        [18., 19., 20.]], device='cuda:0') 
Y: tensor([[10., 11., 12.],
        [19., 20., 21.]], device='cuda:0') 

X:  tensor([[15., 16., 17.],
        [24., 25., 26.]], device='cuda:0') 
Y: tensor([[16., 17., 18.],
        [25., 26., 27.]], device='cuda:0') 

X:  tensor([[21., 22., 23.],
        [12., 13., 14.]], device='cuda:0') 
Y: tensor([[22., 23., 24.],
        [13., 14., 15.]], device='cuda:0') 

X:  tensor([[6., 7., 8.],
        [3., 4., 5.]], device='cuda:0') 
Y: tensor([[7., 8., 9.],
        [4., 5., 6.]], device='cuda:0') 



In [30]:
l = [0, 1, 2, 3]
a = l[0: 2]
a

[0, 1]

In [24]:
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F

import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l
device = torch.device('cpu')

(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()

In [6]:
num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
print('will use', device)

def get_params():
    def _one(shape):
        ts = torch.tensor(np.random.normal(0, 0.01, size=shape), device=device, dtype=torch.float32)
        return torch.nn.Parameter(ts, requires_grad=True)
    def _three():
        return (_one((num_inputs, num_hiddens)),
                _one((num_hiddens, num_hiddens)),
                torch.nn.Parameter(torch.zeros(num_hiddens, device=device, dtype=torch.float32), requires_grad=True))

    W_xi, W_hi, b_i = _three()  # 输入门参数
    W_xf, W_hf, b_f = _three()  # 遗忘门参数
    W_xo, W_ho, b_o = _three()  # 输出门参数
    W_xc, W_hc, b_c = _three()  # 候选记忆细胞参数

    # 输出层参数
    W_hq = _one((num_hiddens, num_outputs))
    b_q = torch.nn.Parameter(torch.zeros(num_outputs, device=device, dtype=torch.float32), requires_grad=True)
    return nn.ParameterList([W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c, W_hq, b_q])

will use cuda


In [7]:
num_inputs

1027

In [8]:
def init_lstm_state(batch_size, num_hiddens, device):
    return (torch.zeros((batch_size, num_hiddens), device=device), 
            torch.zeros((batch_size, num_hiddens), device=device))

In [9]:
C, H = init_lstm_state(2, 256, device)

In [11]:
C.shape

torch.Size([2, 256])

In [12]:
H.shape

torch.Size([2, 256])

In [13]:
def lstm(inputs, state, params):
    [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c, W_hq, b_q] = params
    (H, C) = state
    outputs = []
    for X in inputs:
        I = torch.sigmoid(torch.matmul(X, W_xi) + torch.matmul(H, W_hi) + b_i)
        F = torch.sigmoid(torch.matmul(X, W_xf) + torch.matmul(H, W_hf) + b_f)
        O = torch.sigmoid(torch.matmul(X, W_xo) + torch.matmul(H, W_ho) + b_o)
        C_tilda = torch.tanh(torch.matmul(X, W_xc) + torch.matmul(H, W_hc) + b_c)
        C = F * C + I * C_tilda
        H = O * C.tanh()
        Y = torch.matmul(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H, C)

In [25]:
input = torch.ones(2, 6, 1027)
H, C = init_lstm_state(2, 256, device)
[W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c, W_hq, b_q] = get_params()

In [26]:
W_xi.shape

torch.Size([1027, 256])

In [27]:
W_hi.shape

torch.Size([256, 256])

In [28]:
for X in input:
    I = torch.sigmoid(torch.matmul(X, W_xi) + torch.matmul(H, W_hi) + b_i)
    print(I.shape)

RuntimeError: The size of tensor a (6) must match the size of tensor b (2) at non-singleton dimension 0

In [29]:
b_c.shape

torch.Size([256])