In [1]:
%matplotlib inline
import math
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

In [2]:
batch_size,num_steps  = 32,35
train_iter,vocab = d2l.load_data_time_machine(batch_size,num_steps)

`torch.nn.functional.one_hot` 是 PyTorch 中用于将标签或索引转换为独热编码（one-hot encoding）格式的函数。它通常用于处理分类任务中的标签数据。 
- input：一个包含类别标签的张量，一般是一个整数张量。每个整数表示类别的索引。
- num_classes：类别的总数，即独热编码向量的长度。这个参数是可选的，如果不指定，PyTorch 会自动根据输入张量中的最大值来推断类别数。

In [3]:
# 独热编码
F.one_hot(torch.tensor([0,2]),len(vocab))

tensor([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0]])

In [4]:
X = torch.arange(10).reshape((2,5))
F.one_hot(X.T,28).shape

torch.Size([5, 2, 28])

In [5]:
#  初始化RNN网络模型的模型参数
def get_params(vocab_size,num_hiddens,device):
    num_inputs = num_outputs = vocab_size

    def normal(shape):
        return torch.randn(size = shape,device=device)*0.01

    W_xh = normal((num_inputs,num_hiddens))
    W_hh = normal((num_hiddens,num_hiddens))
    b_h = torch.zeros(num_hiddens,device=device)
    W_hq = normal((num_hiddens,num_outputs))
    b_q = torch.zeros(num_outputs,device=device)
    params = [W_hh,W_hh,b_h,W_hq,b_q]
    for param in params:
        param.requires_grad(True)
    return params

In [6]:
# 一个init_rnn_state函数在初始化时返回隐藏状态
def init_rnn_state(batch_size,num_hiddens,device):
    return (torch.zeros((batch_size,num_hiddens),device=device),)

In [7]:
# 下面的rnn函数定义了如何在一个时间步计算隐藏状态和输出
def rnn(inputs,state,params):
    W_xh,W_hh,b_h,W_hp,b_q = params
    H, = state
    outputs = []
    for X in inputs:
        H = torch.tanh((torch.mm(X,W_xh)+torch.mm(H,W_hh)+b_h))
        Y = torch.mm(H,W_hq)+b_q
        outputs.append(Y)
    return torch.cat(outputs,dim=0),(H,)

In [8]:
# 创建一个类来包装这些函数
class RNNModelScratch:
    """从零开始实现循环神经网络模型"""
    def __init__(self,vocab_size,num_hiddens,device,get_params,init_state,forward_fn):
        self.vocab_size,self.num_hiddens = vocab_size,num_hiddens
        self.params = get_params(vocab_size,num_hiddens,device)
        self.init_state,self.forward_fn = init_state,forward_fn
    
    def __call__(self,X,state):
        X = F.one_hot(X.T,self.vocab_size).type(torch.float32)
        return self.forward_fn(X,state,self.params)
    
    def begin_state(self,batch_size,device):
        return self.init_state(batch_size,self.num_hiddens,device)

In [9]:
# 检查输出是否具有正确的形状
num_hiddens = 512
net = RNNModelScratch(len(vacab),)

NameError: name 'vacab' is not defined