# RNN
* RNN（Recurrent Neural Network，循环神经网络）是一类用于处理序列数据的神经网络，例如股票价格这样的时间序列。
* 设每个时间步的输入为x，输出为h（称为hidden，即隐藏向量）；上一步的h会作为下一步的输入之一。
* PyTorch中提供了torch.nn.RNNCell(input_size=input_size,hidden_size=hidden_size)，用于计算单个时间步。

* RNNCell的维度设置
    * input.shape = (batchSize,inputSize)
    * output.shape = (batchSize,hiddenSize)
    * dataset.shape = (seqLen,batchSize,inputSize)

In [2]:
# How to use RNNCell: the recurrence is advanced one time step at a time.
import torch

batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

# One recurrent step: (batch, input_size) + (batch, hidden_size) -> (batch, hidden_size).
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# Random toy sequence laid out as (seq_len, batch, input_size); h_0 is all zeros.
dataset = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(batch_size, hidden_size)

# Iterating over the first axis yields one (batch, input_size) slice per step;
# the hidden state returned by each step is fed into the next one.
separator = '=' * 20
for idx, inputs in enumerate(dataset):
    print(separator, idx, separator)
    hidden = cell(inputs, hidden)
    print(hidden)

tensor([[ 0.6245, -0.1693]], grad_fn=<TanhBackward0>)
tensor([[ 0.1703, -0.2498]], grad_fn=<TanhBackward0>)
tensor([[0.7344, 0.0335]], grad_fn=<TanhBackward0>)


* RNN的维度设置
    * input
        * input.shape = (seqLen,batchSize,inputSize)
        * hidden.shape = (numLayers,batchSize,hiddenSize)
    * output
        * output.shape = (seqLen,batchSize,hiddenSize)
        * hidden.shape = (numLayers,batchSize,hiddenSize)

In [3]:
# How to use RNN: the whole sequence is consumed in a single call.
import torch

batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1

# num_layers is the number of stacked recurrent layers. The module takes the
# full sequence as input, so no hand-written loop over time steps is needed.
# Passing batch_first=True would swap the batch and sequence axes of the
# input and output tensors; it is left at its default (False) here.
cell = torch.nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

inputs = torch.randn(seq_len, batch_size, input_size)      # (seq_len, batch, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)  # (num_layers, batch, hidden_size)

# out holds the top layer's hidden state at every step; hidden is the final
# state of every layer.
out, hidden = cell(inputs, hidden)

print("Output size", out.shape)
for caption, value in (("Output:", out), ("Hidden size:", hidden.shape), ("Hidden:", hidden)):
    print(caption, value)

Output size torch.Size([3, 1, 2])
Output: tensor([[[ 0.1688, -0.7854]],

        [[ 0.9801, -0.9822]],

        [[ 0.9556, -0.6337]]], grad_fn=<StackBackward0>)
Hidden size: torch.Size([1, 1, 2])
Hidden: tensor([[[ 0.9556, -0.6337]]], grad_fn=<StackBackward0>)


In [31]:
# Learn the sequence-to-sequence mapping "hello" -> "ohlol" with an RNNCell.
# Vocabulary indices: e:0 h:1 l:2 o:3, fed to the network as one-hot vectors.


import torch

batch_size = 1
input_size = 4
hidden_size = 4

# Index 0 decodes to 'e' (see the vocabulary above). The list previously had
# 'o' in slot 0, so a predicted class 0 was printed as the wrong letter.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # "hello"
y_data = [3, 1, 2, 3, 2]  # "ohlol"

one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]  # (seq_len, input_size)
# Reshape to (seq_len, batch, input_size) so iterating yields one step at a time.
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
# (seq_len, 1): each step's target is a length-1 batch of class indices.
labels = torch.LongTensor(y_data).view(-1, 1)

class Model(torch.nn.Module):
    """Thin wrapper around a single ``torch.nn.RNNCell``.

    The caller drives the recurrence: each ``forward`` call advances the
    hidden state by exactly one time step.
    """

    def __init__(self, input_size, hidden_size, batch_size):
        # The parent constructor must run before sub-modules are registered.
        super().__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.rnncell = torch.nn.RNNCell(input_size=input_size,
                                        hidden_size=hidden_size)

    def forward(self, inputs, hidden):
        """Return the next hidden state for one step of input."""
        return self.rnncell(inputs, hidden)

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape (batch_size, hidden_size)."""
        return torch.zeros((self.batch_size, self.hidden_size))

model = Model(input_size, hidden_size, batch_size)

# 3. Loss function and optimiser.
# CrossEntropyLoss (not BCE): it applies log-softmax to the raw scores and
# then the negative log-likelihood of the target class.
criterion = torch.nn.CrossEntropyLoss()

# torch.optim.SGD is the plain alternative (its weight_decay option adds a
# w^T w penalty to the objective); Adam is used here.
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)


for epoch in range(15):
    loss = 0
    optimizer.zero_grad()
    hidden = model.init_hidden()
    print(inputs.shape)
    print('Predicted string',end=" ")
    # The loop variable must not be called `inputs`, otherwise it would
    # overwrite the full sequence tensor that is iterated again next epoch.
    for step_input, step_label in zip(inputs, labels):
        hidden = model(step_input, hidden)

        # The hidden state doubles as the class scores; sum the per-step losses.
        loss += criterion(hidden, step_label)
        idx = hidden.max(dim=1)[1]
        print(idx2char[idx.item()],end="")

    # One backward pass / parameter update per full sequence.
    loss.backward()
    optimizer.step()
    print(f"，Epoch[{epoch+1}/15] loss={loss.item()}")


torch.Size([5, 1, 4])
Predicted string ooooh，Epoch[1/15] loss=6.678467273712158
torch.Size([5, 1, 4])
Predicted string ooool，Epoch[2/15] loss=5.518108367919922
torch.Size([5, 1, 4])
Predicted string oolol，Epoch[3/15] loss=4.819676399230957
torch.Size([5, 1, 4])
Predicted string ollll，Epoch[4/15] loss=4.426673412322998
torch.Size([5, 1, 4])
Predicted string ollll，Epoch[5/15] loss=4.118429660797119
torch.Size([5, 1, 4])
Predicted string ohlll，Epoch[6/15] loss=3.732583999633789
torch.Size([5, 1, 4])
Predicted string ohlll，Epoch[7/15] loss=3.3179757595062256
torch.Size([5, 1, 4])
Predicted string ohlll，Epoch[8/15] loss=3.0362939834594727
torch.Size([5, 1, 4])
Predicted string ohlll，Epoch[9/15] loss=2.8954944610595703
torch.Size([5, 1, 4])
Predicted string ohlll，Epoch[10/15] loss=2.8190925121307373
torch.Size([5, 1, 4])
Predicted string ohlll，Epoch[11/15] loss=2.754319190979004
torch.Size([5, 1, 4])
Predicted string ohlll，Epoch[12/15] loss=2.683513641357422
torch.Size([5, 1, 4])
Predicted s

In [29]:
# Learn the sequence-to-sequence mapping "hello" -> "ohlol" with torch.nn.RNN.
# Vocabulary indices: e:0 h:1 l:2 o:3, fed to the network as one-hot vectors.

# torch.nn.RNN shapes (batch_first=False):
#   input
#     input.shape  = (seqLen, batchSize, inputSize)
#     hidden.shape = (numLayers, batchSize, hiddenSize)
#   output
#     output.shape = (seqLen, batchSize, hiddenSize)
#     hidden.shape = (numLayers, batchSize, hiddenSize)

import torch

batch_size = 1
input_size = 4
hidden_size = 4

# Index 0 decodes to 'e' (see the vocabulary above). The list previously had
# 'o' in slot 0, so a predicted class 0 was printed as the wrong letter.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # "hello"
y_data = [3, 1, 2, 3, 2]  # "ohlol"

one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]  # (seq_len, input_size)
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)  # (seq_len, batch, input_size)
labels = torch.LongTensor(y_data)  # (seq_len * batch,) class indices

class Model(torch.nn.Module):
    """Multi-layer ``torch.nn.RNN`` whose hidden states are used as class scores.

    Args:
        input_size: feature size of each input step.
        hidden_size: size of the hidden state (also the number of scores
            produced per step).
        batch_size: default batch size used by ``init_hidden`` when none is
            given.
        num_layers: number of stacked recurrent layers.
    """

    def __init__(self, input_size, hidden_size, batch_size, num_layers):
        # The parent constructor must run before sub-modules are registered.
        super().__init__()
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.rnn = torch.nn.RNN(input_size=self.input_size,
                                hidden_size=self.hidden_size,
                                num_layers=self.num_layers)

    def forward(self, inputs):
        """Run the whole sequence and return per-step scores.

        Args:
            inputs: tensor of shape (seq_len, batch, input_size).

        Returns:
            Tensor of shape (seq_len * batch, hidden_size).
        """
        # Size the initial state from the actual batch and keep it on the
        # same device/dtype as the input instead of relying on a fixed
        # batch size and the CPU default.
        hidden = self.init_hidden(inputs.size(1)).to(device=inputs.device,
                                                     dtype=inputs.dtype)
        out, _ = self.rnn(inputs, hidden)
        # Use the instance's own hidden size, not a module-level global.
        return out.view(-1, self.hidden_size)

    def init_hidden(self, batch_size=None):
        """Return an all-zero initial state of shape (num_layers, batch, hidden_size).

        Args:
            batch_size: batch dimension of the state; defaults to the
                ``batch_size`` given at construction.
        """
        if batch_size is None:
            batch_size = self.batch_size
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)

model = Model(input_size, hidden_size, batch_size, num_layers)

# 3. Loss function and optimiser.
# CrossEntropyLoss (not BCE): it applies log-softmax to the raw scores and
# then the negative log-likelihood of the target class.
criterion = torch.nn.CrossEntropyLoss()

# torch.optim.SGD is the plain alternative (its weight_decay option adds a
# w^T w penalty to the objective); Adam is used here.
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)


for epoch in range(15):
    optimizer.zero_grad()
    outputs = model(inputs)  # (seq_len * batch, hidden_size)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Decode the scores computed before this epoch's update: the highest
    # score at each step gives the predicted character index.
    idx = outputs.max(dim=1)[1].data.numpy()
    predicted = ''.join([idx2char[x] for x in idx])

    print('Predicted string', predicted, end="")
    print(f"，Epoch[{epoch+1}/15] loss={loss.item()}")


Predicted string oholl，Epoch[1/15] loss=1.133691668510437
Predicted string oooll，Epoch[2/15] loss=0.9743810892105103
Predicted string ooool，Epoch[3/15] loss=0.895417332649231
Predicted string oolol，Epoch[4/15] loss=0.8364354372024536
Predicted string ohlol，Epoch[5/15] loss=0.7770026326179504
Predicted string ohlol，Epoch[6/15] loss=0.7153452038764954
Predicted string ohlol，Epoch[7/15] loss=0.6577935218811035
Predicted string ohlol，Epoch[8/15] loss=0.6076319813728333
Predicted string ohlol，Epoch[9/15] loss=0.5650906562805176
Predicted string ohlol，Epoch[10/15] loss=0.5289137959480286
Predicted string ohlol，Epoch[11/15] loss=0.4975467622280121
Predicted string ohlol，Epoch[12/15] loss=0.4698221683502197
Predicted string ohlol，Epoch[13/15] loss=0.4468842148780823
Predicted string ohlol，Epoch[14/15] loss=0.42747926712036133
Predicted string ohlol，Epoch[15/15] loss=0.4092729091644287


# 添加Embedding和Linear layer
one-hot编码的缺陷
* 高维度
* 稀疏
* 人为设定（硬编码，而不是从数据中学到的）

希望找到这样的编码：Embedding
* 低维
* 稠密
* 从数据中学习到的

Embedding:
* 用一个 inputSize × embeddingSize 的可学习查找表，把类别索引映射为稠密向量（取代之前的one-hot输入）
* torch.nn.Embedding(size_of_the_dictionary,size_of_each_embedding_vector)
* 输入是LongTensor，输出的形状是(*,embedding_dim)，其中 * 表示输入的形状

In [32]:
# Learn the sequence-to-sequence mapping "hello" -> "ohlol" with
# Embedding -> RNN -> Linear.
# Vocabulary indices: e:0 h:1 l:2 o:3 (class indices are fed directly to the
# embedding layer; no one-hot encoding is needed).

# torch.nn.RNN shapes (batch_first=False):
#   input
#     input.shape  = (seqLen, batchSize, inputSize)
#     hidden.shape = (numLayers, batchSize, hiddenSize)
#   output
#     output.shape = (seqLen, batchSize, hiddenSize)
#     hidden.shape = (numLayers, batchSize, hiddenSize)

import torch

num_class = 4
input_size = 4       # vocabulary size handed to the embedding layer
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5


# Index 0 decodes to 'e' (see the vocabulary above). The list previously had
# 'o' in slot 0, so a predicted class 0 was printed as the wrong letter.
idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]]  # "hello", shape (batch, seq_len)
y_data = [3, 1, 2, 3, 2]    # "ohlol", flattened to (batch * seq_len,)

inputs = torch.LongTensor(x_data)
labels = torch.LongTensor(y_data)

class Model(torch.nn.Module):
    """Embedding -> multi-layer RNN (batch_first) -> Linear classifier.

    Every constructor argument is optional. An argument left as ``None`` is
    read from the module-level hyper-parameter of the same role
    (``input_size``, ``embedding_size``, ``hidden_size``, ``num_layers``,
    ``num_class``) when the model is constructed, so ``Model()`` keeps
    working as before.

    Args:
        vocab_size: number of distinct input indices.
        embedding_dim: size of each embedding vector.
        hidden_dim: size of the RNN hidden state.
        n_layers: number of stacked RNN layers.
        n_classes: number of output classes.
    """

    def __init__(self, vocab_size=None, embedding_dim=None, hidden_dim=None,
                 n_layers=None, n_classes=None):
        # The parent constructor must run before sub-modules are registered.
        super().__init__()

        # Resolve the configuration once, at construction time. forward()
        # then reads sizes from the layers themselves, so rebinding the
        # globals later cannot desynchronise the model.
        vocab_size = input_size if vocab_size is None else vocab_size
        embedding_dim = embedding_size if embedding_dim is None else embedding_dim
        hidden_dim = hidden_size if hidden_dim is None else hidden_dim
        n_layers = num_layers if n_layers is None else n_layers
        n_classes = num_class if n_classes is None else n_classes

        self.emb = torch.nn.Embedding(vocab_size, embedding_dim)
        self.rnn = torch.nn.RNN(input_size=embedding_dim,
                                hidden_size=hidden_dim,
                                num_layers=n_layers,
                                batch_first=True)
        # Linear only transforms the last dimension: hidden_dim -> n_classes.
        self.fc = torch.nn.Linear(hidden_dim, n_classes)

    def forward(self, x):
        """Return class scores of shape (batch * seq_len, n_classes).

        Args:
            x: LongTensor of shape (batch, seq_len) holding input indices.
        """
        embedded = self.emb(x)  # (batch, seq_len, embedding_dim)
        # Zero initial state on the same device/dtype as the activations.
        hidden = embedded.new_zeros(self.rnn.num_layers,
                                    embedded.size(0),
                                    self.rnn.hidden_size)
        rnn_out, _ = self.rnn(embedded, hidden)  # (batch, seq_len, hidden_dim)
        scores = self.fc(rnn_out)                # (batch, seq_len, n_classes)
        return scores.view(-1, self.fc.out_features)
    

model = Model()

# 3. Loss function and optimiser.
# CrossEntropyLoss (not BCE): it applies log-softmax to the raw scores and
# then the negative log-likelihood of the target class.
criterion = torch.nn.CrossEntropyLoss()

# torch.optim.SGD is the plain alternative (its weight_decay option adds a
# w^T w penalty to the objective); Adam is used here.
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)


for epoch in range(15):
    optimizer.zero_grad()
    outputs = model(inputs)  # (batch * seq_len, num_class)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Decode the scores computed before this epoch's update: the highest
    # score at each step gives the predicted character index.
    idx = outputs.max(dim=1)[1].data.numpy()
    predicted = ''.join([idx2char[x] for x in idx])

    print('Predicted string', predicted, end="")
    print(f"，Epoch[{epoch+1}/15] loss={loss.item()}")


Predicted string loooo，Epoch[1/15] loss=1.44900643825531
Predicted string loooo，Epoch[2/15] loss=1.19287109375
Predicted string lolol，Epoch[3/15] loss=0.9887218475341797
Predicted string lhlol，Epoch[4/15] loss=0.8155452609062195
Predicted string ohlhl，Epoch[5/15] loss=0.679765522480011
Predicted string ohlol，Epoch[6/15] loss=0.5524497032165527
Predicted string ohlol，Epoch[7/15] loss=0.43207812309265137
Predicted string ohlol，Epoch[8/15] loss=0.3315155804157257
Predicted string ohlol，Epoch[9/15] loss=0.2460474967956543
Predicted string ohlol，Epoch[10/15] loss=0.17854580283164978
Predicted string ohlol，Epoch[11/15] loss=0.12948141992092133
Predicted string ohlol，Epoch[12/15] loss=0.09429500252008438
Predicted string ohlol，Epoch[13/15] loss=0.06889984011650085
Predicted string ohlol，Epoch[14/15] loss=0.050606053322553635
Predicted string ohlol，Epoch[15/15] loss=0.03748921677470207


# LSTM
* 其实就是在$h_t$之外增加了一条$c_t$（cell state）通路，反向传播时梯度的路径比较直接
* 输入：序列以及初始状态$(h_0,c_0)$
* 输出：各时间步的输出以及最终状态$(h_n,c_n)$

GRU是LSTM和RNN在性能和计算复杂度之间的折衷

In [72]:
import gzip
import csv
import time
def time_since(since):
    """Format the wall-clock time elapsed since *since*.

    Args:
        since: a timestamp previously obtained from ``time.time()``.

    Returns:
        A string of the form ``"<m>minute,<s>second"``, where ``<m>`` is the
        number of whole minutes and ``<s>`` the remaining (fractional) seconds.
    """
    s = time.time() - since
    # Floor division replaces math.floor: `math` was never imported, so the
    # previous implementation raised NameError on every call.
    m = int(s // 60)
    s -= m * 60
    return f'{m}minute,{s}second'

def create_tensor(tensor):
    """Move *tensor* to the first CUDA device when ``USE_GPU`` is set.

    Reads the module-level ``USE_GPU`` flag on every call. When the flag is
    falsy the tensor is returned unchanged.
    """
    if not USE_GPU:
        return tensor
    return tensor.to(torch.device("cuda:0"))

In [73]:
from torch.utils.data import Dataset
class NameDataset(Dataset):
    """Dataset of (name, country index) pairs read from a gzipped CSV file.

    Column 0 of every CSV row is taken as the name and column 1 as the
    country. Countries are numbered by their position in the sorted list of
    distinct country strings.
    """

    def __init__(self, is_train_set=True):
        """Load the training file, or the test file when *is_train_set* is false."""
        if is_train_set:
            filename = r"./dataset/names_train.csv.gz"
        else:
            filename = r'./dataset/names_test.csv.gz'

        names, countries = [], []
        with gzip.open(filename, 'rt') as f:  # 'rt': read as text
            for row in csv.reader(f):
                names.append(row[0])
                countries.append(row[1])

        self.names = names
        self.len = len(names)
        self.countries = countries

        self.country_list = sorted(set(countries))
        self.country_dict = self.getCountryDict()
        self.country_num = len(self.country_list)

    def __getitem__(self, index):
        """Return ``(name, country_index)`` for the sample at *index*."""
        country = self.countries[index]
        return self.names[index], self.country_dict[country]

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def getCountryDict(self):
        """Return a dict mapping each country string to its index in ``country_list``."""
        return {country_name: idx
                for idx, country_name in enumerate(self.country_list)}

    def idx2country(self, index):
        """Return the country string stored at position *index*."""
        return self.country_list[index]

    def getCountriesNum(self):
        """Return the number of distinct countries."""
        return self.country_num

In [77]:
import torch
from torch.nn.utils.rnn import pack_padded_sequence
class RNNClassifier(torch.nn.Module):
    """Embedding -> (bi)GRU -> Linear classifier over variable-length sequences.

    Args:
        input_size: vocabulary size for the embedding layer.
        hidden_size: embedding dimension and GRU hidden size.
        output_size: number of classes.
        n_layers: number of stacked GRU layers.
        bidirectional: whether the GRU runs in both directions.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = 2 if bidirectional else 1

        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers,
                                bidirectional=bidirectional)
        # A bidirectional GRU contributes two final states per sample.
        self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)

    def _init_hidden(self, batch_size):
        """Return a zero initial state of shape (n_layers * n_directions, batch, hidden)."""
        # Allocate next to the model's own parameters so the state always
        # matches the device the model lives on, independent of global flags.
        weight = self.embedding.weight
        return torch.zeros(self.n_layers * self.n_directions,
                           batch_size,
                           self.hidden_size,
                           dtype=weight.dtype,
                           device=weight.device)

    def forward(self, inputs, seq_lengths):
        """Classify a batch of zero-padded index sequences.

        Args:
            inputs: LongTensor of shape (batch, seq_len), sorted by
                decreasing true length.
            seq_lengths: true length of every sequence, in the same order
                (tensor or list of ints).

        Returns:
            Tensor of shape (batch, output_size) with unnormalised scores.
        """
        # (batch, seq_len) -> (seq_len, batch): the GRU is not batch_first.
        inputs = inputs.t()
        batch_size = inputs.size(1)

        hidden = self._init_hidden(batch_size)
        embedding = self.embedding(inputs)  # (seq_len, batch, hidden_size)

        # pack_padded_sequence returns a PackedSequence (data + batch_sizes),
        # which tells the GRU how many sequences are still active at each
        # step so padded positions are never processed. With the default
        # enforce_sorted=True the batch must be sorted by decreasing length.
        # The lengths must live on the CPU even when the data is on the GPU.
        if torch.is_tensor(seq_lengths):
            seq_lengths = seq_lengths.cpu()
        gru_input = pack_padded_sequence(embedding, seq_lengths)

        _, hidden = self.gru(gru_input, hidden)
        if self.n_directions == 2:
            # The last two entries are the top layer's final states, one per
            # direction; concatenate them along the feature axis.
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)
        else:
            hidden_cat = hidden[-1]
        return self.fc(hidden_cat)

In [None]:
def name2list(name):
    """Return ``(codes, length)`` for *name*.

    ``codes`` is the list of ``ord()`` code points of the characters (the
    ASCII value for ASCII characters) and ``length`` is their count.
    """
    codes = list(map(ord, name))
    return codes, len(codes)


def make_tensors(names,countries):
    """Turn a batch of names and country indices into padded, sorted tensors.

    Args:
        names: iterable of name strings.
        countries: tensor of country indices, one per name (``.long()`` is
            called on it).

    Returns:
        ``(seq_tensor, seq_lengths, countries)`` where ``seq_tensor`` is a
        zero-padded LongTensor of shape (batch, max_len), ``seq_lengths``
        holds the true lengths, and all three are ordered by decreasing
        length and passed through ``create_tensor``.
    """
    encoded = [name2list(name) for name in names]
    name_sequences = [codes for codes, _ in encoded]
    seq_lengths = torch.LongTensor([length for _, length in encoded])
    countries = countries.long()

    # (batch, max_len) buffer of zeros; each row is filled from the left so
    # the zeros that remain act as padding.
    seq_tensor = torch.zeros(len(name_sequences), seq_lengths.max()).long()
    for row, codes in enumerate(name_sequences):
        seq_tensor[row, :len(codes)] = torch.LongTensor(codes)

    # Sort by decreasing length and apply the same permutation to the data
    # and the labels so they stay aligned.
    seq_lengths, perm_idx = torch.sort(seq_lengths, dim=0, descending=True)
    seq_tensor = seq_tensor[perm_idx]
    countries = countries[perm_idx]

    return (create_tensor(seq_tensor),
            create_tensor(seq_lengths),
            create_tensor(countries))




In [79]:
# RNN classifier: given a name, predict its country (18 classes according to
# the original note).
# Pipeline: input -> embedding -> GRU layer -> final hidden state -> linear layer.
# Names differ in length; shorter ones are zero-padded to the longest in the batch.
import torch

from torch.utils.data import DataLoader


n_chars = 128      # size of the character vocabulary (names are encoded with ord())
hidden_size = 100  # GRU hidden size (also used as the embedding size)
n_layer = 2        # number of stacked GRU layers
batch_size = 256
N_EPOCHS = 100
USE_GPU = False

# 1. Data
trainset = NameDataset(is_train_set=True)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
testset = NameDataset(is_train_set=False)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

n_country = trainset.getCountriesNum()

# 2. Model
model = RNNClassifier(n_chars, hidden_size, n_country, n_layer)

if USE_GPU:
    device = torch.device("cuda:0")
    # The network object is called `model`; the previous `classifier.to(...)`
    # referred to an undefined name and raised NameError when USE_GPU was set.
    model.to(device)

# 3. Loss function and optimiser
# CrossEntropyLoss applies log-softmax to the raw scores and then the
# negative log-likelihood of the target class.
criterion = torch.nn.CrossEntropyLoss()

# torch.optim.SGD is the plain alternative (its weight_decay option adds a
# w^T w penalty to the objective); Adam is used here.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

start = time.time()
strat = start  # original (misspelled) name, kept as an alias for existing references
print(f"Training for {N_EPOCHS} epochs...")
acc_list = []


def trainModel():
    """Run one optimisation pass over ``trainloader``.

    Uses the module-level ``model``, ``criterion`` and ``optimizer``.

    Returns:
        The sum of the per-batch loss values for this pass.
    """
    total_loss = 0
    for names, countries in trainloader:
        inputs, seq_lengths, target = make_tensors(names, countries)
        loss = criterion(model(inputs, seq_lengths), target)

        # Clear stale gradients, back-propagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss

def testModel():
    """Evaluate the module-level ``model`` on ``testloader``.

    Prints the accuracy as a percentage.

    Returns:
        The fraction of correctly classified test samples.
    """
    correct = 0
    total = len(testset)
    # Gradients are not needed for evaluation; disabling autograd saves
    # time and memory.
    with torch.no_grad():
        for names, countries in testloader:
            inputs, seq_lengths, target = make_tensors(names, countries)
            scores = model(inputs, seq_lengths)

            # Index of the highest score per sample = predicted class.
            predicted = scores.max(dim=1)[1]
            correct += (predicted == target).sum().item()
        print(f"Accuracy on test set:{100*correct/total}%")
    return correct/total
        
# Alternate one training pass with one evaluation pass and record the test
# accuracy reached after every epoch.
for epoch in range(1, N_EPOCHS + 1):
    trainModel()  # the summed epoch loss it returns is not used here
    acc = testModel()
    acc_list += [acc]

Training for 100 epochs...
Accuracy on test set:66.34328358208955%
Accuracy on test set:73.67164179104478%
Accuracy on test set:77.53731343283582%
Accuracy on test set:79.53731343283582%
Accuracy on test set:81.07462686567165%
Accuracy on test set:82.28358208955224%
Accuracy on test set:82.67164179104478%
Accuracy on test set:83.19402985074628%
Accuracy on test set:83.80597014925372%
Accuracy on test set:84.13432835820896%
Accuracy on test set:84.55223880597015%
Accuracy on test set:84.3731343283582%
Accuracy on test set:84.80597014925372%
Accuracy on test set:84.65671641791045%
Accuracy on test set:85.01492537313433%
Accuracy on test set:84.49253731343283%
Accuracy on test set:84.41791044776119%
Accuracy on test set:84.44776119402985%
Accuracy on test set:84.53731343283582%
Accuracy on test set:84.26865671641791%
Accuracy on test set:84.6268656716418%
Accuracy on test set:84.43283582089552%
Accuracy on test set:84.41791044776119%
Accuracy on test set:83.94029850746269%
Accuracy on tes

KeyboardInterrupt: 

序列数据相关的问题，一般都可以尝试用RNN去解决

* 文档
* 多读文献
* 多动手