# 用markdown记录一下并行的公式推导

# 实现miniGRU，使用李沐的框架来做文本预测

In [1]:
from d2l import torch as d2l

In [59]:
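# Implement the ParallelScan algorithm, i.e. the prefix scan algorithm (Blelloch).
# General recurrence:  v_t = a_t * v_{t-1} + b_t
# miniGRU recurrence:  h_t = (1 - z_t) * h_{t-1} + z_t * h_tilde_t
#                      (so a_t = 1 - z_t and b_t = z_t * h_tilde_t)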

import numpy as np
from functools import reduce
# Fixed seed so the random gates and candidates are reproducible.
np.random.seed(100)

sequence_length = 8
# Initial hidden state used by the sequential reference below.
h0 = 1

# Gate values z_t drawn uniformly from [0, 1); 1 - z_t is the decay coefficient.
z = np.random.rand(sequence_length)
print(z, 1 - z, sep="\n")

# Candidate hidden states h_tilde_t, drawn after z so the random stream order
# stays the same.
h_tilde = np.random.rand(sequence_length)
print(h_tilde)

def parallel_scan(h0, z, b):
    """Evaluate h_t = (1 - z_t) * h_{t-1} + b_t for every step t.

    Despite its name, this is a plain left-to-right loop; it serves as the
    sequential reference result for the recurrence.

    Args:
        h0: Initial state h_0.
        z: Sequence of gate values z_t.
        b: Sequence of additive terms b_t, paired element-wise with ``z``.

    Returns:
        A list ``[h_1, ..., h_T]`` with one entry per (z_t, b_t) pair; empty
        when the inputs are empty.
    """
    ans = []
    h = h0
    # Take the length from the inputs themselves instead of a module-level
    # global, so the function works for any sequence length.
    for z_t, b_t in zip(z, b):
        h = (1 - z_t) * h + b_t
        ans.append(h)
    return ans

# Run the sequential scan with b_t = z_t * h_tilde_t and print every state.
h = parallel_scan(h0=h0, z=z, b=z * h_tilde)
print(h)


[0.54340494 0.27836939 0.42451759 0.84477613 0.00471886 0.12156912
 0.67074908 0.82585276]
[0.45659506 0.72163061 0.57548241 0.15522387 0.99528114 0.87843088
 0.32925092 0.17414724]
[0.13670659 0.57509333 0.89132195 0.20920212 0.18532822 0.10837689
 0.21969749 0.97862378]
[0.5308820946192279, 0.5431891488669744, 0.6909776486953454, 0.283985182706266, 0.28351963468489383, 0.2622276852584489, 0.23370059747630423, 0.8488974639927129]


In [60]:
import torch
from torch.nn import functional as F

def parallel_scan_log(log_coeffs, log_values):
    """Solve v_t = a_t * v_{t-1} + b_t in log space, scanning along dim 0.

    Args:
        log_coeffs: Tensor holding log(a_t) for t = 1..T; the scan axis is
            dim 0, any trailing dims are treated independently.
        log_values: Tensor holding log(v_0) at index 0 followed by log(b_t)
            for t = 1..T, i.e. one entry longer than ``log_coeffs`` on dim 0.

    Returns:
        Tensor of log(v_t) for t = 0..T (same shape as ``log_values``);
        apply ``torch.exp`` to recover the states.
    """
    # F.pad lists (left, right) pairs starting from the LAST dim, so emit a
    # (0, 0) pair for every trailing dim and put the single leading zero on
    # dim 0 -- the same axis the cumulative ops run along.
    pad_spec = (0, 0) * (log_coeffs.dim() - 1) + (1, 0)
    # a_star[0] = 0, i.e. log(1): the empty product of coefficients.
    a_star = F.pad(torch.cumsum(log_coeffs, dim=0), pad_spec)
    # log(v_0 + sum_{k<=t} b_k / prod_{i<=k} a_i)
    log_h0_plus_b_star = torch.logcumsumexp(log_values - a_star, dim=0)
    h = log_h0_plus_b_star + a_star
    return h

# as_tensor converts the NumPy arrays and is a no-op if this cell is re-run
# after z / h_tilde have already become tensors.
z = torch.as_tensor(z)
h_tilde = torch.as_tensor(h_tilde)
# NOTE: this holds log(h_0), not h_0 itself -- log(1) = 0, matching the
# h0 = 1 used by the sequential reference. The dtype follows z so torch.cat
# below does not depend on implicit type promotion.
h0 = torch.tensor(0.0, dtype=z.dtype)

# log(a_t) with a_t = 1 - z_t, and log(b_t) with b_t = z_t * h_tilde_t.
log_coeffs = torch.log(1 - z)
log_values = torch.log(z) + torch.log(h_tilde)

# Prepend log(h_0) so index 0 of the scan result is the initial state.
h = parallel_scan_log(log_coeffs, torch.cat([h0.view(1), log_values], dim=0))
# Drop the initial state; the remaining entries are h_1..h_T.
torch.exp(h)[1:]




tensor([0.5309, 0.5432, 0.6910, 0.2840, 0.2835, 0.2622, 0.2337, 0.8489],
       dtype=torch.float64)

In [56]:
a = np.asarray([1, 2, 3, 4])
# Pad only at the front: one zero before the data, nothing after it.
np.pad(a, pad_width=(1, 0), mode="constant")

# Commented-out torch variant on a 2x2 tensor, padding only one row on top:
# import torch

# a = torch.tensor([1, 2, 3, 4])
# a = a.reshape(2, 2)
# torch.nn.functional.pad(a, (0, 0, 1, 0), mode='constant')

array([0, 1, 2, 3, 4])