# self-attention学习

## 参考资料

[一文读懂自注意力机制：8大步骤图解+代码](https://mp.weixin.qq.com/s?__biz=MzU3NjE4NjQ4MA==&mid=2247486455&idx=3&sn=805e9754b93de33e93ef2acf08d6911c&chksm=fd16fae8ca6173fec4e3ba64e513f4907260b13fb154e448d8206e0e50445ec316c9619714b4&mpshare=1&scene=22&srcid=&sharer_sharetime=1574810766385&sharer_shareid=da84f0d2d31380d783922b9e26cacfe2#rd)

# New heading

## 实现

这是PyTorch代码🤗，PyTorch是Python的一个流行的深度学习框架。

### 步骤1：准备输入

In [1]:
import torch

# Three example inputs, one per row, each with four features, stored as a
# float32 tensor so they can be matrix-multiplied with the weights.
x = torch.tensor(
    [
        [1, 0, 1, 0],  # Input 1
        [0, 2, 0, 2],  # Input 2
        [1, 1, 1, 1],  # Input 3
    ],
    dtype=torch.float32,
)

### 步骤2：初始化权重

In [2]:
# Hand-picked projection weights for the walkthrough. Each matrix has four
# rows and three columns and is stored as float32.
w_key = torch.tensor(
    [
        [0, 0, 1],
        [1, 1, 0],
        [0, 1, 0],
        [1, 1, 0],
    ],
    dtype=torch.float32,
)
w_query = torch.tensor(
    [
        [1, 0, 1],
        [1, 0, 0],
        [0, 0, 1],
        [0, 1, 1],
    ],
    dtype=torch.float32,
)
w_value = torch.tensor(
    [
        [0, 2, 0],
        [0, 3, 0],
        [1, 0, 3],
        [1, 1, 0],
    ],
    dtype=torch.float32,
)

### 步骤3：推导键、查询和值

In [6]:

# Project the inputs with each weight matrix to obtain one key, one query
# and one value row per input.
keys = x.matmul(w_key)
querys = x.matmul(w_query)
values = x.matmul(w_value)

# Show the three results in order (keys, querys, values), one tensor after
# the other. Expected output:
#
# keys:
# tensor([[0., 1., 1.],
#         [4., 4., 0.],
#         [2., 3., 1.]])
#
# querys:
# tensor([[1., 0., 2.],
#         [2., 2., 2.],
#         [2., 1., 3.]])
#
# values:
# tensor([[1., 2., 3.],
#         [2., 8., 0.],
#         [2., 6., 3.]])
print(keys, querys, values, sep="\n")

tensor([[0., 1., 1.],
        [4., 4., 0.],
        [2., 3., 1.]])
tensor([[1., 0., 2.],
        [2., 2., 2.],
        [2., 1., 3.]])
tensor([[1., 2., 3.],
        [2., 8., 0.],
        [2., 6., 3.]])


### 步骤4：计算注意力得分

In [11]:
# Raw attention scores: entry (i, j) is the dot product of query i with
# key j, so each row holds one query's scores against every key.
attn_scores = querys.matmul(keys.transpose(0, 1))
print(attn_scores)

tensor([[ 2.,  4.,  4.],
        [ 4., 16., 12.],
        [ 4., 12., 10.]])


### 步骤5：计算softmax

In [12]:

from torch.nn.functional import softmax

# Normalise the scores along the last dimension, i.e. separately for each
# query row. The exact result is:
# tensor([[6.3379e-02, 4.6831e-01, 4.6831e-01],
#         [6.0337e-06, 9.8201e-01, 1.7986e-02],
#         [2.9539e-04, 8.8054e-01, 1.1917e-01]])
attn_scores_softmax = softmax(attn_scores, dim=-1)

# The exact values are then replaced by rounded ones so that the remaining
# steps are easy to follow by hand.
attn_scores_softmax = torch.tensor(
    [
        [0.0, 0.5, 0.5],
        [0.0, 1.0, 0.0],
        [0.0, 0.9, 0.1],
    ]
)

### 步骤6：将得分和值相乘

In [14]:

# Broadcasted product: a singleton axis is inserted into `values` at
# position 1 and appended to the transposed weights, so that
# weighted_values[j, i] == values[j] * attn_scores_softmax[i, j]
# (value j scaled by the weight query i assigns to it).
weighted_values = values.unsqueeze(1) * attn_scores_softmax.t().unsqueeze(-1)
print(weighted_values)

tensor([[[0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000]],

        [[1.0000, 4.0000, 0.0000],
         [2.0000, 8.0000, 0.0000],
         [1.8000, 7.2000, 0.0000]],

        [[1.0000, 3.0000, 1.5000],
         [0.0000, 0.0000, 0.0000],
         [0.2000, 0.6000, 0.3000]]])


### 步骤7：对加权值求和

In [16]:
# Collapse the leading dimension of the weighted values by summation,
# leaving one output row per remaining index.
outputs = torch.sum(weighted_values, dim=0)
print(outputs)

tensor([[2.0000, 7.0000, 1.5000],
        [2.0000, 8.0000, 0.0000],
        [2.0000, 7.8000, 0.3000]])
