In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

class SelfAttention(nn.Module):
    def __init__(self, embed_dim) -> None:
        super(SelfAttention,self).__init__()
        self.query= nn.Linear(embed_dim,embed_dim)
        self.key=nn.Linear(embed_dim,embed_dim)
        self.values=nn.Linear(embed_dim,embed_dim)

    def forward(self,x):
        query=self.query(x)
        key=self.query(x)
        value=self.query(x)
        attn_weights=torch.matmul(query,key.transpose(-1,-2))
        attn_weights=nn.functional.softmax(attn_weights,dim=-1)
        attn_values= torch.matmul(attn_weights,value)
        return attn_values
    
class SelfAttentionClassfier(nn.Module):
    def __init__(self, embed_dim,hidden_num,num_classes) -> None:
        super().__init__()
        self.attention=SelfAttention(embed_dim)
        self.fc1=nn.Linear(embed_dim,hidden_num)
        self.fc2=nn.Linear(hidden_num,num_classes)
    def forward(self,x):
        attn_values=self.attention(x)
        x= attn_values.mean(dim=1)
        x= self.fc1(x)
        x=self.fc2(torch.relu(x))
        return x

In [2]:
import d2l.torch as d2l

In [3]:
#@save
def masked_softmax(X, valid_lens):
    """通过在最后一个轴上掩蔽元素来执行softmax操作"""
    # X:3D张量，valid_lens:1D或2D张量
    if valid_lens is None:
        return nn.functional.softmax(X, dim=-1)
    else:
        shape = X.shape#(2,2,4)
        if valid_lens.dim() == 1:
            valid_lens = torch.repeat_interleave(valid_lens, shape[1])
        else:
            valid_lens = valid_lens.reshape(-1)
        # 最后一轴上被掩蔽的元素使用一个非常大的负值替换，从而其softmax输出为0
        X = d2l.sequence_mask(X.reshape(-1, shape[-1]), valid_lens,
                              value=-1e6)
        return nn.functional.softmax(X.reshape(shape), dim=-1)

In [20]:
a=torch.rand(2, 2,1, 4)
b=a.unsqueeze(2)
print(a)
print(b)
print(a.shape,b.shape)

tensor([[[[0.0585, 0.9168, 0.1479, 0.5782]],

         [[0.7103, 0.4789, 0.5136, 0.6076]]],


        [[[0.8103, 0.7169, 0.9287, 0.7512]],

         [[0.0312, 0.6515, 0.2874, 0.6632]]]])
tensor([[[[[0.0585, 0.9168, 0.1479, 0.5782]]],


         [[[0.7103, 0.4789, 0.5136, 0.6076]]]],



        [[[[0.8103, 0.7169, 0.9287, 0.7512]]],


         [[[0.0312, 0.6515, 0.2874, 0.6632]]]]])
torch.Size([2, 2, 1, 4]) torch.Size([2, 2, 1, 1, 4])


In [10]:
0.4682+0.4332+0.7512+ 0.3108

1.9634

In [2]:
import torch

In [3]:
torch.__version__

'2.2.0'

In [1]:
import math
import pandas as pd
import torch
from torch import nn
from d2l import torch as d2l

In [2]:
#@save
class PositionWiseFFN(nn.Module):
    """基于位置的前馈网络"""
    def __init__(self, ffn_num_input, ffn_num_hiddens, ffn_num_outputs,
                 **kwargs):
        super(PositionWiseFFN, self).__init__(**kwargs)
        self.dense1 = nn.Linear(ffn_num_input, ffn_num_hiddens)
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(ffn_num_hiddens, ffn_num_outputs)

    def forward(self, X):
        return self.dense2(self.relu(self.dense1(X)))

In [3]:
ffn = PositionWiseFFN(4, 4, 8)
ffn.eval()
ffn(torch.ones((2, 3, 4)))[0]

tensor([[-0.0716,  0.2611, -0.4501,  0.4004, -0.2232, -0.1011,  0.1762, -0.1173],
        [-0.0716,  0.2611, -0.4501,  0.4004, -0.2232, -0.1011,  0.1762, -0.1173],
        [-0.0716,  0.2611, -0.4501,  0.4004, -0.2232, -0.1011,  0.1762, -0.1173]],
       grad_fn=<SelectBackward0>)

In [4]:
ffn = PositionWiseFFN(4, 4, 8)
ffn.eval()
a=torch.ones((2, 3, 4))
print(a)
print(ffn(a))
print(ffn(a).shape)

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])
tensor([[[-0.1249,  0.2247,  0.3036,  0.3652,  0.5125,  0.0230, -0.2868,
          -0.4994],
         [-0.1249,  0.2247,  0.3036,  0.3652,  0.5125,  0.0230, -0.2868,
          -0.4994],
         [-0.1249,  0.2247,  0.3036,  0.3652,  0.5125,  0.0230, -0.2868,
          -0.4994]],

        [[-0.1249,  0.2247,  0.3036,  0.3652,  0.5125,  0.0230, -0.2868,
          -0.4994],
         [-0.1249,  0.2247,  0.3036,  0.3652,  0.5125,  0.0230, -0.2868,
          -0.4994],
         [-0.1249,  0.2247,  0.3036,  0.3652,  0.5125,  0.0230, -0.2868,
          -0.4994]]], grad_fn=<ViewBackward0>)
torch.Size([2, 3, 8])


In [6]:
import gensim

# 训练语料
sentences = ["I love natural language processing.", "Natural language processing is a lot of fun."]

# 训练模型
model = gensim.models.Word2Vec(sentences, min_count=1)

# 查看词向量
print(model.wv['language'])


KeyError: "Key 'language' not present"

In [7]:
import torch
from gensim.models import KeyedVectors

# 加载预训练的 Word2Vec 模型
word2vec_model_path = "path_to_your_word2vec_model.bin"
word2vec_model = KeyedVectors.load_word2vec_format(word2vec_model_path, binary=True)

# 获取单词 "king" 的向量表示
king_vector = word2vec_model["king"]
print("Word embedding for 'king':", king_vector)

# 获取单词 "queen" 的向量表示
queen_vector = word2vec_model["queen"]
print("Word embedding for 'queen':", queen_vector)

# 计算两个单词之间的相似度（余弦相似度）
similarity = word2vec_model.similarity("king", "queen")
print("Similarity between 'king' and 'queen':", similarity)


FileNotFoundError: [Errno 2] No such file or directory: 'path_to_your_word2vec_model.bin'