In [6]:
from math import sqrt

import torch
import torch.nn as nn

class SelfAttention(nn.Module):
    def __init__(self, dim_q, dim_k, dim_v):
        super(SelfAttention, self).__init__()
        self.dim_q = dim_q
        self.dim_k = dim_k
        self.dim_v = dim_v

        #定义线性变换函数
        self.linear_q = nn.Linear(dim_q, dim_k, bias=False)
        self.linear_k = nn.Linear(dim_q, dim_k, bias=False)
        self.linear_v = nn.Linear(dim_q, dim_v, bias=False)
        self._norm_fact = 1 / sqrt(dim_k)

    def forward(self, x):
        # x: batch, max_len, dim_q
        #根据文本获得相应的维度

        batch, n, dim_q = x.shape
        assert dim_q == self.dim_q

        q = self.linear_q(x)  # batch, max_len, dim_k
        k = self.linear_k(x)  # batch, max_len, dim_k
        v = self.linear_v(x)  # batch, max_len, dim_v
        #q*k的转置 并*开根号后的dk
        dist = torch.bmm(q, k.transpose(1, 2)) * self._norm_fact  # batch, max_len, max_len
        #归一化获得attention的相关系数
        dist = torch.softmax(dist, dim=-1)  # batch, max_len, max_len
        #attention系数和v相乘，获得最终的得分
        att = torch.bmm(dist, v) # batch, max_len, dim_v
        return att

In [7]:
ATT = SelfAttention(768, 768, 768)


In [8]:
x = torch.randn(8, 128, 768)


In [9]:
y = ATT(x)
y.shape


torch.Size([8, 128, 768])

In [10]:
import re
x = 'OPPO闪充充电器 X9070 X9077 R5 快充头通用手机数据线 套餐【2.4充电头+数据线 】 安卓 1.5m'
p = re.compile('[a-zA-Z0-9]+-[a-zA-Z0-9]+')
y = p.findall(x)
y

[]

In [15]:

import jieba
x = 'OWIN净水器家用厨房欧恩科技反渗透纯水机O-50CSC'
y = jieba.lcut(x)
y

['OWIN', '净水器', '家用', '厨房', '欧恩', '科技', '反渗透', '纯水机', 'O', '-', '50CSC']

In [2]:
import torch.nn as nn
import torch
batch_size=8
class GlobalPointerCrossEntropy(nn.Module):
    '''Multi-class Focal loss implementation'''

    def __init__(self, ):
        super(GlobalPointerCrossEntropy, self).__init__()
        self.weight = nn.Linear(batch_size*53, batch_size*53, bias=False)

    @staticmethod
    def multilabel_categorical_crossentropy(y_true, y_pred):
        y_pred = (1 - 2 * y_true) * y_pred
        y_pred_neg = y_pred - y_true * 1e12
        y_pred_pos = y_pred - (1 - y_true) * 1e12
        zeros = torch.zeros_like(y_pred[..., :1])
        y_pred_neg = torch.cat([y_pred_neg, zeros], dim=-1)
        y_pred_pos = torch.cat([y_pred_pos, zeros], dim=-1)
        neg_loss = torch.logsumexp(y_pred_neg, dim=-1)
        pos_loss = torch.logsumexp(y_pred_pos, dim=-1)

        return neg_loss + pos_loss

    def forward(self, logits, target):
        """
        logits: [N, C, L, L]
        """
        bh = logits.shape[0] * logits.shape[1]
        target = torch.reshape(target, (bh, -1))
        logits = torch.reshape(logits, (bh, -1))
        each_loss = GlobalPointerCrossEntropy.multilabel_categorical_crossentropy(target, logits)

        each_loss = self.weight(each_loss)
        # each_loss.shape： (bs*53, )
        return torch.mean(each_loss)

In [6]:
loss_fn = GlobalPointerCrossEntropy()

In [13]:
for p in loss_fn.parameters():
    print(p.shape)

torch.Size([424, 424])


In [26]:

x = torch.zeros((3,3))
x = torch.nn.init.uniform_(x,a=0, b=1,requires_grad=True)

TypeError: uniform_() got an unexpected keyword argument 'requires_grad'

In [25]:
x.requires_grad

False

In [1]:
import nltk

In [5]:
brown = nltk.corpus.brown
corpus = [word.lower() for word in brown.words()]

In [10]:
spl = 95*len(corpus)//100
train = corpus[:spl]
test = corpus[spl:]

In [13]:
# Remove rare words from the corpus
fdist = nltk.FreqDist(w for w in train)
vocabulary = set(map(lambda x: x[0], filter(lambda x: x[1] >= 5, fdist.items())))


In [14]:
train = map(lambda x: x if x in vocabulary else "*unknown*", train)
test = map(lambda x: x if x in vocabulary else "*unknown*", test)

In [15]:
from nltk.model import NgramModel
from nltk.probability import LidstoneProbDist

ModuleNotFoundError: No module named 'nltk.model'

In [16]:
import nltk.lm.NgramCounter

ModuleNotFoundError: No module named 'nltk.lm.NgramCounter'

In [2]:
import torch
torch.arange(2)

tensor([0, 1])

In [3]:
input_ids = torch.randn((8, 128))
input_ids.shape


torch.Size([8, 128])

In [4]:
position_ids = torch.arange(start=1,end=129, dtype=torch.long, device=input_ids.device)
print(position_ids.shape)
position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
print(position_ids.shape)

torch.Size([128])
torch.Size([8, 128])


In [5]:
position_ids

tensor([[  1,   2,   3,  ..., 126, 127, 128],
        [  1,   2,   3,  ..., 126, 127, 128],
        [  1,   2,   3,  ..., 126, 127, 128],
        ...,
        [  1,   2,   3,  ..., 126, 127, 128],
        [  1,   2,   3,  ..., 126, 127, 128],
        [  1,   2,   3,  ..., 126, 127, 128]])

In [14]:
start_ids = torch.ones((8,128))

In [15]:
position_ids = torch.cat((position_ids, start_ids),dim=-1)

In [16]:
position_ids.shape


torch.Size([8, 256])

In [None]:
x = torch.randn((8,128,768))
y = torch.randn((8,128,768))
z = torch.cat((x,y), dim=1)
z.shape


NameError: name 'torch' is not defined

In [4]:
import random
x = ['1.txt','2.txt','3.txt']*10
random.sample(x, 5)

['2.txt', '1.txt', '1.txt', '2.txt', '2.txt']