In [65]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import requests
import tiktoken
import chardet
import math

In [46]:


# Get data: download the sales textbook once, cache it beside the notebook,
# then read it into `text`.
if not os.path.exists('sales_testbook.txt'):
    url = 'https://huggingface.co/datasets/goendalf666/sales-textbook_for_convincing_and_selling/resolve/main/sales_textbook.txt?download=true'
    # Certificate verification stays at the requests default (enabled); the
    # timeout stops the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error so an error page is never cached as the
    # dataset (the exists() guard above would otherwise reuse it on every run).
    response.raise_for_status()
    # Create the cache file only after the download has succeeded, so a failed
    # request cannot leave an empty file behind.
    with open('sales_testbook.txt', 'wb') as f:
        f.write(response.content)
with open('sales_testbook.txt', 'r',encoding='utf-8') as f:
    text = f.read()

# Preview the first 1000 characters as the cell output.
text[:1000]

'Chapter 1: Building Rapport and Capturing Attention\nSubpoint: Understanding the Importance of Building Rapport\nBuilding rapport is a fundamental skill in sales that cannot be underestimated. It lays the foundation for establishing a connection with your potential customers, gaining their trust, and ultimately convincing them to make a purchase. Rapport can be defined as a harmonious relationship based on mutual understanding and empathy. When you build rapport with someone, you create a sense of familiarity, comfort, and shared interests, making it easier to communicate and influence their decision-making process.\nOne of the main reasons why building rapport is crucial in sales is that people are more likely to buy from someone they like and trust. By establishing a positive and genuine connection with your customers, you increase their confidence in you and your product or service. People want to do business with individuals they feel comfortable with, those who understand their n

In [68]:
# Hyperparameters shared by every cell below.

# Batch geometry: sequences per batch and tokens per sequence.
batch_size = 4
context_length = 16

# Model geometry: embedding width and number of attention heads.
# d_model is split evenly across the heads (d_model // num_head per head).
d_model = 64
num_head = 4

In [43]:

# Tokenizer used for both encoding the corpus and decoding predictions:
# tiktoken's 'cl100k_base' encoding.
encoding = tiktoken.get_encoding('cl100k_base')

In [56]:
# Tokenize the whole corpus and hold the token ids as an int64 tensor.
tokenized_text = torch.tensor(encoding.encode(text), dtype=torch.long)

# Largest token id that actually occurs in the corpus. Later cells use
# max_token_value + 1 as the vocabulary size for the embedding table and the
# final projection.
max_token_value = tokenized_text.max().item()

In [52]:
# Split the token stream into a training part (first 90%) and a test part
# (remaining 10%). No shuffling: the split is a single cut point.
train_idex = int(len(tokenized_text) * 0.9)
train_data, test_data = tokenized_text[:train_idex], tokenized_text[train_idex:]

In [61]:
# Sample one random training batch.
data = train_data

# Random start index of each sequence in the batch. `high` is exclusive, so
# the largest start still leaves room for the one-token-shifted target window.
idxs = torch.randint(low=0, high=len(data) - context_length, size=(batch_size,))

# Inputs are context_length tokens starting at each index; targets are the
# same windows shifted right by one token (next-token prediction).
x_batch = torch.stack([data[start:start + context_length] for start in idxs])
y_batch = torch.stack([data[start + 1:start + 1 + context_length] for start in idxs])

In [63]:
# Token embedding: a learnable table of shape (max_token_value + 1, d_model).
input_embedding_lookup_table = nn.Embedding(
    num_embeddings=max_token_value + 1,
    embedding_dim=d_model,
)

# Calling the table works like a function: it maps each token id to its
# d_model-wide vector, turning (batch, context) ids into (batch, context, d_model).
x_batch_embedding = input_embedding_lookup_table(x_batch)
y_batch_embedding = input_embedding_lookup_table(y_batch)


torch.Size([4, 16, 64])

In [66]:

# Sinusoidal positional encoding: sine on even feature columns, cosine on odd.
position_encoding_lookup_table = torch.zeros(context_length, d_model)

# Column vector of positions 0..context_length-1, shape (context_length, 1).
position = torch.arange(0, context_length, dtype=torch.float)[:, None]

# One frequency per sin/cos column pair: exp(-2i * ln(10000) / d_model).
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

# Broadcasting (context_length, 1) * (d_model / 2,) gives one angle per
# (position, frequency) pair.
position_encoding_lookup_table[:, 0::2] = (position * div_term).sin()
position_encoding_lookup_table[:, 1::2] = (position * div_term).cos()

# Add a leading batch axis and expand (a view, no copy) to batch_size rows so
# the table lines up with the embedded batch.
position_encoding_lookup_table = position_encoding_lookup_table[None].expand(batch_size, -1, -1)



In [67]:
# Inject position information by adding the positional table element-wise to
# the token embeddings.
x = x_batch_embedding+position_encoding_lookup_table
# NOTE(review): `y` (position-encoded target embeddings) is not read by any
# later cell visible in this notebook section — confirm whether it is needed.
y = y_batch_embedding+position_encoding_lookup_table

In [69]:
# Multi-head attention, step 1: query / key / value projections.
# Each weight is d_model x d_model so that (context_length x d_model) inputs
# multiplied by it stay d_model wide: (16 x 64) @ (64 x 64) -> (16 x 64).
# The layers are created in the order Wq, Wk, Wv.
Wq, Wk, Wv = (
    nn.Linear(in_features=d_model, out_features=d_model) for _ in range(3)
)

# All three projections read the same position-encoded input `x`.
Q, K, V = Wq(x), Wk(x), Wv(x)



In [70]:
# Split the feature axis into heads and move the head axis forward:
# (batch, context, d_model) -> (batch, context, num_head, d_model // num_head)
#                           -> (batch, num_head, context, d_model // num_head)
Q, K, V = (
    projected.view(batch_size, context_length, num_head, d_model // num_head).transpose(1, 2)
    for projected in (Q, K, V)
)


In [71]:
# Scaled dot-product scores: Q K^T divided by sqrt(per-head width).
# Result shape is (batch, num_head, context, context).
output = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(d_model // num_head)



In [73]:
# Causal mask: during training each position must not see the tokens after it.
# True strictly above the diagonal marks the "future" positions to hide.
mask = torch.ones(context_length, context_length).triu(diagonal=1).bool()

# Masked entries become -inf, so the softmax gives them zero weight.
output = output.masked_fill(mask, float('-inf'))

# Normalise each query row into attention weights over the visible keys.
attention_score = output.softmax(dim=-1)


In [83]:
# Weighted sum of the value vectors using the attention weights, per head.
A = torch.matmul(attention_score, V)


In [84]:
# Restore the original layout: move the head axis back next to the per-head
# features and merge them into a single d_model-wide axis.
A = A.transpose(1, 2).reshape(batch_size, context_length, d_model)

# Output projection that mixes the concatenated heads.
Wo = nn.Linear(in_features=d_model, out_features=d_model)
output = Wo(A)

# Show the resulting shape as the cell output.
output.shape

torch.Size([4, 16, 64])

In [85]:
# Residual connection: add the attention block's input `x` back onto its output.
output = output +x


In [90]:
# Layer normalisation over the last (d_model) axis, applied to the residual sum.
layer_norm = nn.LayerNorm(d_model)

# Normalise exactly once. The cell previously normalised `output` and then
# normalised that result again, so the feed-forward block received a
# double-normalised tensor.
layer_norm_output = layer_norm(output)

# `output` carries the same normalised tensor: the feed-forward cell reads
# `layer_norm_output` as its input and adds its result onto `output`.
output = layer_norm_output

In [91]:
# Position-wise feed-forward network: widen d_model -> 4 * d_model, apply
# ReLU, then project back down to d_model.
ffn = nn.Sequential(
    nn.Linear(in_features=d_model, out_features=d_model * 4),
    nn.ReLU(),
    nn.Linear(in_features=d_model * 4, out_features=d_model),
)

# Run the normalised activations through the network ...
layer_norm_output = ffn(layer_norm_output)
# ... and add the result back onto the running output (residual connection).
output = layer_norm_output + output


torch.Size([4, 16, 64])

In [92]:
# Normalise again after the feed-forward residual.
# NOTE(review): this reuses the same `layer_norm` module (and therefore the
# same parameters) as the post-attention normalisation — confirm that sharing
# is intended rather than a separate nn.LayerNorm.
output = layer_norm(output)

In [93]:
# Final linear layer: project every position from d_model features to one
# score per token id in 0..max_token_value.
output = nn.Linear(
    in_features=d_model,
    out_features=max_token_value + 1,
)(output)

In [99]:
# Turn the vocabulary scores into a probability distribution per position.
# Despite its name, `logits` holds post-softmax probabilities.
logits = output.softmax(dim=-1)

# Most likely token id for the first position of the first sequence in the batch.
predicted_index = logits[0, 0].argmax().item()

88875

In [101]:
# Decode the predicted token id back into text with the same tokenizer; the
# decoded string is shown as the cell output.
encoding.decode([predicted_index])

" ''),"