In [2]:
import torch;
import tiktoken;
import torch
import torch.nn as nn
import math


from MyTransformer import MyGPTModel;

# Seed PyTorch's global RNG before the model is constructed.
torch.manual_seed(123)



GPT_CONFIG = {
    "vocab_size": 50257,    # vocabulary size
    "max_seq_length": 256, # maximum length of each training sequence
    "embedding_dim": 768,   # dimension of the embedding vectors
    "n_heads": 12,          # number of attention heads
    "n_layers": 12,         # number of Transformer layers
    "drop_rate": 0.1,       # dropout rate
    "qkv_bias": False       # bias flag (presumably for the Q/K/V projections; confirm in MyTransformer)
}

#torch.manual_seed(123)
model = MyGPTModel(GPT_CONFIG)

# Smoke test: run a batch of two sequences of three token ids each through
# the model and print the input, the output shape, and the raw output.
# NOTE(review): model.eval() is only called in a later cell; if MyGPTModel
# uses dropout, this forward pass runs with it active. Confirm that is intended.
x = torch.tensor([[1,2,3], [6666,7777,888]])
out = model(x)
print("Input data:\n", x)
print("\nOutput shape:", out.shape)
print(out)

Input data:
 tensor([[   1,    2,    3],
        [6666, 7777,  888]])

Output shape: torch.Size([2, 3, 50257])
tensor([[[ 0.3655, -0.0237, -0.3525,  ..., -0.1663, -0.4530,  0.5786],
         [-0.3553,  0.2906, -0.0551,  ...,  0.5214, -0.7186,  0.3113],
         [ 1.1195,  0.2388, -0.3048,  ..., -1.4442, -0.1237, -0.5238]],

        [[ 0.2089,  0.5267,  0.2014,  ..., -0.2349,  0.2699, -0.4303],
         [-1.0053, -0.0119, -0.1596,  ...,  0.3058, -0.2603, -1.3904],
         [-0.0801,  0.1297,  0.3988,  ..., -0.3624, -0.3797, -0.8069]]],
       grad_fn=<UnsafeViewBackward0>)


# 一、 推理

In [4]:

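# generate_text parameters:
#   max_new_tokens   - maximum number of new tokens to generate
#   context_seq_size - size of the text context window, in tokens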
def generate_text(model, prompt, max_new_tokens, context_seq_size):
    """Greedily extend ``prompt`` by ``max_new_tokens`` token ids.

    On every step the model is fed at most the last ``context_seq_size``
    tokens of the running sequence, the logits at the final position are
    taken, and the highest-scoring token id is appended.

    Args:
        model: Callable that maps a 2-D token-id tensor to a 3-D tensor of
            logits (it is indexed here as ``[:, -1, :]``).
        prompt: 2-D token-id tensor; new tokens are concatenated along dim 1.
        max_new_tokens: Number of tokens to append.
        context_seq_size: Maximum number of trailing tokens passed to the
            model on each step.

    Returns:
        The input ``prompt`` with ``max_new_tokens`` additional columns
        appended along dim 1.
    """
    for _ in range(max_new_tokens):
        # Crop the running sequence to the context window so the model never
        # receives more than ``context_seq_size`` tokens.
        context = prompt[:, -context_seq_size:]

        # Inference only: skip gradient tracking for the forward pass.
        with torch.no_grad():
            logits = model(context)

        # Keep only the scores for the last position of each sequence.
        last_logits = logits[:, -1, :]

        probas = torch.softmax(last_logits, dim=-1)

        # Greedy decoding: pick the index of the most probable token.
        new_token = torch.argmax(probas, dim=-1, keepdim=True)

        # Append the chosen token to the full (uncropped) sequence.
        prompt = torch.cat((prompt, new_token), dim=1)
    return prompt

# 二、 编码向量

In [5]:
#  文本转token
def text_to_tokenids(text, tokenizer):
    """Encode ``text`` with ``tokenizer`` and return the ids as a tensor.

    The result of ``tokenizer.encode(text)`` is wrapped in a tensor and a
    new leading dimension of size 1 is inserted in front of it.
    """
    ids = torch.tensor(tokenizer.encode(text))
    # Insert a new axis at position 0 in front of the encoded ids.
    return torch.unsqueeze(ids, 0)

# token 转文本
def tokenids_to_text(tokenids, tokenizer):
    """Decode a tensor of token ids back into text with ``tokenizer``.

    A leading dimension of size 1 (e.g. ``[[1, 2, 3]]``) is squeezed away
    before the ids are converted to a Python list and decoded.
    """
    # squeeze(0) only removes dim 0 when its size is 1; otherwise it is a no-op.
    flat_ids = tokenids.squeeze(0).tolist()
    text = tokenizer.decode(flat_ids)
    return text

In [6]:

# Put the model into inference (eval) mode before generating.
model.eval()

# Prompt: "What is the highest mountain in Beijing?" (expected answer: Dongling Mountain).
# No weights are loaded in the visible cells, so the continuation printed
# below comes from a freshly constructed model.
prompt = "北京的最高山是?"

tokenizer = tiktoken.get_encoding("gpt2")

# Encode the prompt text as a tensor of token ids.
prpt = text_to_tokenids(prompt, tokenizer)

token_ids = generate_text(
    model=model,
    prompt=prpt,
    max_new_tokens=10,
    # Take the context window from the model config instead of repeating the
    # literal 256, so the two values cannot drift apart.
    context_seq_size=GPT_CONFIG["max_seq_length"],
)

# Decode prompt + generated ids back to text and show the result.
print(tokenids_to_text(token_ids, tokenizer))

北京的最高山是? Corporate denomineousembedreportprintashesarded branded Toledo listenerimble
