In [17]:
import copy
import numpy as np
import torch.nn as nn
import torch
import torch.optim as optim
import matplotlib.pyplot as plt
import math
from torch.autograd import Variable
import torch.nn.functional as F
import os

In [24]:
#数据预处理
LEARNING_RATE = 0.1
n_epochs = 100
n_points = 10
data = torch.rand(n_points, 2) * 2 - 1 
labels = (data.norm(dim = 1) > 0.7).float().unsqueeze(1)

device = "cuda" if torch.cuda.is_available() else "cpu"
data.to(device)
labels.to(device)

#创建模型类
class CircleClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2,20)
        self.layer2 = nn.Linear(20,1)
    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.sigmoid(self.layer2(x))
        return x
        
#实例化
model = CircleClassifier()
model = model.to(device)
loss_fn = nn.BCELoss()
optimizer = optim.AdamW(model.parameters(), lr = LEARNING_RATE)

for epoch in range(n_epochs):
    optimizer.zero_grad() #梯度归零
    predictions = model(data)
    loss = loss_fn(predictions, labels)
    optimizer.step()
    
    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item(): .4f}")


Epoch 0, Loss:  0.7203
Epoch 20, Loss:  0.7203
Epoch 40, Loss:  0.7203
Epoch 60, Loss:  0.7203
Epoch 80, Loss:  0.7203


In [57]:
#超参数
torch.manual_seed(42)
batch_size = 3
block_size = 4

In [58]:
torch.manual_seed(42)
file_name = "test.txt"

with open(file_name, "r", encoding='utf-8') as f:
    text = f.read()

print(text)

第一回  甄士隐梦幻识通灵　贾雨村风尘怀闺秀

列位看官：你道此书从何而来？说起根由，虽近荒唐，细按则深有趣味。待在下将此来历注明，方使阅者了然不惑。

原来女娲氏炼石补天之时，于大荒山无稽崖炼成高经十二丈、方经二十四丈顽石三万六千五百零一块。娲皇氏只用了


In [59]:
#有序、不重复的列表
chars = sorted(list(set(text)))
vocab_size = len(chars)

#字符和整数之间的投影
stoi = {ch : i for i,ch in enumerate(chars)}#符号到整数
itos = {i : ch for i,ch in enumerate(chars)}#整数到符号
encode = lambda str1:[stoi[c] for c in str1]#编码，把字符串转化为数字串（列表）
decode = lambda list1:"".join([itos[i] for i in list1])#解码，把数字转换为字符串

data = torch.tensor(encode(text), dtype=torch.long)
n = int(0.9 * len(data))
train_data = data[:n]
val_data = data[n:]


In [60]:
def get_batch(split):
    data = train_data if split == "train" else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    # token_list = x.tolist()
    # for str_list in token_list:
    #   print(decode(str_list))
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    x,y = x.to(device), y.to(device)
    return x,y
get_batch("train")

(tensor([[ 65,   2,  87,  96],
         [ 27,  63,  55, 101],
         [102,  20,  92,  61]]),
 tensor([[  2,  87,  96,  57],
         [ 63,  55, 101,  52],
         [ 20,  92,  61,  12]]))

In [54]:

torch.manual_seed(42)
size = 3  #几个值需要做嵌入
n_embedding = 4  #嵌入后的维度

embedding_table = nn.Embedding(size, n_embedding)
# idx = torch.tensor([0,1,2,3,4,5,6,7,8,9])
idx = torch.arange(3)
print(embedding_table(idx))

tensor([[ 0.3367,  0.1288,  0.2345,  0.2303],
        [-1.1229, -0.1863,  2.2082, -0.6380],
        [ 0.4617,  0.2674,  0.5349,  0.8094]], grad_fn=<EmbeddingBackward0>)


In [61]:
x,y = get_batch("train")
print(x)

tensor([[ 51,  24,  64,  56],
        [ 63,  55, 101,  52],
        [ 14,   7,   3,  52]])


In [68]:
n_embd = 3
token_embedding_table = nn.Embedding(vocab_size, n_embd)
token_embd = token_embedding_table(x)
position_embedding_table = nn.Embedding(block_size, n_embd)
position_idx = torch.arange(block_size)
position_emb = position_embedding_table(position_idx)

print("token_embd",token_embd)
x_list = x.tolist()
for str_list in x_list:
    decoded_str = decode(str_list)
    print(decoded_str)

print("token_embd",token_embd)
print("position_emb",position_emb)

token_embd tensor([[[-1.5322, -0.1141, -0.9761],
         [ 0.0521,  0.2882,  0.3520],
         [ 0.3848,  0.2943,  0.6351],
         [ 0.6860,  0.8626, -0.4877]],

        [[-0.2195,  1.5598, -0.0529],
         [ 0.9251,  1.6376, -0.9297],
         [-0.4352, -1.8566, -1.5076],
         [ 0.4865, -0.6963,  1.9266]],

        [[ 0.6834,  2.3535,  0.0434],
         [ 1.8833, -1.2941, -1.2332],
         [ 0.6737, -0.6979,  1.7839],
         [ 0.4865, -0.6963,  1.9266]]], grad_fn=<EmbeddingBackward0>)
按则深有
注明，方
二丈、方
token_embd tensor([[[-1.5322, -0.1141, -0.9761],
         [ 0.0521,  0.2882,  0.3520],
         [ 0.3848,  0.2943,  0.6351],
         [ 0.6860,  0.8626, -0.4877]],

        [[-0.2195,  1.5598, -0.0529],
         [ 0.9251,  1.6376, -0.9297],
         [-0.4352, -1.8566, -1.5076],
         [ 0.4865, -0.6963,  1.9266]],

        [[ 0.6834,  2.3535,  0.0434],
         [ 1.8833, -1.2941, -1.2332],
         [ 0.6737, -0.6979,  1.7839],
         [ 0.4865, -0.6963,  1.9266]]], grad_fn=<

104
