In [1]:
import torch
from torch import nn
from d2l import torch as d2l
from net_frame import *
from data_precess import get_data

In [2]:
# NOTE(review): batch_size is assigned here but is not passed to get_data() below,
# so it has no effect on the iterators built in this cell.
batch_size = 64
# get_data() comes from data_precess; its four return values are bound to the names below.
# NOTE(review): "test_voacb" looks like a typo for "test_vocab" — confirm no other cell
# relies on the misspelled name before renaming it.
train_iter, test_iter, train_vocab, test_voacb = get_data()

read pos:  96%|█████████▌| 11972/12500 [00:00<00:00, 62182.23it/s]

read pos: 100%|██████████| 12500/12500 [00:00<00:00, 59943.02it/s]
read neg: 100%|██████████| 12500/12500 [00:00<00:00, 76670.02it/s]


Load raw data use time:0.41604113671928644s


read pos: 100%|██████████| 12500/12500 [00:00<00:00, 77021.89it/s]
read neg: 100%|██████████| 12500/12500 [00:00<00:00, 76492.83it/s]


Load raw data use time:0.34792733285576105s
Build vocab....
Finish!
Build data-iter...
Finish!


In [3]:
class BiRNN(nn.Module):
    """Bidirectional LSTM classifier over sequences of token indices.

    The model embeds each token, runs the embeddings through a bidirectional
    LSTM and feeds the concatenation of the first and last time-step outputs
    to a linear layer with two output units.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_size: Dimension of each token embedding.
        num_hiddens: Hidden size of the LSTM (per direction).
        num_layers: Number of stacked LSTM layers.
        **kwargs: Forwarded to ``nn.Module.__init__``.
    """
    def __init__(self, vocab_size, embed_size, num_hiddens,
                 num_layers, **kwargs):
        super().__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # bidirectional=True makes the recurrent encoder read the sequence
        # in both directions.
        self.encoder = nn.LSTM(
            input_size=embed_size,
            hidden_size=num_hiddens,
            num_layers=num_layers,
            bidirectional=True,
        )
        # Two time steps, each carrying 2 * num_hiddens features, are
        # concatenated before the classifier: hence 4 * num_hiddens inputs.
        self.decoder = nn.Linear(4 * num_hiddens, 2)

    def forward(self, inputs):
        """Return class scores of shape (batch size, 2).

        Args:
            inputs: Token indices of shape (batch size, number of time steps).
        """
        # The LSTM is not batch-first, so the time axis has to come first;
        # the embedded tensor is (time steps, batch size, embed size).
        time_major = inputs.T
        token_vecs = self.embedding(time_major)
        self.encoder.flatten_parameters()
        # Outputs of the last LSTM layer at every time step:
        # (time steps, batch size, 2 * num_hiddens).
        hidden_seq, _ = self.encoder(token_vecs)
        first_step, last_step = hidden_seq[0], hidden_seq[-1]
        # Concatenated encoding is (batch size, 4 * num_hiddens).
        return self.decoder(torch.cat((first_step, last_step), dim=1))


In [18]:
# Weight-initialisation hook, meant to be passed to ``net.apply``.
def init_weights(m):
    """Xavier-uniform initialise the weight tensors of Linear and LSTM modules.

    Only modules whose type is exactly ``nn.Linear`` or ``nn.LSTM`` are
    touched; every other module is left as it is. Bias tensors are never
    modified.

    Args:
        m: The module to initialise (``nn.Module.apply`` calls this once per
            submodule).
    """
    # Exact type match (not isinstance) keeps subclasses out of scope, as before.
    if type(m) is nn.Linear:
        nn.init.xavier_uniform_(m.weight)
    elif type(m) is nn.LSTM:
        # Use the public parameter iterator instead of the private
        # ``_flat_weights_names`` / ``_parameters`` attributes.
        for name, param in m.named_parameters(recurse=False):
            if "weight" in name:
                nn.init.xavier_uniform_(param)
# Build the network
embed_size, num_hiddens, num_layers = 100, 100, 2
# devices = d2l.try_all_gpus()
net = BiRNN(len(train_vocab), embed_size, num_hiddens, num_layers)
# Module.apply calls init_weights on every submodule; as the cell's last
# expression, its return value is what the notebook displays.
net.apply(init_weights)

BiRNN(
  (embedding): Embedding(280619, 100)
  (encoder): LSTM(100, 100, num_layers=2, bidirectional=True)
  (decoder): Linear(in_features=400, out_features=2, bias=True)
)

In [5]:
# Use a pretrained embedding layer
from embed_layer import TokenEmbedding
glove_embedding = TokenEmbedding('glove.6b.100d') # 100-dimensional, matching embed_size

In [19]:
# Test: index the pretrained embedding with the training vocabulary's token list
embeds = glove_embedding[train_vocab.idx_to_token]
print(embeds.shape)

# Load: copy the looked-up vectors into the model's embedding table and
# exclude that table from gradient updates
net.embedding.weight.data.copy_(embeds)
net.embedding.weight.requires_grad = False

torch.Size([280619, 100])


In [20]:
from tqdm import tqdm
# Compute accuracy
def accurancy(net,train_iter):
    """Return the fraction of samples in ``train_iter`` that ``net`` classifies correctly.

    The network is switched to evaluation mode and run without gradient
    tracking; its previous training/eval mode is restored before returning.

    Args:
        net: Model mapping a batch ``X`` to per-class scores of shape
            (batch size, number of classes).
        train_iter: Iterable yielding ``(X, Y)`` batches, where ``Y`` holds
            integer class labels.

    Returns:
        Accuracy as a float in [0, 1]; ``0.0`` when the iterable is empty.
    """
    total_nums = 0
    correct_nums = 0
    # Run on whatever device the model's parameters live on; a model without
    # parameters receives the batches unmoved.
    first_param = next(net.parameters(), None)
    device = None if first_param is None else first_param.device
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for X, Y in train_iter:
                if device is not None:
                    X = X.to(device)
                    Y = Y.to(device)
                predicted = net(X).argmax(dim=1)
                correct_nums += (predicted == Y).sum().item()
                total_nums += Y.numel()
    finally:
        # Leave the model in the mode the caller had it in.
        net.train(was_training)
    if total_nums == 0:
        return 0.0
    return correct_nums / total_nums

# Training function
def train(net,trainer:torch.optim.Optimizer,train_iter,loss_fn,lr,num_epochs,device_idx = None):
    """Train the sentiment-analysis model and return the per-epoch loss totals.

    Args:
        net: Model to train; it is moved to the selected device.
        trainer: Optimizer already constructed over ``net``'s parameters.
        train_iter: Sized iterable yielding ``(X, Y)`` batches.
        loss_fn: Callable ``loss_fn(y_pred, Y)``; its result is summed before
            back-propagation, so a per-sample loss is supported.
        lr: Not used by this function (the learning rate in effect is the one
            configured on ``trainer``); kept so existing calls keep working.
        num_epochs: Number of passes over ``train_iter``.
        device_idx: Index handed to ``try_gpu``; ``None`` selects index 0.

    Returns:
        A list with one entry per epoch: the sum of the loss over every
        sample seen in that epoch (not the mean shown in the progress bar).
    """
    # Select the device
    device = try_gpu(0 if device_idx is None else device_idx)

    # Move the network to the device and switch to training mode
    net = net.to(device)
    net.train()

    # Training loop
    loss_plt = []
    for epoch in range(num_epochs):
        loop = tqdm(train_iter,desc = f"Epoch:[{epoch + 1}/{num_epochs}]",
                    total = len(train_iter))
        loss_temp = 0
        total_nums = 0
        for X, Y in loop:
            X = X.to(device)
            Y = Y.to(device)

            # Clear gradients left over from the previous step
            trainer.zero_grad()

            # Forward pass
            y_pred = net(X)
            total_nums += X.shape[0]

            # Reduce the loss once and reuse it for both the backward pass
            # and the running total
            batch_loss = loss_fn(y_pred,Y).sum()
            batch_loss.backward()

            # Update parameters
            trainer.step()

            loss_temp += batch_loss.item()
            loop.set_postfix({"LOSS" : loss_temp / total_nums,"lr" : "{:e}".format(trainer.param_groups[0]['lr'])})
        loss_plt.append(loss_temp)
    return loss_plt

In [12]:
from tqdm import tqdm

def train_batch_ch13(net, X, y, loss, trainer, device):
    """Run one optimisation step on a single minibatch.

    Single-device variant of the d2l Chapter 13 helper: the batch is moved to
    ``device``, the summed loss is back-propagated and the optimizer takes
    one step.

    Args:
        net: Model to train; switched to training mode on every call.
        X: Input batch (a tensor).
        y: Target batch (a tensor).
        loss: Callable ``loss(pred, y)`` whose result is summed before
            back-propagation.
        trainer: Optimizer over ``net``'s parameters.
        device: Device the batch is moved to.

    Returns:
        A tuple ``(train_loss_sum, train_acc_sum)``: the summed loss as a
        tensor and the value returned by ``d2l.accuracy(pred, y)``.
    """
    features, targets = X.to(device), y.to(device)
    net.train()
    trainer.zero_grad()
    logits = net(features)
    per_sample = loss(logits, targets)
    per_sample.sum().backward()
    trainer.step()
    # Both statistics are computed from the predictions made before the step.
    return per_sample.sum(), d2l.accuracy(logits, targets)

def train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs,
               device_idx = 0):
    """Train a model on a single device, showing the running mean loss per epoch.

    Adapted from the d2l Chapter 13 training loop; the multi-GPU, animator,
    timer and test-accuracy parts of that loop are not performed here.

    Args:
        net: Model to train; it is moved to the selected device.
        train_iter: Sized iterable yielding ``(features, labels)`` batches.
        test_iter: Accepted for signature compatibility; not used.
        loss: Loss callable forwarded to ``train_batch_ch13``.
        trainer: Optimizer forwarded to ``train_batch_ch13``.
        num_epochs: Number of passes over ``train_iter``.
        device_idx: Index handed to ``d2l.try_gpu``.

    Returns:
        None. Progress is reported only through the tqdm progress bar.
    """
    device = d2l.try_gpu(device_idx)
    net = net.to(device)
    for epoch in range(num_epochs):
        loop = tqdm(train_iter,desc = f"Epoch:[{epoch + 1}/{num_epochs}]",
                    total = len(train_iter))
        # Running sum of the loss and number of examples seen in this epoch
        loss_temp = 0
        total_nums = 0
        for features, labels in loop:
            # The accuracy statistic returned alongside the loss is not reported.
            l, _ = train_batch_ch13(
                net, features, labels, loss, trainer, device)
            loss_temp += l.item()
            total_nums += labels.shape[0]
            loop.set_postfix({"Loss":loss_temp / total_nums})

In [21]:
# Training hyperparameters
lr, num_epochs = 0.01, 5
loss = nn.CrossEntropyLoss(reduction = 'none') # reduction='none' (lowercase) returns one loss value per sample
trainer = torch.optim.Adam(net.parameters(),lr = lr)
# NOTE(review): device_idx = 2 hard-codes a specific device index — adjust for the machine at hand.
loss_plt = train(net,trainer,train_iter,loss,lr,num_epochs,device_idx = 2)

Epoch:[1/5]:   2%|▏         | 9/391 [00:00<00:09, 38.96it/s, LOSS=0.714, lr=1.000000e-02]

Epoch:[1/5]: 100%|██████████| 391/391 [00:08<00:00, 45.16it/s, LOSS=0.598, lr=1.000000e-02]
Epoch:[2/5]: 100%|██████████| 391/391 [00:08<00:00, 45.53it/s, LOSS=0.392, lr=1.000000e-02]
Epoch:[3/5]: 100%|██████████| 391/391 [00:08<00:00, 45.36it/s, LOSS=0.346, lr=1.000000e-02]
Epoch:[4/5]: 100%|██████████| 391/391 [00:08<00:00, 45.05it/s, LOSS=0.32, lr=1.000000e-02] 
Epoch:[5/5]: 100%|██████████| 391/391 [00:08<00:00, 45.38it/s, LOSS=0.29, lr=1.000000e-02] 


In [None]:
# Same hyperparameter values as the train(...) cell, driven through train_ch13 with a fresh optimizer.
# NOTE(review): `net` is not re-created in this cell, so it presumably continues from
# whatever state earlier cells left it in — confirm that is intended.
lr, num_epochs = 0.01, 5
trainer = torch.optim.Adam(net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss(reduction="none")
train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs,
    device_idx = 2)