In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchtext
import math
import numpy as np
from   torchtext.vocab import GloVe
from   torchtext.legacy.data import Field
from   torch.optim import lr_scheduler

##### 定义变量

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

##### 创建数据集

In [3]:
TEXT                 = torchtext.legacy.data.Field(lower=True,fix_length=200,batch_first=False)
LABEL                = torchtext.legacy.data.Field(sequential=False)
train,test           = torchtext.legacy.datasets.IMDB.splits(TEXT,LABEL)
TEXT.build_vocab(train,vectors=None,max_size=10000,min_freq=10)
LABEL.build_vocab(train)
train_iter,test_iter = torchtext.legacy.data.BucketIterator.splits((train,test),batch_size=16)

##### 创建模型

In [4]:
'''  定义位置编码函数  '''
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, dropout=0.1, max_len=200):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe           = torch.zeros(max_len, d_model)
        position     = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term     = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2]  = torch.sin(position * div_term)
        pe[:, 1::2]  = torch.cos(position * div_term)
        pe           = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x            = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [5]:
hidden_size  = 300
embeding_dim = 100
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.em                  = nn.Embedding(len(TEXT.vocab.stoi), embeding_dim)   # 200*batch*100
        self.pos                 = PositionalEncoding(embeding_dim)
        self.encoder_layer       = nn.TransformerEncoderLayer(d_model=embeding_dim,nhead=5)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer,num_layers=6)
        self.fc1                 = nn.Linear(200, 256)
        self.fc2                 = nn.Linear(256, 3)  # 定义三分类模型

    def forward(self, inputs):
        x = self.em(inputs)
        x = self.pos(x)
        x = self.transformer_encoder(x)
        x = x.permute(1, 0, 2)
        x = torch.sum(x, dim=-1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

##### 训练模型

In [6]:
model      = Net()
model      = model.to(device)
loss_fn    = nn.CrossEntropyLoss()
optimizer  = torch.optim.Adam(model.parameters(), lr=0.0001)
epochs     = 100
train_loss = []
train_acc  = []
test_loss  = []
test_acc   = []

In [7]:
def fit(epoch, model, trainloader, testloader):
    correct      = 0
    total        = 0
    running_loss = 0
    
    model.train()
    for b in trainloader:
        x, y   = b.text.to(device), b.label.to(device)
        y_pred = model(x)
        loss   = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            y_pred        = torch.argmax(y_pred, dim=1)
            correct      += (y_pred == y).sum().item()
            total        += y.size(0)
            running_loss += loss.item()
    epoch_loss = running_loss / len(trainloader.dataset)
    epoch_acc  = correct / total
        
        
    test_correct      = 0
    test_total        = 0
    test_running_loss = 0 
    
    model.eval()
    with torch.no_grad():
        for b in testloader:
            x, y               = b.text.to(device), b.label.to(device)
            y_pred             = model(x)
            loss               = loss_fn(y_pred, y)
            y_pred             = torch.argmax(y_pred, dim=1)
            test_correct      += (y_pred == y).sum().item()
            test_total        += y.size(0)
            test_running_loss += loss.item()
    
    epoch_test_loss = test_running_loss / len(testloader.dataset)
    epoch_test_acc  = test_correct / test_total
    print('epoch: ', epoch, 'loss： ', round(epoch_loss, 3),'accuracy:', round(epoch_acc, 3),'test_loss： ', round(epoch_test_loss, 3),'test_accuracy:', round(epoch_test_acc, 3) )
    return epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc

In [None]:
for epoch in range(epochs):
    epoch_loss,epoch_acc,epoch_test_loss,epoch_test_acc = fit(epoch,model,train_iter,test_iter)
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    test_loss.append(epoch_test_loss)
    test_acc.append(epoch_test_acc)

epoch:  0 loss：  0.044 accuracy: 0.499 test_loss：  0.043 test_accuracy: 0.5
epoch:  1 loss：  0.043 accuracy: 0.501 test_loss：  0.043 test_accuracy: 0.5
