In [1]:
import numpy as np
import torch
import torch.nn as tnn
import torch.nn.functional as F
import torch.optim as topti
from torchtext import data
from torchtext.vocab import GloVe
from imdb_dataloader import IMDB

# 构建神经网络

In [38]:
# Class for creating the neural network.
class Network(tnn.Module):
    """LSTM sentiment classifier over 50-dimensional word embeddings.

    A single-layer LSTM (hidden size 300) encodes the sequence; the hidden
    output at the last *real* token of every sequence is passed through a
    small fully connected head that produces one probability per example.
    """

    def __init__(self):
        super(Network, self).__init__()
        # Attribute names are part of the checkpoint format (state_dict keys).
        self.lstm1 = tnn.LSTM(50, 300, 1, batch_first=True)
        self.selu = tnn.SELU()
        self.dropout = tnn.Dropout(0.5)
        self.dense1 = tnn.Linear(300, 200)
        self.norm1 = tnn.BatchNorm1d(200)
        self.dense2 = tnn.Linear(200, 128)
        self.dense3 = tnn.Linear(128, 64)
        self.dense4 = tnn.Linear(64, 1)

    def forward(self, input, length):
        """
        DO NOT MODIFY FUNCTION SIGNATURE
        Create the forward pass through the network.

        Args:
            input: float tensor of shape (batch, seq_len, 50); sequences may
                be padded on the right.
            length: integer tensor of shape (batch,) holding the number of
                real (non-padding) timesteps of every sequence.

        Returns:
            1-D tensor of shape (batch,) with sigmoid probabilities.
        """
        o, _ = self.lstm1(input)

        # Pick the LSTM output at the last real token of each sequence.
        # Using o[:, -1, :] would read the state after the LSTM has run over
        # padding for every sequence shorter than the longest one in the batch.
        last = length.to(device=o.device, dtype=torch.long) - 1
        last = last.clamp(min=0, max=o.size(1) - 1)
        rows = torch.arange(o.size(0), device=o.device)
        x = self.selu(o[rows, last].view(-1, 300))

        x = self.dense1(x)
        x = self.norm1(x)
        x = self.dropout(x)
        x = self.dense2(x)
        x = self.dropout(x)
        x = self.selu(x)
        x = self.dense3(x)
        x = self.dropout(x)
        x = self.dense4(x)

        # TODO Weight Normalization
        return torch.sigmoid(x).view(-1)

In [31]:
class PreProcessing():
    """Namespace bundling the text field with its pre/post-processing hooks."""

    def pre(x):
        """Called after tokenization (the sentence has been split into words).

        Currently a pass-through: the example is returned unchanged.
        """
        # An earlier experiment that scrubbed '<br' / '/>' markup fragments
        # from the token list was abandoned (original note: "GRAM too small").
        return x

    def post(batch, vocab):
        """Pass-through hook given to the field as `postprocessing`; returns the batch as is."""
        return batch

    # Lower-cased, batch-first field that also reports sequence lengths.
    text_field = data.Field(
        lower=True,
        include_lengths=True,
        batch_first=True,
        preprocessing=pre,
        postprocessing=post,
    )

In [40]:
def lossFunc():
    """
    Build the training criterion: plain binary cross-entropy.

    The criterion expects probabilities, not logits; the sigmoid is applied
    by the network's forward pass, not by this loss.
    """
    criterion = tnn.BCELoss()
    return criterion

In [33]:
def save(_net,PATH='./model.pth',device=torch.device('cuda')):
    """Move `_net` to `device` and write its state_dict to `PATH`.

    Args:
        _net: module whose parameters are saved. It is moved to `device`
            in place and stays there after the call.
        PATH: destination file for the state_dict.
        device: target device (a `torch.device` or device string). When a
            CUDA device is requested but CUDA is not available, the CPU is
            used instead so that saving also works on CPU-only machines.
    """
    device = torch.device(device)
    if device.type == 'cuda' and not torch.cuda.is_available():
        device = torch.device('cpu')
    _net.to(device)
    torch.save(_net.state_dict(), PATH)

In [34]:
def evaluate(_net):
    """Print and return the classification accuracy (in percent) of `_net`.

    Iterates once over the module-level `testLoader`, looking up embeddings
    through `textField.vocab.vectors` and running on the module-level
    `device`. The network is switched to eval mode for the measurement
    (Dropout off, BatchNorm using running statistics) and put back into its
    previous mode afterwards, so calling this in the middle of training does
    not alter the training behaviour.

    Returns:
        Accuracy in percent over the samples that were scored
        (0.0 if the loader yields no samples).
    """
    was_training = _net.training
    _net.eval()
    num_correct = 0
    num_seen = 0
    try:
        with torch.no_grad():
            for batch in testLoader:
                # Get a batch and potentially send it to GPU memory.
                inputs = textField.vocab.vectors[batch.text[0]].to(device)
                length = batch.text[1].to(device)
                labels = batch.label.type(torch.FloatTensor).to(device)

                # Shift label indices down by one so they line up with the
                # rounded network outputs (same shift as in training).
                labels -= 1

                # Get predictions
                outputs = _net(inputs, length)
                predicted = torch.round(outputs)
                num_correct += torch.sum(labels == predicted).item()
                num_seen += labels.size(0)
    finally:
        # Restore whatever mode the caller had the network in.
        _net.train(was_training)

    accuracy = 100 * num_correct / num_seen if num_seen else 0.0
    print(f"Classification accuracy: {accuracy}")
    return accuracy

## 加载数据集

In [35]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# Text field comes from PreProcessing; labels use a non-sequential field.
textField = PreProcessing.text_field
labelField = data.Field(sequential=False)

# Training and validation splits from the project's IMDB loader.
train, dev = IMDB.splits(textField, labelField, train="train", validation="dev")

# Build both vocabularies from train + dev; words get 50-dimensional GloVe
# ("6B") vectors, matching the LSTM input size of 50 used by Network.
textField.build_vocab(train, dev, vectors=GloVe(name="6B", dim=50))
# NOTE(review): evaluate() and main() subtract 1 from every label index,
# presumably because this vocabulary reserves index 0 - confirm.
labelField.build_vocab(train, dev)

# Batches of 64; examples are sorted by text length inside each batch.
# presumably trainLoader iterates `train` and testLoader iterates `dev`
# (order of the tuple) - verify against BucketIterator.splits.
trainLoader, testLoader = data.BucketIterator.splits((train, dev), shuffle=True, batch_size=64,
                                                     sort_key=lambda x: len(x.text), sort_within_batch=True)


In [41]:
def main():
    """Train the network for 10 epochs and keep the best checkpoint on disk.

    After every epoch the validation accuracy is compared with the best
    accuracy known so far (initialised from an existing checkpoint at
    ``./model.pth`` if there is one); a better model overwrites the
    checkpoint. When training is done the best checkpoint is reloaded,
    evaluated once more and re-saved from the CPU so that it can be loaded
    on machines without a GPU.
    """
    PATH = "./model.pth"

    # Use a GPU if available, as it should be faster.
    print("Using device: " + str(device))

    net = Network().to(device)
    criterion = lossFunc()
    optimiser = topti.Adam(net.parameters(), lr=1e-3)  # Minimise the loss using the Adam algorithm.
    scheduler = torch.optim.lr_scheduler.StepLR(optimiser, step_size=4, gamma=0.6)

    # Score a checkpoint left over from a previous run once, up front,
    # instead of reloading and re-evaluating it after every epoch.
    best_acc = None
    try:
        old_state = torch.load(PATH, map_location=device)
    except OSError:
        print("No available model!")
    else:
        old_net = Network().to(device)
        old_net.load_state_dict(old_state)
        print("Former Validation Acc")
        best_acc = evaluate(old_net)
        del old_net, old_state

    for epoch in range(10):
        # Make sure Dropout/BatchNorm are in training behaviour.
        net.train()
        running_loss = 0
        for i, batch in enumerate(trainLoader):
            # Get a batch and potentially send it to GPU memory.
            inputs = textField.vocab.vectors[batch.text[0]].to(device)
            length = batch.text[1].to(device)
            labels = batch.label.type(torch.FloatTensor).to(device)

            # Shift label indices down by one to get the targets for BCELoss.
            labels -= 1

            # Gaussian noise on the embeddings as augmentation. The noise has
            # shape (seq_len, dim) and is broadcast over the batch; late
            # epochs get an additional, stronger dose.
            if 6 < epoch <= 15:
                inputs += (torch.randn(inputs.shape[1], inputs.shape[2]) * 0.3).to(device)

            inputs += (torch.randn(inputs.shape[1], inputs.shape[2]) * 0.2).to(device)

            # PyTorch calculates gradients by accumulating contributions to them (useful for
            # RNNs).  Hence we must manually set them to zero before calculating them.
            optimiser.zero_grad()

            # Forward pass through the network.
            output = net(inputs, length)

            loss = criterion(output, labels)

            # Calculate gradients.
            loss.backward()

            # Minimise the loss according to the gradient.
            optimiser.step()

            running_loss += loss.item()

            if i % 32 == 31:
                print("Epoch: %2d, Batch: %4d, Loss: %.3f" % (epoch + 1, i + 1, running_loss / 32))
                running_loss = 0
        scheduler.step()

        # Evaluation and save model
        print()
        print("###########################################")
        print("Current Validation Acc")
        new_acc = evaluate(net)
        if best_acc is None:
            print("No available model!")
        else:
            print("Former Validation Acc")
            print(f"Classification accuracy: {best_acc}")
        if best_acc is None or new_acc > best_acc:
            # Save from the device in use; the default of save() is CUDA.
            save(net, PATH, device=device)
            best_acc = new_acc
            print()
            print("Better Accuracy!!!")
            print("Saved model")
        print("###########################################\n")
    print("Trainning Complete")

    # Make the best acc model for cpu
    net = Network().to(device)
    net.load_state_dict(torch.load(PATH, map_location=device))
    print("Best Result: ")
    evaluate(net)
    save(net, PATH, device=torch.device('cpu'))

if __name__ == '__main__':
    main()

Using device: cuda:0


RuntimeError: CUDA error: device-side assert triggered