In [2]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from transformers import AdamW ##新ㄉ 好像比較好
from transformers import BertForSequenceClassification
from transformers import BertTokenizer

In [3]:
NUM_LABELS = 5
# for train
BATCH_SIZE = 1
EPOCHS = 2



In [4]:
# 60000筆 0.9,0.05,0.05切
data = pd.read_csv('./test_fix.csv')
data_y = data['type'].tolist()
data_x = data['text']

train_x = data_x[:54000].tolist()
train_y = np.array(data_y[:54000])

test_x = data_x[54000:57000].tolist()
test_y = np.array(data_y[54000:57000])

dev_x = data_x[57000:].tolist()
dev_y = np.array(data_y[57000:])

#先用原本的bert
model_path = '../chinese_wwm_pytorch/'

In [5]:
class TrainDataset(Dataset):
    def __init__(self,input_dict,y):
        self.input_ids = input_dict['input_ids']
        self.token_type_ids = input_dict['token_type_ids']
        self.attention_mask = input_dict['attention_mask']
        self.y = y
    def __getitem__(self, idx):
        input_id = self.input_ids[idx]
        tokentype = self.token_type_ids[idx]
        attentionmask = self.attention_mask[idx]
        y = self.y[idx]
        return input_id, tokentype, attentionmask, y
    def __len__(self):
        return len(self.input_ids)

In [6]:
tokenizer = BertTokenizer.from_pretrained(model_path)
train_input = tokenizer.batch_encode_plus(train_x,
                                          add_special_tokens = True,
                                          max_length = 512,
                                          truncation = True,                ##是否截斷
                                          return_special_tokens_mask = True,
                                          pad_to_max_length = True,
                                          return_tensors = 'pt')


In [7]:
trainset = TrainDataset(train_input, train_y) ##trainset參數如init
trainloader = DataLoader(trainset, batch_size = BATCH_SIZE, shuffle = True)  

In [8]:
test_input = tokenizer.batch_encode_plus(test_x,
                                         add_special_tokens = True,
                                         max_length = 512,
                                         truncation = True,
                                         return_special_tokens_mask = True,
                                         pad_to_max_length = True,
                                         return_tensors = 'pt')
testset = TrainDataset(test_input, test_y)
testloader = DataLoader(testset, batch_size = BATCH_SIZE, shuffle = True)

In [9]:
dev_input = tokenizer.batch_encode_plus(dev_x,
                                         add_special_tokens = True,
                                         max_length = 512,
                                         truncation = True,
                                         return_special_tokens_mask = True,
                                         pad_to_max_length = True,
                                         return_tensors = 'pt')
devset = TrainDataset(dev_input, dev_y)
devloader = DataLoader(testset, batch_size = BATCH_SIZE, shuffle = True)

In [10]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def get_test_acc(model, testloader):
    model.eval()  ##test mode
    total = 0 ##total_num
    correct = 0 ##correct_num
    with torch.no_grad():   ##eval不計算gradient 
        for data in testloader:
            tokens_tensors , segment_tensors,masks_tensors,labels = [t.to(device) for t in data]
            outputs = model(input_ids = tokens_tensors,
                           token_type_ids = segment_tensors,
                           attention_mask = masks_tensors,
                           labels = labels)
            pred = torch.argmax(outputs[1],dim=-1)
            total += labels.size()[0]
            correct += (pred == labels).sum().item()
    
    return correct/total

In [11]:
model = BertForSequenceClassification.from_pretrained(model_path,num_labels = NUM_LABELS)
model = model.to(device)
model.train()




Some weights of the model checkpoint at ../chinese_wwm_pytorch/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpo

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [12]:
optimizer = AdamW(model.parameters(),lr = 2e-5)

highest_test = 0

for epoch in range(0,EPOCHS):
    i = 0
    running_loss = 0.0
    correct = 0
    total = 0

    for (i,data) in enumerate(trainloader):
        
        tokens_tensors ,  segments_tensors , masks_tensors , labels  = [t.to(device) for t in data]
        
        bert_outputs = model(input_ids=tokens_tensors, 
                             token_type_ids=segments_tensors, 
                             attention_mask=masks_tensors,
                             labels = labels)
        
        optimizer.zero_grad()
        logits = bert_outputs[1]
        
        pred = torch.argmax(logits,dim = -1)
        
        loss = bert_outputs[0]
        loss.backward()
        optimizer.step()
        
        total += pred.size()[0]
        correct += (pred == labels).sum().item()
        running_loss += loss.item()
        print(f'\rEpoch [{epoch+1}/{EPOCHS}] {i}/{len(trainloader)} Loss: {running_loss:.4f} Acc : {(correct/total):.4f}', end='')
    test_acc = get_test_acc(model,testloader)
    if test_acc > highest_test:
        highest_test = test_acc
        torch.save(model.state_dict(),'./BERT_Sogou_AdamW' + str(epoch+1) + '.pkl')
    print('test acc:' , test_acc)


Epoch [1/2] 4/54000 Loss: 7.8457 Acc : 0.0000

RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`
Exception raised from gemm<float> at /pytorch/aten/src/ATen/cuda/CUDABlas.cpp:165 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x42 (0x7fa8e2be91e2 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0xefd203 (0x7fa8e3f4f203 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cuda.so)
frame #2: at::native::(anonymous namespace)::addmm_out_cuda_impl(at::Tensor&, at::Tensor const&, at::Tensor const&, at::Tensor const&, c10::Scalar, c10::Scalar) + 0xf15 (0x7fa8e5039145 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cuda.so)
frame #3: at::native::mm_cuda(at::Tensor const&, at::Tensor const&) + 0xb3 (0x7fa8e503b043 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cuda.so)
frame #4: <unknown function> + 0xf22a20 (0x7fa8e3f74a20 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cuda.so)
frame #5: <unknown function> + 0xa56530 (0x7fa91e6a8530 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #6: at::Tensor c10::Dispatcher::call<at::Tensor, at::Tensor const&, at::Tensor const&>(c10::TypedOperatorHandle<at::Tensor (at::Tensor const&, at::Tensor const&)> const&, at::Tensor const&, at::Tensor const&) const + 0xbc (0x7fa91ee9081c in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #7: at::mm(at::Tensor const&, at::Tensor const&) + 0x4b (0x7fa91ede16ab in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #8: <unknown function> + 0x2ed0a2f (0x7fa920b22a2f in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #9: <unknown function> + 0xa56530 (0x7fa91e6a8530 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #10: at::Tensor c10::Dispatcher::call<at::Tensor, at::Tensor const&, at::Tensor const&>(c10::TypedOperatorHandle<at::Tensor (at::Tensor const&, at::Tensor const&)> const&, at::Tensor const&, at::Tensor const&) const + 0xbc (0x7fa91ee9081c in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #11: at::Tensor::mm(at::Tensor const&) const + 0x4b (0x7fa91ef76cab in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #12: <unknown function> + 0x2d11fbb (0x7fa920963fbb in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #13: torch::autograd::generated::MmBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x25f (0x7fa92097f7df in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #14: <unknown function> + 0x3375bb7 (0x7fa920fc7bb7 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #15: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&, std::shared_ptr<torch::autograd::ReadyQueue> const&) + 0x1400 (0x7fa920fc3400 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #16: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&) + 0x451 (0x7fa920fc3fa1 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #17: torch::autograd::Engine::thread_init(int, std::shared_ptr<torch::autograd::ReadyQueue> const&, bool) + 0x89 (0x7fa920fbc119 in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #18: torch::autograd::python::PythonEngine::thread_init(int, std::shared_ptr<torch::autograd::ReadyQueue> const&, bool) + 0x4a (0x7fa92e75c4ba in /home/nlp/.local/lib/python3.6/site-packages/torch/lib/libtorch_python.so)
frame #19: <unknown function> + 0xbd6df (0x7fa975fb56df in /usr/lib/x86_64-linux-gnu/libstdc++.so.6)
frame #20: <unknown function> + 0x76db (0x7fa97bad76db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #21: clone + 0x3f (0x7fa97be10a3f in /lib/x86_64-linux-gnu/libc.so.6)
