## 1 导包

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertConfig, BertForTokenClassification

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


## 2 读取数据

In [3]:
with open('dataset/train.txt', 'r', encoding='utf-8') as f:
    to_df = []
    count = 1

    for line in tqdm(f.read().split('\n')):
        sentence_id = f"train_{count}"

        if line != '\n' and len(line.strip())>0:
            word_list = line.split()
            if len(word_list) == 2:
                to_df.append([sentence_id] + word_list)
            else: # 如果是空格，split后长度只有1 (tag)
                to_df.append([sentence_id, "[SEP]", word_list[-1]])

        else:
            count += 1

100%|██████████| 2288791/2288791 [00:05<00:00, 420324.44it/s]


In [4]:
data = pd.DataFrame(to_df, columns=['sentence_id', 'words', 'tags'])
data.head()

Unnamed: 0,sentence_id,words,tags
0,train_1,手,B-40
1,train_1,机,I-40
2,train_1,三,B-4
3,train_1,脚,I-4
4,train_1,架,I-4


In [5]:
data['sentence'] = data[['sentence_id','words','tags']].groupby(['sentence_id'])['words'].transform(lambda x: ' '.join(x))
data['word_labels'] = data[['sentence_id','words','tags']].groupby(['sentence_id'])['tags'].transform(lambda x: ','.join(x))
data.head()

Unnamed: 0,sentence_id,words,tags,sentence,word_labels
0,train_1,手,B-40,手 机 三 脚 架 网 红 直 播 支 架 桌 面 自 拍 杆 蓝 牙 遥 控 三 脚 架 ...,"B-40,I-40,B-4,I-4,I-4,B-14,I-14,B-5,I-5,B-4,I-..."
1,train_1,机,I-40,手 机 三 脚 架 网 红 直 播 支 架 桌 面 自 拍 杆 蓝 牙 遥 控 三 脚 架 ...,"B-40,I-40,B-4,I-4,I-4,B-14,I-14,B-5,I-5,B-4,I-..."
2,train_1,三,B-4,手 机 三 脚 架 网 红 直 播 支 架 桌 面 自 拍 杆 蓝 牙 遥 控 三 脚 架 ...,"B-40,I-40,B-4,I-4,I-4,B-14,I-14,B-5,I-5,B-4,I-..."
3,train_1,脚,I-4,手 机 三 脚 架 网 红 直 播 支 架 桌 面 自 拍 杆 蓝 牙 遥 控 三 脚 架 ...,"B-40,I-40,B-4,I-4,I-4,B-14,I-14,B-5,I-5,B-4,I-..."
4,train_1,架,I-4,手 机 三 脚 架 网 红 直 播 支 架 桌 面 自 拍 杆 蓝 牙 遥 控 三 脚 架 ...,"B-40,I-40,B-4,I-4,I-4,B-14,I-14,B-5,I-5,B-4,I-..."


In [6]:
labels_to_ids = {k: v for v, k in enumerate(data.tags.unique())}
ids_to_labels = {v: k for v, k in enumerate(data.tags.unique())}
labels_to_ids

{'B-40': 0,
 'I-40': 1,
 'B-4': 2,
 'I-4': 3,
 'B-14': 4,
 'I-14': 5,
 'B-5': 6,
 'I-5': 7,
 'B-7': 8,
 'I-7': 9,
 'B-11': 10,
 'I-11': 11,
 'B-13': 12,
 'I-13': 13,
 'B-8': 14,
 'I-8': 15,
 'O': 16,
 'B-16': 17,
 'I-16': 18,
 'B-29': 19,
 'I-29': 20,
 'B-9': 21,
 'I-9': 22,
 'B-12': 23,
 'I-12': 24,
 'B-18': 25,
 'I-18': 26,
 'B-1': 27,
 'I-1': 28,
 'B-3': 29,
 'I-3': 30,
 'B-22': 31,
 'I-22': 32,
 'B-37': 33,
 'I-37': 34,
 'B-39': 35,
 'I-39': 36,
 'B-10': 37,
 'I-10': 38,
 'B-36': 39,
 'I-36': 40,
 'B-34': 41,
 'I-34': 42,
 'B-31': 43,
 'I-31': 44,
 'B-38': 45,
 'I-38': 46,
 'B-54': 47,
 'I-54': 48,
 'B-6': 49,
 'I-6': 50,
 'B-30': 51,
 'I-30': 52,
 'B-15': 53,
 'I-15': 54,
 'B-2': 55,
 'I-2': 56,
 'B-49': 57,
 'I-49': 58,
 'B-21': 59,
 'I-21': 60,
 'B-47': 61,
 'I-47': 62,
 'B-23': 63,
 'I-23': 64,
 'B-20': 65,
 'I-20': 66,
 'B-50': 67,
 'I-50': 68,
 'B-46': 69,
 'I-46': 70,
 'B-41': 71,
 'I-41': 72,
 'B-43': 73,
 'I-43': 74,
 'B-48': 75,
 'I-48': 76,
 'B-19': 77,
 'I-19': 78,
 'B-

In [7]:
data = data[["sentence", "word_labels"]].drop_duplicates().reset_index(drop=True)
# 也可以根据sentence_id去重
data.head()

Unnamed: 0,sentence,word_labels
0,手 机 三 脚 架 网 红 直 播 支 架 桌 面 自 拍 杆 蓝 牙 遥 控 三 脚 架 ...,"B-40,I-40,B-4,I-4,I-4,B-14,I-14,B-5,I-5,B-4,I-..."
1,牛 皮 纸 袋 手 提 袋 定 制 l o g o 烘 焙 购 物 服 装 包 装 外 卖 ...,"B-4,I-4,I-4,I-4,B-4,I-4,I-4,B-29,I-29,I-29,I-2..."
2,彩 色 金 属 镂 空 鱼 尾 夹 长 尾 夹 [SEP] 手 帐 设 计 绘 图 文 具 ...,"B-16,I-16,B-12,I-12,B-13,I-13,B-4,I-4,I-4,B-4,..."
3,B o s e [SEP] S o u n d S p o r t [SEP] F r e ...,"B-1,I-1,I-1,I-1,O,B-3,I-3,I-3,I-3,I-3,I-3,I-3,..."
4,壁 挂 炉 专 用 水 空 调 散 热 器 带 风 扇 暖 气 片 水 暖 空 调 明 装 ...,"B-4,I-4,I-4,O,O,B-4,I-4,I-4,B-4,I-4,I-4,B-22,I..."


In [8]:
data['sentence'].apply(lambda x:len(x.split(' '))).describe()

count    39995.000000
mean        56.220828
std         13.473300
min          7.000000
25%         46.000000
50%         56.000000
75%         65.000000
max        101.000000
Name: sentence, dtype: float64

## 3 超参数设置

In [9]:
MAX_LEN = 105 
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 32
EPOCHS = 10
LEARNING_RATE = 2e-05
MAX_GRAD_NORM = 5
MODEL_NAME='bert-base-chinese'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME) # encode_plus()# 整体

## 4 处理数据集

In [10]:
def tokenize_and_preserve_labels(sentence, text_labels, tokenizer):
    
    tokenized_sentences = []
    labels = []
    sentence = sentence.strip() 

    for word, label in zip(sentence.split(), text_labels.split(',')):
        tokenized_word = tokenizer.tokenize(word)  # 逐字分词
        len_after_tok = len(tokenized_word)

        tokenized_sentences.extend(tokenized_word)  # 将单个字分词结果追加到句子分词列表
        labels.extend([label] * len_after_tok)  # 一个词tokenizer后可能会变成多个，将其每一个部分的label都标记为相同
    
    return tokenized_sentences, labels,

In [11]:
data.iloc[0]

sentence       手 机 三 脚 架 网 红 直 播 支 架 桌 面 自 拍 杆 蓝 牙 遥 控 三 脚 架 ...
word_labels    B-40,I-40,B-4,I-4,I-4,B-14,I-14,B-5,I-5,B-4,I-...
Name: 0, dtype: object

In [12]:
# tokenize_and_preserve_labels(data.iloc[0]['sentence'],data.iloc[0]['word_labels'],tokenizer)

In [13]:
class MyDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.len = len(dataframe)
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __getitem__(self, index):
        # 步骤 1: 对每个句子分词
        sentence = self.data['sentence'][index]
        text_labels = self.data['word_labels'][index]
        words_list, labels = tokenize_and_preserve_labels(sentence, text_labels, self.tokenizer)

        # 步骤 2: 添加特殊token并添加对应的标签
        words_list = ["[CLS]"] + words_list + ["[SEP]"]
        labels = ["O"] + labels + ["O"]

        # 步骤 3: 截断/填充
        if len(words_list) > self.max_len:
            words_list = words_list[:self.max_len]
            labels = labels[:self.max_len]
        elif len(words_list) < self.max_len:
            # words_list = words_list + ['[PAD]'for _ in range(self.max_len - len(words_list))]
            # labels = labels + ["O" for _ in range(self.max_len - len(labels))]

            # 用extend不需要赋值！！！
            words_list.extend((self.max_len-len(words_list)) * ["[PAD]"])
            labels.extend((self.max_len-len(labels)) * "O")
        else:
            pass

        # 步骤 4: 构建attention mask
        attention_masks = [1 if word != "[PAD]" else 0 for word in words_list]

        # 步骤 5: 将分词结果转为词表的id表示
        input_ids = self.tokenizer.convert_tokens_to_ids(words_list)
        label_ids = [labels_to_ids[label] for label in labels]

        return {
            "ids" : torch.tensor(input_ids, dtype=torch.long),
            "masks" : torch.tensor(attention_masks, dtype=torch.long),
            "targets" : torch.tensor(label_ids, dtype=torch.long)
        }
    def __len__(self):
        return self.len

In [14]:
from sklearn.model_selection import train_test_split

train_dataset, test_dataset = train_test_split(data,test_size=0.2,random_state=42)

train_dataset = train_dataset.reset_index(drop=True)
test_dataset = test_dataset.reset_index(drop=True)

print("FULL Dataset: {}".format(data.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))

training_set = MyDataset(train_dataset, tokenizer, MAX_LEN)
testing_set = MyDataset(test_dataset, tokenizer, MAX_LEN)

FULL Dataset: (39995, 2)
TRAIN Dataset: (31996, 2)
TEST Dataset: (7999, 2)


In [15]:
training_set[1]

{'ids': tensor([ 101,  881, 5543, 8020,  100,  143,  156,  157,  156, 8021,  100,  100,
          100,  123,  130,  121,  121,  116, 7946, 4635, 4080, 1045, 2802, 1313,
         3322, 2207, 1798, 4080, 1045, 2802, 1313, 3322,  100,  125, 2802, 1313,
         3322, 1555, 1218, 2157, 4500, 1215, 1062,  102, 2135, 3175, 3403, 6981,
          116,  122, 3118,  881, 5543, 1333, 6163, 4797, 7961,  102,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0]),
 'masks': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0

## 5 创建dataloader

In [16]:
train_dataloader = DataLoader(training_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True,  num_workers=0)
test_dataloader = DataLoader(testing_set, batch_size=VALID_BATCH_SIZE, shuffle=True,  num_workers=0)

## 6 定义网络

In [17]:
# 输出有两个：一个为loss和一个为logits
model = BertForTokenClassification.from_pretrained(MODEL_NAME, num_labels=len(labels_to_ids))
model.to(device)

Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-c

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

## 7 模型训练

BERT模型的输入是都是(batch_size, sequence_length)，即二维张量     
如果准备的输入是一维张量，需要.unsqueeze(0)

In [18]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

模型输出有两个：一个为loss和一个为logits      
logits维度为 (batch_size, sequence_length, num_labels)

In [19]:
# 训练函数
def train():
    tr_loss, tr_accuracy = 0, 0
    num_tr_steps = 0
    # 将model设置为train模式
    model.train()

    for idx, data in enumerate(train_dataloader):
        input_ids = data["ids"].to(device, dtype=torch.long)
        attention_masks = data["masks"].to(device, dtype=torch.long)
        labels = data["targets"].to(device, dtype=torch.long)
        # 模型输出有两个：一个为loss和一个为logits 
        outputs = model(input_ids=input_ids, attention_mask=attention_masks, labels=labels)
        loss = outputs[0]
        logits = outputs[1]

        tr_loss += loss.item()
        num_tr_steps += 1

        if idx % 500==0:
            loss_step = tr_loss/num_tr_steps
            print(f"Training loss per 500 training steps: {loss_step}")
        
        # 计算准确率
        flattened_labels = labels.view(-1)  # 本来是二维: batch个labels组成的列表,展平成一维(batch_size * seq_len)
        active_logits = logits.view(-1, model.num_labels) # 模型输出shape (batch_size * seq_len, num_labels)
        flattened_logits = torch.argmax(active_logits, axis=1)

        # MASK所有的[PAD]
        activate_accuracy = attention_masks.view(-1) == 1
        targets = torch.masked_select(flattened_labels, activate_accuracy)
        predictions = torch.masked_select(flattened_logits, activate_accuracy)

        tmp_tr_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy

        # 梯度剪切
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=MAX_GRAD_NORM
        )
        
        # loss反向求导
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_loss = tr_loss/num_tr_steps 
    tr_accuracy = tr_accuracy/num_tr_steps


    print(f"Training loss epoch: {epoch_loss}")
    print(f"Training accuracy epoch: {tr_accuracy}")

# # 训练函数
# def train():
#     tr_loss, tr_accuracy = 0, 0
#     nb_tr_examples, nb_tr_steps = 0, 0
#     tr_preds, tr_labels = [], []
#     # 将model设置为train模式
#     model.train()
    
#     for idx, batch in enumerate(train_dataloader):
        
#         ids = batch['ids'].to(device, dtype = torch.long) #(4,91)
#         mask = batch['masks'].to(device, dtype = torch.long) #(4,91)
#         targets = batch['targets'].to(device, dtype = torch.long)#(4,91)
        
        
#         outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
#         loss, tr_logits = outputs[0],outputs[1]
#         # print(outputs.keys())
#         # print(loss)
#         tr_loss += loss.item()

#         nb_tr_steps += 1
#         nb_tr_examples += targets.size(0)
        
#         if idx % 500==0:
#             loss_step = tr_loss/nb_tr_steps
#             print(f"Training loss per 500 training steps: {loss_step}")
            
#         # 计算准确率
#         flattened_targets = targets.view(-1) # 真实标签 大小 (batch_size * seq_len,)
#         active_logits = tr_logits.view(-1, model.num_labels) # 模型输出shape (batch_size * seq_len, num_labels)
#         flattened_predictions = torch.argmax(active_logits, axis=1) # 取出每个token对应概率最大的标签索引 shape (batch_size * seq_len,)
#         # MASK：PAD
#         active_accuracy = mask.view(-1) == 1 # shape (batch_size * seq_len,)
#         targets = torch.masked_select(flattened_targets, active_accuracy)
#         predictions = torch.masked_select(flattened_predictions, active_accuracy)
        
#         tr_preds.extend(predictions)
#         tr_labels.extend(targets)
        
#         tmp_tr_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
#         tr_accuracy += tmp_tr_accuracy
    
#         # 梯度剪切
#         torch.nn.utils.clip_grad_norm_(
#             parameters=model.parameters(), max_norm=MAX_GRAD_NORM
#         )
        
#         # loss反向求导
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#     epoch_loss = tr_loss / nb_tr_steps
#     tr_accuracy = tr_accuracy / nb_tr_steps
#     print(f"Training loss epoch: {epoch_loss}")
#     print(f"Training accuracy epoch: {tr_accuracy}")

In [20]:
for epoch in tqdm(range(EPOCHS)):
    print(f"Training epoch: {epoch + 1}")
    train()

  0%|          | 0/10 [00:00<?, ?it/s]

Training epoch: 1
Training loss per 500 training steps: 4.975777626037598
Training loss per 500 training steps: 0.7366198420286655


 10%|█         | 1/10 [04:22<39:21, 262.38s/it]

Training loss epoch: 0.578648552685976
Training accuracy epoch: 0.7367803948284127
Training epoch: 2
Training loss per 500 training steps: 0.38794592022895813
Training loss per 500 training steps: 0.36900885650021825


 20%|██        | 2/10 [08:44<34:59, 262.43s/it]

Training loss epoch: 0.3630326453745365
Training accuracy epoch: 0.8079709533593953
Training epoch: 3
Training loss per 500 training steps: 0.33696943521499634
Training loss per 500 training steps: 0.3307598065175934


 30%|███       | 3/10 [13:06<30:34, 262.05s/it]

Training loss epoch: 0.3265049105137587
Training accuracy epoch: 0.8221002438426439
Training epoch: 4
Training loss per 500 training steps: 0.2537868320941925
Training loss per 500 training steps: 0.3028521772451743


 40%|████      | 4/10 [17:28<26:11, 261.87s/it]

Training loss epoch: 0.30267235822975636
Training accuracy epoch: 0.8320443464149426
Training epoch: 5
Training loss per 500 training steps: 0.25788819789886475
Training loss per 500 training steps: 0.28069474680457046


 50%|█████     | 5/10 [21:59<22:07, 265.45s/it]

Training loss epoch: 0.28155801248550416
Training accuracy epoch: 0.8418065281234389
Training epoch: 6
Training loss per 500 training steps: 0.29722538590431213
Training loss per 500 training steps: 0.26301350270559687


 60%|██████    | 6/10 [26:31<17:50, 267.71s/it]

Training loss epoch: 0.2634354070276022
Training accuracy epoch: 0.8501485241984149
Training epoch: 7
Training loss per 500 training steps: 0.18836061656475067
Training loss per 500 training steps: 0.24313516703670374


 70%|███████   | 7/10 [31:04<13:27, 269.19s/it]

Training loss epoch: 0.24625737877190113
Training accuracy epoch: 0.8592509421093886
Training epoch: 8
Training loss per 500 training steps: 0.21763837337493896
Training loss per 500 training steps: 0.2285180841079967


 80%|████████  | 8/10 [35:36<09:00, 270.09s/it]

Training loss epoch: 0.23069896318018437
Training accuracy epoch: 0.8663544738335778
Training epoch: 9
Training loss per 500 training steps: 0.1858900934457779
Training loss per 500 training steps: 0.2112922418617203


 90%|█████████ | 9/10 [40:08<04:30, 270.75s/it]

Training loss epoch: 0.21540155093371868
Training accuracy epoch: 0.874740610315942
Training epoch: 10
Training loss per 500 training steps: 0.1846655309200287
Training loss per 500 training steps: 0.19838245621995773


100%|██████████| 10/10 [44:31<00:00, 267.19s/it]

Training loss epoch: 0.20167457877844572
Training accuracy epoch: 0.8818991130050614





## 8 模型评估

In [23]:
def valid(model, testing_loader):
    # put model in evaluation mode
    model.eval()
    
    eval_loss, eval_accuracy = 0, 0
    nb_eval_examples, nb_eval_steps = 0, 0
    eval_preds, eval_labels = [], []
    
    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):
            
            ids = batch['ids'].to(device, dtype = torch.long)
            mask = batch['masks'].to(device, dtype = torch.long)
            targets = batch['targets'].to(device, dtype = torch.long)
            
            # loss, eval_logits = model(input_ids=ids, attention_mask=mask, labels=targets)
            outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
            loss, eval_logits = outputs[0],outputs[1]
            eval_loss += loss.item()

            nb_eval_steps += 1
            nb_eval_examples += targets.size(0)
        
            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")
              
            # 计算准确率
            flattened_targets = targets.view(-1) # 大小 (batch_size * seq_len,)
            active_logits = eval_logits.view(-1, model.num_labels) # 大小 (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1) # 大小 (batch_size * seq_len,)
            active_accuracy = mask.view(-1) == 1 # 大小 (batch_size * seq_len,)
            targets = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)
            
            eval_labels.extend(targets)
            eval_preds.extend(predictions)
            
            tmp_eval_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy
    
    #print(eval_labels)
    #print(eval_preds)

    labels = [ids_to_labels[id.item()] for id in eval_labels]
    predictions = [ids_to_labels[id.item()] for id in eval_preds]

    #print(labels)
    #print(predictions)
    
    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps
    print(f"Validation Loss: {eval_loss}")
    print(f"Validation Accuracy: {eval_accuracy}")

    return labels, predictions

In [26]:
labels, predictions = valid(model, test_dataloader)

Validation loss per 100 evaluation steps: 0.43385112285614014
Validation loss per 100 evaluation steps: 0.3679109271800164
Validation loss per 100 evaluation steps: 0.3735577353642355
Validation Loss: 0.3728489454388618
Validation Accuracy: 0.8158323835892893
