# **1. Kết nối với google drive**

In [None]:
import os
from google.colab import drive
drive.mount('/content/drive')

os.chdir('/content/drive/MyDrive/NLP 4+1/Cuối kỳ/')
os.getcwd()

Mounted at /content/drive


'/content/drive/MyDrive/NLP 4+1/Cuối kỳ'

# **2. Install và import các thư viện cần thiết**

In [None]:
!pip install transformers --quiet
!pip install pyvi --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m90.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m111.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m78.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m58.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
import time
import pandas as pd
from pyvi import ViTokenizer
from tqdm.notebook import tqdm

from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader

from transformers import GPT2LMHeadModel, GPT2Model
from transformers import DataCollatorForLanguageModeling, TrainingArguments, Trainer
from transformers import TextDataset, GPT2Tokenizer, AutoTokenizer

# **3. Set các biến cục bộ**

In [None]:
import pickle

# saving
def save_file(data, path):
    with open(path + '.pickle', 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

# loading
def load_file(path):
    with open(path, 'rb') as handle:
        data = pickle.load(handle)
    return data

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [None]:
EPOCHS = 100
BATCH_SIZE = 32
path_data = './dataset/'                # đường dẫn data
path_pre_trained = './pre-trained/GPT/'  # đường dẫn chứa các model đã train

In [None]:
# Load the tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('danghuy1999/gpt2-viwiki')
model = GPT2LMHeadModel.from_pretrained('danghuy1999/gpt2-viwiki')
# save
save_file(model, path_pre_trained + 'GPT2LMHeadModel')

Some weights of the model checkpoint at danghuy1999/gpt2-viwiki were not used when initializing GPT2LMHeadModel: ['multiple_choice_head.summary.bias', 'multiple_choice_head.summary.weight']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dro

In [None]:
print("Number of parameters in model: ", model.num_parameters())

Number of parameters in model:  124439808


In [None]:
print("Vocabulary size: ", tokenizer.vocab_size)
print("Maximum model input length: ", tokenizer.max_model_input_sizes['gpt2'])

Vocabulary size:  50257
Maximum model input length:  1024


In [None]:
tokenizer.add_special_tokens({"pad_token": "<pad>", 
                              "bos_token": "<s>",
                              "eos_token": "</s>"})
tokenizer.add_tokens(["<bot>:"])

# save
save_file(tokenizer, path_pre_trained + 'tokenizer')

In [None]:
model.resize_token_embeddings(len(tokenizer))

model = model.to(device)

# **4. Xây dựng class để tạo dataset cho quá trình huấn luyện**

In [None]:
class ChatData(Dataset):
    def __init__(self, path:str, tokenizer):
        self.data = pd.read_csv(path, usecols = [0,1])

        self.X = []
        for q,a in self.data.values:
            self.X.append('<s> '+ q +" <bot>: "+ a +' </s>')
        
        print(self.X[0])

        self.X_encoded = tokenizer(self.X, max_length=32, truncation=True, padding="max_length", return_tensors="pt")
        self.input_ids = self.X_encoded['input_ids']
        self.attention_mask = self.X_encoded['attention_mask']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return (self.input_ids[idx], self.attention_mask[idx])

In [None]:
chatData = ChatData(path_data + 'train.csv', tokenizer)
chatData =  DataLoader(chatData, batch_size=BATCH_SIZE)

<s> có thưởng tết không <bot>: có </s>


# **5. Xây dựng hàm để traning và trả lời câu hỏi từ model**

In [None]:
def train(chatData, model, optim, epochs):
    loss_history = []
    for i in range(epochs):
        total_loss = 0
        print('======== Epoch {:} / {:} ========'.format(i + 1, epochs))
        for X, a in tqdm(chatData):
            X = X.to(device)
            a = a.to(device)
            optim.zero_grad()
            loss = model(X, attention_mask=a, labels=X).loss
            total_loss += loss.item()
            loss.backward()
            optim.step()
        avg_train_loss = total_loss / len(chatData)  
        loss_history.append(avg_train_loss)
        torch.save(model.state_dict(), path_pre_trained + "model_GPT.pt")
        print("  Average training loss: {0:.2f}".format(avg_train_loss))
        print(bot_give_answer("bạn học ngành gì ?"))
    return loss_history
    
def bot_give_answer(inp):
    inp = '<s> '+ inp +" <bot>: "
    inp = tokenizer(inp, return_tensors="pt")
    X = inp["input_ids"].to(device)
    mask = inp["attention_mask"].to(device)
    output = model.generate(X, attention_mask=mask, max_length=32, pad_token_id=0, eos_token_id=50256)
    output = tokenizer.decode(output[0])
    return output

In [None]:
# training
model.train()
optim = Adam(model.parameters(), lr=1e-3)

t0 = time.time()
loss_history = train(chatData, model, optim, epochs=EPOCHS)
time_train = time.time() - t0



  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 2.99
<s> bạn học ngành gì? <bot>: mình học ngành gì </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 2.00
<s> bạn học ngành gì? <bot>: mình học ngành khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 1.63
<s> bạn học ngành gì? <bot>: mình học ngành khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 1.40
<s> bạn học ngành gì? <bot>: mình học ngành kỹ_thuật công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 1.22
<s> bạn học ngành gì? <bot>: mình học công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 1.07
<s> bạn học ngành gì? <bot>: mình học công_nghệ_thuật á </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.94
<s> bạn học ngành gì? <bot>: bạn học khoa_học máy </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.84
<s> bạn học ngành gì? <bot>: mình học công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.74
<s> bạn học ngành gì? <bot>: mình học khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.67
<s> bạn học ngành gì? <bot>: mình học công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.60
<s> bạn học ngành gì? <bot>: mình học công_thức </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.56
<s> bạn học ngành gì? <bot>: ngủ </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.52
<s> bạn học ngành gì? <bot>: đó là ngành về người_ta </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.49
<s> bạn học ngành gì? <bot>: bạn học ngành gì </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.46
<s> bạn học ngành gì? <bot>: nguyễn hữu thọ </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.44
<s> bạn học ngành gì? <bot>: mình học công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.43
<s> bạn học ngành gì? <bot>: chuyên_phạm kỹ_thuật á </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.42
<s> bạn học ngành gì? <bot>: chuyên_ngành khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.41
<s> bạn học ngành gì? <bot>: chuyên_ngành khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.40
<s> bạn học ngành gì? <bot>: bạn học ngành công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.39
<s> bạn học ngành gì? <bot>: uk </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.39
<s> bạn học ngành gì? <bot>: uk </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.38
<s> bạn học ngành gì? <bot>: tui học khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.37
<s> bạn học ngành gì? <bot>: it </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.37
<s> bạn học ngành gì? <bot>: uk học t_pháp cần phải học nhiều lắm </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.37
<s> bạn học ngành gì? <bot>: bạn học chuyên_ngành khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.36
<s> bạn học ngành gì? <bot>: chuyên_phạm kỹ_thuật á </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.36
<s> bạn học ngành gì? <bot>: mình học tdtui bạn cũng học </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.35
<s> bạn học ngành gì? <bot>: sư_phạm kỹ_thuật á </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.35
<s> bạn học ngành gì? <bot>: mình học công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.34
<s> bạn học ngành gì? <bot>: bạn là học chuyên_ngành khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.34
<s> bạn học ngành gì? <bot>: mình đang học không có sao không có </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.34
<s> bạn học ngành gì? <bot>: vấn đồ ăn nhiều quá vậy bạn </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.34
<s> bạn học ngành gì? <bot>: bạn học khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.33
<s> bạn học ngành gì? <bot>: uk </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.33
<s> bạn học ngành gì? <bot>: mình đang học làm đồ để ngày_mai bạn_bè </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.33
<s> bạn học ngành gì? <bot>: giỏi_thế </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.32
<s> bạn học ngành gì? <bot>: ý_vật lo </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.32
<s> bạn học ngành gì? <bot>: còn bạn mình học khoa_học_b </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.32
<s> bạn học ngành gì? <bot>:  red_đề này là không </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.32
<s> bạn học ngành gì? <bot>: bản </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.32
<s> bạn học ngành gì? <bot>: còn bạn đang học quản_trị kinh </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.31
<s> bạn học ngành gì? <bot>: vote xe_thoại iu </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.31
<s> bạn học ngành gì? <bot>: hướng_dẫn trên điện_thoại </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.31
<s> bạn học ngành gì? <bot>: it bạn ạ </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.31
<s> bạn học ngành gì? <bot>: mình đang học tdt </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: ghét iphone </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: giàu </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: vấn học điện_thoại </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: nghề kiếm tiền điện </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: chọn tốt </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: chọn bạn ạ </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: tất_nhiên rồi mình đang tìm chân lại làm dược_sĩ </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: tao đang học công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: 2 làm 5 </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: uk </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: tao học ngành kỹ_thuật phần_mềm </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: chim bạn ạ </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: tui went up there </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.30
<s> bạn học ngành gì? <bot>: bản khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: uk </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: tao đang tìm chân lên xã_web </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: uk </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: uk mình là học công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: uk </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: uk </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: uk </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.29
<s> bạn học ngành gì? <bot>: nghe điện_thoại_thoại_thoại_thoại_oppo </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: trái_đất </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: học khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: chim sátịn </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: mình học_nghề_nghề không </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: noi downey jr </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: tui học nghành khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: tui học nghành khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: bản_động_sản </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: bản_tráng đến đó </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: tao đang học làm 5 ngành công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: tui học went </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: thực_sự thì học ngành gì </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: tao học it </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: tao đang học công_nghệ thông_tin </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: chuyển qua gọi mình nhiều lắm </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: tam_toán </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: mình đang học làm người </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: xinh_thuật </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.28
<s> bạn học ngành gì? <bot>: vấn_chọn hình_thức </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: chuyển_ vô_đời </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: tam_sống </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: xinh bình </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: hiện_tại mình là người đang ở đó </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: minh phương_minh đang mình học ngành kỹ_thuật </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: tao học cntt </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: mình học khoa_học máy_tính </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: bạn học ngành kỹ_thuật phần_mềm </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: tôi học ngành kỹ_thuật phần_mềm </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: tôi học cntt </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: tôi học cntt </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: bản_xuất </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


  0%|          | 0/336 [00:00<?, ?it/s]

  Average training loss: 0.27
<s> bạn học ngành gì? <bot>: tôi học ngành kỹ_thuật phần_mềm </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>


In [None]:
val_compare = {
    'loss_history': loss_history,
    'time_train': time_train
}

# save
save_file(val_compare, path_pre_trained + 'val_compare')