In [15]:
import pandas as pd

In [16]:
dataset = pd.read_csv('data/processed/openhust_with_id.csv')

In [17]:
dataset.head()

Unnamed: 0,ID,Document,Summary,Dataset
0,1,"Theo đó , các sở , ngành trên địa bàn phải vào...","Các quận , huyện , thị xã tuyên truyền bằng nh...",vietnews
1,2,"Chiều 12/3 , ông Vũ Hùng Triều , Trưởng phòng ...",Cháu bé được phát hiện trong tình trạng chưa c...,vietnews
2,3,"Thoạt đầu nhìn vào bức ảnh , nếu không có dấu ...",Báo tuyết ( Panthera uncia ) được mệnh danh là...,vietnews
3,4,"Khoảng 9h30 sáng 5/6 , anh Đào Nhật Tuấn ( 30 ...","Thấy con chim bị mắc bẫy rơi xuống hồ nước , a...",vietnews
4,5,Liên minh Tự do Dân sự Mỹ ( ACLU ) đã nhận đượ...,"Loại "" huyết thanh nói thật "" mà CIA sử dụng đ...",vietnews


In [18]:
dataset.rename(columns={'Document': 'Text'}, inplace=True)

In [19]:
dataset = dataset[['Summary','Text']]
dataset = dataset.head(10000)

In [20]:
dataset.head()

Unnamed: 0,Summary,Text
0,"Các quận , huyện , thị xã tuyên truyền bằng nh...","Theo đó , các sở , ngành trên địa bàn phải vào..."
1,Cháu bé được phát hiện trong tình trạng chưa c...,"Chiều 12/3 , ông Vũ Hùng Triều , Trưởng phòng ..."
2,Báo tuyết ( Panthera uncia ) được mệnh danh là...,"Thoạt đầu nhìn vào bức ảnh , nếu không có dấu ..."
3,"Thấy con chim bị mắc bẫy rơi xuống hồ nước , a...","Khoảng 9h30 sáng 5/6 , anh Đào Nhật Tuấn ( 30 ..."
4,"Loại "" huyết thanh nói thật "" mà CIA sử dụng đ...",Liên minh Tự do Dân sự Mỹ ( ACLU ) đã nhận đượ...


In [21]:
from datasets import Dataset
dataset = Dataset.from_pandas(dataset)

In [22]:
full_dataset = dataset.train_test_split(test_size=0.2, shuffle=True)
dataset_train = full_dataset['train']
dataset_valid = full_dataset['test']

print(dataset_train)
print(dataset_valid)

Dataset({
    features: ['Summary', 'Text'],
    num_rows: 8000
})
Dataset({
    features: ['Summary', 'Text'],
    num_rows: 2000
})


In [25]:
import torch
import pprint
import evaluate
import numpy as np

In [26]:
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    TrainingArguments,
    Trainer
)

In [27]:
MODEL = 't5-small'
BATCH_SIZE = 4
NUM_PROCS = 4
EPOCHS = 5
OUT_DIR = './result666'
MAX_LENGTH = 512 

In [28]:
tokenizer = T5Tokenizer('models/trained_spiece.model')

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [29]:
from functools import partial
from transformers import T5Tokenizer

# Initialize the tokenizer
# tokenizer = T5Tokenizer.from_pretrained(MODEL)
tokenizer = T5Tokenizer('models/trained_spiece.model')
# Function to convert text data into model inputs and targets
def preprocess_function(examples, tokenizer):
    inputs = [f"summarize: {text}" for text in examples['Text']]
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        truncation=True,
        padding='max_length'
    )

    # Set up the tokenizer for targets
    targets = [summary for summary in examples['Summary']]
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            targets,
            max_length=512,
            truncation=True,
            padding='max_length'
        )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Create a partial function with the tokenizer
preprocess_function_with_tokenizer = partial(preprocess_function, tokenizer=tokenizer)

# Apply the function to the whole dataset
tokenized_train = dataset_train.map(
    preprocess_function_with_tokenizer,
    batched=True,
    num_proc=NUM_PROCS
)
tokenized_valid = dataset_valid.map(
    preprocess_function_with_tokenizer,
    batched=True,
    num_proc=NUM_PROCS
)


Map (num_proc=4):   0%|          | 0/8000 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/2000 [00:00<?, ? examples/s]

In [30]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [31]:
model = T5ForConditionalGeneration.from_pretrained(MODEL)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Total parameters and trainable parameters.
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")

60,506,624 total parameters.
60,506,624 training parameters.


In [32]:
rouge = evaluate.load("rouge")
 
def compute_metrics(eval_pred):
    predictions, labels = eval_pred.predictions[0], eval_pred.label_ids
 
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
 
    result = rouge.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_stemmer=True,
        rouge_types=[
            'rouge1',
            'rouge2',
            'rougeL'
        ]
    )
 
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)
 
    return {k: round(v, 4) for k, v in result.items()}

In [33]:
def preprocess_logits_for_metrics(logits, labels):
    """
    Original Trainer may have a memory leak.
    This is a workaround to avoid storing too many tensors that are not needed.
    """
    pred_ids = torch.argmax(logits[0], dim=-1)
    return pred_ids, labels

In [35]:
training_args = TrainingArguments(
    output_dir=OUT_DIR,
    num_train_epochs=EPOCHS, # EPOCHS = 4
    per_device_train_batch_size=BATCH_SIZE, # BATCH_SIZE = 4
    per_device_eval_batch_size=BATCH_SIZE, 
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir=OUT_DIR,
    logging_steps=10,
    eval_strategy='steps',
    eval_steps=400,
    save_strategy='epoch',
    save_total_limit=2,
    report_to='tensorboard',
    learning_rate=0.0001,
    dataloader_num_workers=4
)
 
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
    compute_metrics=compute_metrics
)
 
history = trainer.train()

  0%|          | 0/10000 [00:00<?, ?it/s]

{'loss': 0.5505, 'grad_norm': 0.6501509547233582, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.01}
{'loss': 0.55, 'grad_norm': 0.5168552398681641, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.01}
{'loss': 0.548, 'grad_norm': 0.7078872323036194, 'learning_rate': 6e-06, 'epoch': 0.01}
{'loss': 0.5454, 'grad_norm': 0.5406089425086975, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.02}
{'loss': 0.5765, 'grad_norm': 0.7442662119865417, 'learning_rate': 1e-05, 'epoch': 0.03}
{'loss': 0.6236, 'grad_norm': 0.6773218512535095, 'learning_rate': 1.2e-05, 'epoch': 0.03}
{'loss': 0.511, 'grad_norm': 0.5719890594482422, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.04}
{'loss': 0.5632, 'grad_norm': 0.8859125375747681, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.04}
{'loss': 0.6622, 'grad_norm': 0.5398176312446594, 'learning_rate': 1.8e-05, 'epoch': 0.04}
{'loss': 0.5168, 'grad_norm': 0.4715711772441864, 'learning_rate': 2e-05, 'epoch': 0.05}
{'loss': 0.5524, 'grad_norm

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.47695666551589966, 'eval_rouge1': 0.6112, 'eval_rouge2': 0.31, 'eval_rougeL': 0.4628, 'eval_gen_len': 42.2295, 'eval_runtime': 93.3387, 'eval_samples_per_second': 21.427, 'eval_steps_per_second': 5.357, 'epoch': 0.2}
{'loss': 0.5204, 'grad_norm': 0.6771959066390991, 'learning_rate': 8.2e-05, 'epoch': 0.2}
{'loss': 0.5303, 'grad_norm': 0.4466670751571655, 'learning_rate': 8.4e-05, 'epoch': 0.21}
{'loss': 0.4724, 'grad_norm': 0.5213791131973267, 'learning_rate': 8.6e-05, 'epoch': 0.21}
{'loss': 0.5627, 'grad_norm': 0.605518102645874, 'learning_rate': 8.800000000000001e-05, 'epoch': 0.22}
{'loss': 0.4768, 'grad_norm': 0.3355215787887573, 'learning_rate': 9e-05, 'epoch': 0.23}
{'loss': 0.4898, 'grad_norm': 0.30185094475746155, 'learning_rate': 9.200000000000001e-05, 'epoch': 0.23}
{'loss': 0.5331, 'grad_norm': 0.6162463426589966, 'learning_rate': 9.4e-05, 'epoch': 0.23}
{'loss': 0.473, 'grad_norm': 1.788686752319336, 'learning_rate': 9.6e-05, 'epoch': 0.24}
{'loss': 0.4675,

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.4514653980731964, 'eval_rouge1': 0.6171, 'eval_rouge2': 0.3189, 'eval_rougeL': 0.4718, 'eval_gen_len': 43.0795, 'eval_runtime': 91.8426, 'eval_samples_per_second': 21.776, 'eval_steps_per_second': 5.444, 'epoch': 0.4}
{'loss': 0.5209, 'grad_norm': 0.42755332589149475, 'learning_rate': 9.673684210526316e-05, 'epoch': 0.41}
{'loss': 0.4757, 'grad_norm': 0.31793132424354553, 'learning_rate': 9.663157894736843e-05, 'epoch': 0.41}
{'loss': 0.4313, 'grad_norm': 0.45539724826812744, 'learning_rate': 9.652631578947369e-05, 'epoch': 0.41}
{'loss': 0.4242, 'grad_norm': 0.33187028765678406, 'learning_rate': 9.642105263157896e-05, 'epoch': 0.42}
{'loss': 0.4605, 'grad_norm': 0.34293368458747864, 'learning_rate': 9.631578947368421e-05, 'epoch': 0.42}
{'loss': 0.4022, 'grad_norm': 0.4436727464199066, 'learning_rate': 9.621052631578947e-05, 'epoch': 0.43}
{'loss': 0.4799, 'grad_norm': 0.3932771682739258, 'learning_rate': 9.610526315789474e-05, 'epoch': 0.43}
{'loss': 0.3722, 'grad_nor

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.43480512499809265, 'eval_rouge1': 0.6147, 'eval_rouge2': 0.3189, 'eval_rougeL': 0.472, 'eval_gen_len': 43.1525, 'eval_runtime': 92.6325, 'eval_samples_per_second': 21.591, 'eval_steps_per_second': 5.398, 'epoch': 0.6}
{'loss': 0.4606, 'grad_norm': 0.30242204666137695, 'learning_rate': 9.252631578947369e-05, 'epoch': 0.6}
{'loss': 0.509, 'grad_norm': 0.40138310194015503, 'learning_rate': 9.242105263157895e-05, 'epoch': 0.61}
{'loss': 0.4494, 'grad_norm': 0.3667277991771698, 'learning_rate': 9.231578947368421e-05, 'epoch': 0.61}
{'loss': 0.4588, 'grad_norm': 0.2850913107395172, 'learning_rate': 9.221052631578948e-05, 'epoch': 0.62}
{'loss': 0.4338, 'grad_norm': 0.3648276925086975, 'learning_rate': 9.210526315789474e-05, 'epoch': 0.62}
{'loss': 0.4304, 'grad_norm': 0.333997517824173, 'learning_rate': 9.200000000000001e-05, 'epoch': 0.63}
{'loss': 0.4195, 'grad_norm': 0.3983985185623169, 'learning_rate': 9.189473684210527e-05, 'epoch': 0.64}
{'loss': 0.483, 'grad_norm': 0.3

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.42746350169181824, 'eval_rouge1': 0.6266, 'eval_rouge2': 0.3268, 'eval_rougeL': 0.4793, 'eval_gen_len': 43.152, 'eval_runtime': 93.2788, 'eval_samples_per_second': 21.441, 'eval_steps_per_second': 5.36, 'epoch': 0.8}
{'loss': 0.4122, 'grad_norm': 0.4203113615512848, 'learning_rate': 8.831578947368422e-05, 'epoch': 0.81}
{'loss': 0.4028, 'grad_norm': 0.5077258944511414, 'learning_rate': 8.821052631578948e-05, 'epoch': 0.81}
{'loss': 0.4645, 'grad_norm': 0.39222678542137146, 'learning_rate': 8.810526315789475e-05, 'epoch': 0.81}
{'loss': 0.4252, 'grad_norm': 0.28349974751472473, 'learning_rate': 8.800000000000001e-05, 'epoch': 0.82}
{'loss': 0.4207, 'grad_norm': 0.2958548069000244, 'learning_rate': 8.789473684210526e-05, 'epoch': 0.82}
{'loss': 0.467, 'grad_norm': 0.32843896746635437, 'learning_rate': 8.778947368421053e-05, 'epoch': 0.83}
{'loss': 0.5081, 'grad_norm': 0.3928925395011902, 'learning_rate': 8.768421052631579e-05, 'epoch': 0.83}
{'loss': 0.3993, 'grad_norm': 

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.42178982496261597, 'eval_rouge1': 0.6339, 'eval_rouge2': 0.3297, 'eval_rougeL': 0.4808, 'eval_gen_len': 43.1525, 'eval_runtime': 97.2819, 'eval_samples_per_second': 20.559, 'eval_steps_per_second': 5.14, 'epoch': 1.0}
{'loss': 0.4876, 'grad_norm': 0.41858693957328796, 'learning_rate': 8.410526315789475e-05, 'epoch': 1.0}
{'loss': 0.3919, 'grad_norm': 0.40605586767196655, 'learning_rate': 8.4e-05, 'epoch': 1.01}
{'loss': 0.429, 'grad_norm': 0.3154928386211395, 'learning_rate': 8.389473684210527e-05, 'epoch': 1.01}
{'loss': 0.417, 'grad_norm': 0.2776026725769043, 'learning_rate': 8.378947368421053e-05, 'epoch': 1.02}
{'loss': 0.4658, 'grad_norm': 0.5477669835090637, 'learning_rate': 8.36842105263158e-05, 'epoch': 1.02}
{'loss': 0.4244, 'grad_norm': 0.3564594089984894, 'learning_rate': 8.357894736842106e-05, 'epoch': 1.03}
{'loss': 0.3916, 'grad_norm': 0.36120083928108215, 'learning_rate': 8.347368421052631e-05, 'epoch': 1.03}
{'loss': 0.439, 'grad_norm': 0.241930827498435

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.4144328534603119, 'eval_rouge1': 0.6297, 'eval_rouge2': 0.3294, 'eval_rougeL': 0.4816, 'eval_gen_len': 43.1525, 'eval_runtime': 96.6116, 'eval_samples_per_second': 20.701, 'eval_steps_per_second': 5.175, 'epoch': 1.2}
{'loss': 0.4469, 'grad_norm': 0.26377540826797485, 'learning_rate': 7.989473684210527e-05, 'epoch': 1.21}
{'loss': 0.4531, 'grad_norm': 0.34014278650283813, 'learning_rate': 7.978947368421053e-05, 'epoch': 1.21}
{'loss': 0.4333, 'grad_norm': 0.5104837417602539, 'learning_rate': 7.96842105263158e-05, 'epoch': 1.22}
{'loss': 0.4065, 'grad_norm': 0.2524182200431824, 'learning_rate': 7.957894736842106e-05, 'epoch': 1.22}
{'loss': 0.3842, 'grad_norm': 0.436619371175766, 'learning_rate': 7.947368421052632e-05, 'epoch': 1.23}
{'loss': 0.3771, 'grad_norm': 0.27353766560554504, 'learning_rate': 7.936842105263158e-05, 'epoch': 1.23}
{'loss': 0.4427, 'grad_norm': 0.35374540090560913, 'learning_rate': 7.926315789473685e-05, 'epoch': 1.23}
{'loss': 0.4865, 'grad_norm':

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.41062265634536743, 'eval_rouge1': 0.642, 'eval_rouge2': 0.3363, 'eval_rougeL': 0.4863, 'eval_gen_len': 43.1525, 'eval_runtime': 96.8715, 'eval_samples_per_second': 20.646, 'eval_steps_per_second': 5.161, 'epoch': 1.4}
{'loss': 0.3814, 'grad_norm': 0.37047505378723145, 'learning_rate': 7.56842105263158e-05, 'epoch': 1.41}
{'loss': 0.4381, 'grad_norm': 0.40300628542900085, 'learning_rate': 7.557894736842106e-05, 'epoch': 1.41}
{'loss': 0.381, 'grad_norm': 0.40449681878089905, 'learning_rate': 7.547368421052632e-05, 'epoch': 1.42}
{'loss': 0.4134, 'grad_norm': 0.3108035624027252, 'learning_rate': 7.536842105263158e-05, 'epoch': 1.42}
{'loss': 0.4728, 'grad_norm': 0.5184997916221619, 'learning_rate': 7.526315789473685e-05, 'epoch': 1.43}
{'loss': 0.3988, 'grad_norm': 0.28799107670783997, 'learning_rate': 7.515789473684211e-05, 'epoch': 1.43}
{'loss': 0.3972, 'grad_norm': 0.3201943635940552, 'learning_rate': 7.505263157894737e-05, 'epoch': 1.44}
{'loss': 0.4187, 'grad_norm':

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.40462082624435425, 'eval_rouge1': 0.641, 'eval_rouge2': 0.3369, 'eval_rougeL': 0.4879, 'eval_gen_len': 43.1525, 'eval_runtime': 98.6568, 'eval_samples_per_second': 20.272, 'eval_steps_per_second': 5.068, 'epoch': 1.6}
{'loss': 0.4387, 'grad_norm': 0.354946494102478, 'learning_rate': 7.147368421052631e-05, 'epoch': 1.6}
{'loss': 0.4085, 'grad_norm': 0.3172445297241211, 'learning_rate': 7.136842105263159e-05, 'epoch': 1.61}
{'loss': 0.3978, 'grad_norm': 0.3124130964279175, 'learning_rate': 7.126315789473685e-05, 'epoch': 1.61}
{'loss': 0.3833, 'grad_norm': 0.45782408118247986, 'learning_rate': 7.115789473684211e-05, 'epoch': 1.62}
{'loss': 0.4352, 'grad_norm': 0.3568982183933258, 'learning_rate': 7.105263157894737e-05, 'epoch': 1.62}
{'loss': 0.4647, 'grad_norm': 0.37002861499786377, 'learning_rate': 7.094736842105264e-05, 'epoch': 1.63}
{'loss': 0.4697, 'grad_norm': 0.27991798520088196, 'learning_rate': 7.08421052631579e-05, 'epoch': 1.64}
{'loss': 0.475, 'grad_norm': 0.

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.4011801779270172, 'eval_rouge1': 0.6488, 'eval_rouge2': 0.3405, 'eval_rougeL': 0.4911, 'eval_gen_len': 43.1525, 'eval_runtime': 95.5244, 'eval_samples_per_second': 20.937, 'eval_steps_per_second': 5.234, 'epoch': 1.8}
{'loss': 0.4977, 'grad_norm': 0.4730972349643707, 'learning_rate': 6.726315789473685e-05, 'epoch': 1.81}
{'loss': 0.3327, 'grad_norm': 0.25124016404151917, 'learning_rate': 6.71578947368421e-05, 'epoch': 1.81}
{'loss': 0.4482, 'grad_norm': 0.377005010843277, 'learning_rate': 6.705263157894737e-05, 'epoch': 1.81}
{'loss': 0.3423, 'grad_norm': 0.30536067485809326, 'learning_rate': 6.694736842105264e-05, 'epoch': 1.82}
{'loss': 0.4381, 'grad_norm': 0.5284860134124756, 'learning_rate': 6.68421052631579e-05, 'epoch': 1.82}
{'loss': 0.4006, 'grad_norm': 0.3184221684932709, 'learning_rate': 6.673684210526316e-05, 'epoch': 1.83}
{'loss': 0.4025, 'grad_norm': 0.2792104482650757, 'learning_rate': 6.663157894736842e-05, 'epoch': 1.83}
{'loss': 0.3769, 'grad_norm': 0.

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3967725336551666, 'eval_rouge1': 0.6502, 'eval_rouge2': 0.3416, 'eval_rougeL': 0.4929, 'eval_gen_len': 43.1525, 'eval_runtime': 94.0076, 'eval_samples_per_second': 21.275, 'eval_steps_per_second': 5.319, 'epoch': 2.0}
{'loss': 0.4326, 'grad_norm': 0.23297114670276642, 'learning_rate': 6.305263157894738e-05, 'epoch': 2.0}
{'loss': 0.3693, 'grad_norm': 0.4418458342552185, 'learning_rate': 6.294736842105264e-05, 'epoch': 2.01}
{'loss': 0.3533, 'grad_norm': 0.3066933751106262, 'learning_rate': 6.28421052631579e-05, 'epoch': 2.02}
{'loss': 0.3869, 'grad_norm': 0.370021790266037, 'learning_rate': 6.273684210526316e-05, 'epoch': 2.02}
{'loss': 0.4001, 'grad_norm': 0.35760387778282166, 'learning_rate': 6.263157894736842e-05, 'epoch': 2.02}
{'loss': 0.4476, 'grad_norm': 0.23314808309078217, 'learning_rate': 6.252631578947369e-05, 'epoch': 2.03}
{'loss': 0.3396, 'grad_norm': 0.35580331087112427, 'learning_rate': 6.242105263157895e-05, 'epoch': 2.04}
{'loss': 0.4416, 'grad_norm': 

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.39395812153816223, 'eval_rouge1': 0.6522, 'eval_rouge2': 0.3426, 'eval_rougeL': 0.495, 'eval_gen_len': 43.1525, 'eval_runtime': 94.3616, 'eval_samples_per_second': 21.195, 'eval_steps_per_second': 5.299, 'epoch': 2.2}
{'loss': 0.4121, 'grad_norm': 0.6152205467224121, 'learning_rate': 5.88421052631579e-05, 'epoch': 2.21}
{'loss': 0.4147, 'grad_norm': 0.37999778985977173, 'learning_rate': 5.8736842105263154e-05, 'epoch': 2.21}
{'loss': 0.4026, 'grad_norm': 0.37444114685058594, 'learning_rate': 5.863157894736843e-05, 'epoch': 2.21}
{'loss': 0.4487, 'grad_norm': 0.4095427989959717, 'learning_rate': 5.852631578947369e-05, 'epoch': 2.22}
{'loss': 0.4262, 'grad_norm': 0.34542468190193176, 'learning_rate': 5.8421052631578954e-05, 'epoch': 2.23}
{'loss': 0.3279, 'grad_norm': 0.2913527488708496, 'learning_rate': 5.8315789473684214e-05, 'epoch': 2.23}
{'loss': 0.2986, 'grad_norm': 0.3448982238769531, 'learning_rate': 5.821052631578948e-05, 'epoch': 2.23}
{'loss': 0.4139, 'grad_nor

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.39222782850265503, 'eval_rouge1': 0.6544, 'eval_rouge2': 0.3461, 'eval_rougeL': 0.4968, 'eval_gen_len': 43.1525, 'eval_runtime': 96.7451, 'eval_samples_per_second': 20.673, 'eval_steps_per_second': 5.168, 'epoch': 2.4}
{'loss': 0.4328, 'grad_norm': 0.4399929344654083, 'learning_rate': 5.4631578947368425e-05, 'epoch': 2.41}
{'loss': 0.3818, 'grad_norm': 0.47657519578933716, 'learning_rate': 5.452631578947369e-05, 'epoch': 2.41}
{'loss': 0.4377, 'grad_norm': 0.27068892121315, 'learning_rate': 5.442105263157895e-05, 'epoch': 2.42}
{'loss': 0.4035, 'grad_norm': 0.3354891538619995, 'learning_rate': 5.431578947368421e-05, 'epoch': 2.42}
{'loss': 0.4103, 'grad_norm': 0.3966781497001648, 'learning_rate': 5.421052631578948e-05, 'epoch': 2.42}
{'loss': 0.3676, 'grad_norm': 0.3752385973930359, 'learning_rate': 5.410526315789474e-05, 'epoch': 2.43}
{'loss': 0.3744, 'grad_norm': 0.4348120093345642, 'learning_rate': 5.4000000000000005e-05, 'epoch': 2.44}
{'loss': 0.4905, 'grad_norm':

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3893381953239441, 'eval_rouge1': 0.6564, 'eval_rouge2': 0.3454, 'eval_rougeL': 0.4973, 'eval_gen_len': 43.1525, 'eval_runtime': 95.0835, 'eval_samples_per_second': 21.034, 'eval_steps_per_second': 5.259, 'epoch': 2.6}
{'loss': 0.3256, 'grad_norm': 0.37922459840774536, 'learning_rate': 5.042105263157895e-05, 'epoch': 2.6}
{'loss': 0.4464, 'grad_norm': 0.35446053743362427, 'learning_rate': 5.0315789473684216e-05, 'epoch': 2.61}
{'loss': 0.3402, 'grad_norm': 0.22196196019649506, 'learning_rate': 5.0210526315789476e-05, 'epoch': 2.62}
{'loss': 0.3773, 'grad_norm': 0.3503531813621521, 'learning_rate': 5.010526315789474e-05, 'epoch': 2.62}
{'loss': 0.3943, 'grad_norm': 0.35726380348205566, 'learning_rate': 5e-05, 'epoch': 2.62}
{'loss': 0.3546, 'grad_norm': 0.341122031211853, 'learning_rate': 4.989473684210527e-05, 'epoch': 2.63}
{'loss': 0.3549, 'grad_norm': 0.3757414221763611, 'learning_rate': 4.978947368421053e-05, 'epoch': 2.63}
{'loss': 0.428, 'grad_norm': 0.302809268236

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3871671259403229, 'eval_rouge1': 0.657, 'eval_rouge2': 0.348, 'eval_rougeL': 0.4988, 'eval_gen_len': 43.1525, 'eval_runtime': 95.8729, 'eval_samples_per_second': 20.861, 'eval_steps_per_second': 5.215, 'epoch': 2.8}
{'loss': 0.4222, 'grad_norm': 0.33852148056030273, 'learning_rate': 4.6210526315789473e-05, 'epoch': 2.81}
{'loss': 0.4331, 'grad_norm': 0.2858482599258423, 'learning_rate': 4.610526315789474e-05, 'epoch': 2.81}
{'loss': 0.3718, 'grad_norm': 0.40588754415512085, 'learning_rate': 4.600000000000001e-05, 'epoch': 2.81}
{'loss': 0.3941, 'grad_norm': 0.4647252857685089, 'learning_rate': 4.589473684210526e-05, 'epoch': 2.82}
{'loss': 0.4147, 'grad_norm': 0.34081804752349854, 'learning_rate': 4.5789473684210527e-05, 'epoch': 2.83}
{'loss': 0.4238, 'grad_norm': 0.27213427424430847, 'learning_rate': 4.568421052631579e-05, 'epoch': 2.83}
{'loss': 0.3984, 'grad_norm': 0.30905666947364807, 'learning_rate': 4.557894736842105e-05, 'epoch': 2.83}
{'loss': 0.3909, 'grad_nor

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3850921094417572, 'eval_rouge1': 0.6596, 'eval_rouge2': 0.3499, 'eval_rougeL': 0.5002, 'eval_gen_len': 43.1525, 'eval_runtime': 94.9179, 'eval_samples_per_second': 21.071, 'eval_steps_per_second': 5.268, 'epoch': 3.0}
{'loss': 0.3282, 'grad_norm': 0.21243780851364136, 'learning_rate': 4.2e-05, 'epoch': 3.0}
{'loss': 0.4394, 'grad_norm': 0.3380967676639557, 'learning_rate': 4.1894736842105264e-05, 'epoch': 3.01}
{'loss': 0.3769, 'grad_norm': 0.3439804017543793, 'learning_rate': 4.178947368421053e-05, 'epoch': 3.02}
{'loss': 0.3925, 'grad_norm': 0.4685874581336975, 'learning_rate': 4.168421052631579e-05, 'epoch': 3.02}
{'loss': 0.3308, 'grad_norm': 0.35777977108955383, 'learning_rate': 4.157894736842106e-05, 'epoch': 3.02}
{'loss': 0.3951, 'grad_norm': 0.38589778542518616, 'learning_rate': 4.147368421052632e-05, 'epoch': 3.03}
{'loss': 0.3726, 'grad_norm': 0.35979053378105164, 'learning_rate': 4.136842105263158e-05, 'epoch': 3.04}
{'loss': 0.3961, 'grad_norm': 0.303291261

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3857538402080536, 'eval_rouge1': 0.6623, 'eval_rouge2': 0.3507, 'eval_rougeL': 0.5009, 'eval_gen_len': 43.1525, 'eval_runtime': 97.6964, 'eval_samples_per_second': 20.472, 'eval_steps_per_second': 5.118, 'epoch': 3.2}
{'loss': 0.4269, 'grad_norm': 0.482953816652298, 'learning_rate': 3.778947368421053e-05, 'epoch': 3.21}
{'loss': 0.4684, 'grad_norm': 0.5197187662124634, 'learning_rate': 3.768421052631579e-05, 'epoch': 3.21}
{'loss': 0.3758, 'grad_norm': 0.37599077820777893, 'learning_rate': 3.7578947368421055e-05, 'epoch': 3.21}
{'loss': 0.3925, 'grad_norm': 0.3407813012599945, 'learning_rate': 3.7473684210526315e-05, 'epoch': 3.22}
{'loss': 0.4418, 'grad_norm': 0.3578108549118042, 'learning_rate': 3.736842105263158e-05, 'epoch': 3.23}
{'loss': 0.3754, 'grad_norm': 0.36897143721580505, 'learning_rate': 3.726315789473684e-05, 'epoch': 3.23}
{'loss': 0.3292, 'grad_norm': 0.3507036864757538, 'learning_rate': 3.715789473684211e-05, 'epoch': 3.23}
{'loss': 0.3289, 'grad_norm'

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.38172048330307007, 'eval_rouge1': 0.6599, 'eval_rouge2': 0.3514, 'eval_rougeL': 0.5018, 'eval_gen_len': 43.1525, 'eval_runtime': 98.5709, 'eval_samples_per_second': 20.29, 'eval_steps_per_second': 5.072, 'epoch': 3.4}
{'loss': 0.3602, 'grad_norm': 0.44047197699546814, 'learning_rate': 3.357894736842105e-05, 'epoch': 3.41}
{'loss': 0.3609, 'grad_norm': 0.32805073261260986, 'learning_rate': 3.347368421052632e-05, 'epoch': 3.41}
{'loss': 0.3739, 'grad_norm': 0.3099897801876068, 'learning_rate': 3.336842105263158e-05, 'epoch': 3.42}
{'loss': 0.3602, 'grad_norm': 0.46670353412628174, 'learning_rate': 3.3263157894736846e-05, 'epoch': 3.42}
{'loss': 0.3864, 'grad_norm': 0.3673863708972931, 'learning_rate': 3.3157894736842106e-05, 'epoch': 3.42}
{'loss': 0.3712, 'grad_norm': 0.3453574776649475, 'learning_rate': 3.3052631578947366e-05, 'epoch': 3.43}
{'loss': 0.3478, 'grad_norm': 0.3306275010108948, 'learning_rate': 3.294736842105263e-05, 'epoch': 3.44}
{'loss': 0.3964, 'grad_no

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3812352418899536, 'eval_rouge1': 0.6625, 'eval_rouge2': 0.353, 'eval_rougeL': 0.5027, 'eval_gen_len': 43.1525, 'eval_runtime': 93.6017, 'eval_samples_per_second': 21.367, 'eval_steps_per_second': 5.342, 'epoch': 3.6}
{'loss': 0.3945, 'grad_norm': 0.377096027135849, 'learning_rate': 2.9368421052631577e-05, 'epoch': 3.6}
{'loss': 0.3241, 'grad_norm': 0.2910347878932953, 'learning_rate': 2.9263157894736844e-05, 'epoch': 3.61}
{'loss': 0.4094, 'grad_norm': 0.35133612155914307, 'learning_rate': 2.9157894736842107e-05, 'epoch': 3.62}
{'loss': 0.3278, 'grad_norm': 0.30958428978919983, 'learning_rate': 2.905263157894737e-05, 'epoch': 3.62}
{'loss': 0.3875, 'grad_norm': 0.3750988245010376, 'learning_rate': 2.8947368421052634e-05, 'epoch': 3.62}
{'loss': 0.4457, 'grad_norm': 0.37195509672164917, 'learning_rate': 2.8842105263157897e-05, 'epoch': 3.63}
{'loss': 0.3577, 'grad_norm': 0.23994621634483337, 'learning_rate': 2.8736842105263163e-05, 'epoch': 3.63}
{'loss': 0.4278, 'grad_n

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.37977445125579834, 'eval_rouge1': 0.6639, 'eval_rouge2': 0.3539, 'eval_rougeL': 0.5036, 'eval_gen_len': 43.1525, 'eval_runtime': 95.6153, 'eval_samples_per_second': 20.917, 'eval_steps_per_second': 5.229, 'epoch': 3.8}
{'loss': 0.3188, 'grad_norm': 0.32952362298965454, 'learning_rate': 2.5157894736842108e-05, 'epoch': 3.81}
{'loss': 0.3986, 'grad_norm': 0.41450271010398865, 'learning_rate': 2.505263157894737e-05, 'epoch': 3.81}
{'loss': 0.3387, 'grad_norm': 0.38467103242874146, 'learning_rate': 2.4947368421052635e-05, 'epoch': 3.81}
{'loss': 0.3545, 'grad_norm': 0.29494762420654297, 'learning_rate': 2.4842105263157898e-05, 'epoch': 3.82}
{'loss': 0.3749, 'grad_norm': 0.33254092931747437, 'learning_rate': 2.4736842105263158e-05, 'epoch': 3.83}
{'loss': 0.3881, 'grad_norm': 0.4268513023853302, 'learning_rate': 2.463157894736842e-05, 'epoch': 3.83}
{'loss': 0.3438, 'grad_norm': 0.30396050214767456, 'learning_rate': 2.4526315789473688e-05, 'epoch': 3.83}
{'loss': 0.4231, 'g

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.37760791182518005, 'eval_rouge1': 0.6634, 'eval_rouge2': 0.3534, 'eval_rougeL': 0.5048, 'eval_gen_len': 43.1525, 'eval_runtime': 94.8624, 'eval_samples_per_second': 21.083, 'eval_steps_per_second': 5.271, 'epoch': 4.0}
{'loss': 0.4105, 'grad_norm': 0.4358352720737457, 'learning_rate': 2.0947368421052632e-05, 'epoch': 4.0}
{'loss': 0.4378, 'grad_norm': 0.3567156195640564, 'learning_rate': 2.0842105263157895e-05, 'epoch': 4.01}
{'loss': 0.3893, 'grad_norm': 0.33854159712791443, 'learning_rate': 2.073684210526316e-05, 'epoch': 4.01}
{'loss': 0.3652, 'grad_norm': 0.5534478425979614, 'learning_rate': 2.0631578947368422e-05, 'epoch': 4.02}
{'loss': 0.343, 'grad_norm': 0.5266255140304565, 'learning_rate': 2.0526315789473685e-05, 'epoch': 4.03}
{'loss': 0.3962, 'grad_norm': 0.5702618956565857, 'learning_rate': 2.042105263157895e-05, 'epoch': 4.03}
{'loss': 0.3967, 'grad_norm': 0.47531047463417053, 'learning_rate': 2.0315789473684212e-05, 'epoch': 4.04}
{'loss': 0.3327, 'grad_no

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.37738192081451416, 'eval_rouge1': 0.6636, 'eval_rouge2': 0.3538, 'eval_rougeL': 0.5047, 'eval_gen_len': 43.1525, 'eval_runtime': 100.705, 'eval_samples_per_second': 19.86, 'eval_steps_per_second': 4.965, 'epoch': 4.2}
{'loss': 0.3769, 'grad_norm': 0.43929919600486755, 'learning_rate': 1.673684210526316e-05, 'epoch': 4.21}
{'loss': 0.3278, 'grad_norm': 0.29125890135765076, 'learning_rate': 1.6631578947368423e-05, 'epoch': 4.21}
{'loss': 0.411, 'grad_norm': 0.35786861181259155, 'learning_rate': 1.6526315789473683e-05, 'epoch': 4.21}
{'loss': 0.3669, 'grad_norm': 0.3043248653411865, 'learning_rate': 1.642105263157895e-05, 'epoch': 4.22}
{'loss': 0.379, 'grad_norm': 0.4945774972438812, 'learning_rate': 1.6315789473684213e-05, 'epoch': 4.22}
{'loss': 0.4545, 'grad_norm': 0.41098955273628235, 'learning_rate': 1.6210526315789473e-05, 'epoch': 4.23}
{'loss': 0.3702, 'grad_norm': 0.4808301627635956, 'learning_rate': 1.6105263157894736e-05, 'epoch': 4.24}
{'loss': 0.3792, 'grad_n

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3765319287776947, 'eval_rouge1': 0.665, 'eval_rouge2': 0.3547, 'eval_rougeL': 0.5048, 'eval_gen_len': 43.1525, 'eval_runtime': 94.91, 'eval_samples_per_second': 21.073, 'eval_steps_per_second': 5.268, 'epoch': 4.4}
{'loss': 0.3634, 'grad_norm': 0.3395346701145172, 'learning_rate': 1.2526315789473686e-05, 'epoch': 4.41}
{'loss': 0.4003, 'grad_norm': 0.3949279189109802, 'learning_rate': 1.2421052631578949e-05, 'epoch': 4.41}
{'loss': 0.3456, 'grad_norm': 0.36869537830352783, 'learning_rate': 1.231578947368421e-05, 'epoch': 4.42}
{'loss': 0.3303, 'grad_norm': 0.3451389670372009, 'learning_rate': 1.2210526315789474e-05, 'epoch': 4.42}
{'loss': 0.4412, 'grad_norm': 0.48967695236206055, 'learning_rate': 1.2105263157894737e-05, 'epoch': 4.42}
{'loss': 0.3931, 'grad_norm': 0.2733074724674225, 'learning_rate': 1.2e-05, 'epoch': 4.43}
{'loss': 0.3576, 'grad_norm': 0.3676724433898926, 'learning_rate': 1.1894736842105264e-05, 'epoch': 4.43}
{'loss': 0.3617, 'grad_norm': 0.600617110

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.37647417187690735, 'eval_rouge1': 0.6653, 'eval_rouge2': 0.3555, 'eval_rougeL': 0.5057, 'eval_gen_len': 43.1525, 'eval_runtime': 97.7539, 'eval_samples_per_second': 20.46, 'eval_steps_per_second': 5.115, 'epoch': 4.6}
{'loss': 0.3786, 'grad_norm': 0.4645114839076996, 'learning_rate': 8.315789473684212e-06, 'epoch': 4.61}
{'loss': 0.3215, 'grad_norm': 0.28273457288742065, 'learning_rate': 8.210526315789475e-06, 'epoch': 4.61}
{'loss': 0.3744, 'grad_norm': 0.46975570917129517, 'learning_rate': 8.105263157894736e-06, 'epoch': 4.62}
{'loss': 0.3889, 'grad_norm': 0.30785834789276123, 'learning_rate': 8.000000000000001e-06, 'epoch': 4.62}
{'loss': 0.342, 'grad_norm': 0.36643776297569275, 'learning_rate': 7.894736842105263e-06, 'epoch': 4.62}
{'loss': 0.342, 'grad_norm': 0.5422959923744202, 'learning_rate': 7.789473684210528e-06, 'epoch': 4.63}
{'loss': 0.3125, 'grad_norm': 0.29334431886672974, 'learning_rate': 7.68421052631579e-06, 'epoch': 4.63}
{'loss': 0.3753, 'grad_norm':

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3764376938343048, 'eval_rouge1': 0.6651, 'eval_rouge2': 0.3551, 'eval_rougeL': 0.5051, 'eval_gen_len': 43.1525, 'eval_runtime': 91.9166, 'eval_samples_per_second': 21.759, 'eval_steps_per_second': 5.44, 'epoch': 4.8}
{'loss': 0.3772, 'grad_norm': 0.5875990390777588, 'learning_rate': 4.105263157894737e-06, 'epoch': 4.8}
{'loss': 0.3969, 'grad_norm': 0.4689684808254242, 'learning_rate': 4.000000000000001e-06, 'epoch': 4.81}
{'loss': 0.3748, 'grad_norm': 0.34493255615234375, 'learning_rate': 3.894736842105264e-06, 'epoch': 4.81}
{'loss': 0.3576, 'grad_norm': 0.3136988878250122, 'learning_rate': 3.789473684210527e-06, 'epoch': 4.82}
{'loss': 0.4399, 'grad_norm': 0.3877088725566864, 'learning_rate': 3.6842105263157892e-06, 'epoch': 4.83}
{'loss': 0.3627, 'grad_norm': 0.35851752758026123, 'learning_rate': 3.5789473684210525e-06, 'epoch': 4.83}
{'loss': 0.3783, 'grad_norm': 0.3726615905761719, 'learning_rate': 3.4736842105263158e-06, 'epoch': 4.83}
{'loss': 0.3339, 'grad_norm'

  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3762052655220032, 'eval_rouge1': 0.6654, 'eval_rouge2': 0.3551, 'eval_rougeL': 0.5051, 'eval_gen_len': 43.1525, 'eval_runtime': 93.546, 'eval_samples_per_second': 21.38, 'eval_steps_per_second': 5.345, 'epoch': 5.0}
{'train_runtime': 4451.3826, 'train_samples_per_second': 8.986, 'train_steps_per_second': 2.246, 'train_loss': 0.40834275386333463, 'epoch': 5.0}


In [36]:
model_path = f"{OUT_DIR}/checkpoint-10000"  # the path where you saved your model
model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = T5Tokenizer('models/trained_spiece.model')

In [37]:
def summarize_text(text, model, tokenizer, max_length=512, num_beams=5):
    # Preprocess the text
    inputs = tokenizer.encode(
        "summarize: " + text,
        return_tensors='pt',
        max_length=max_length,
        truncation=True
    )

    # Generate the summary
    summary_ids = model.generate(
        inputs,
        max_length=200,
        num_beams=num_beams,
        # early_stopping=True,
    )

    # Decode and return the summary
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [38]:
text = 'Khu vực Nam Bộ có nắng nóng và nắng nóng gay gắt trong các ngày từ 1-8/5; sau đó từ 9/5, nắng nóng chủ yếu tập trung tại các tỉnh miền Đông Nam Bộ. Trên cả nước đã ghi nhận nhiều trạm khí tượng xảy ra giá trị nhiệt độ cao nhất ngày vượt giá trị lịch sử. Cụ thể, cùng trong ngày 1/5, ở Đông Hà (Quảng Trị) đạt đến 43,2 độ, vượt mức 42,3 độ năm 2023; Huế (Thừa Thiên Huế) 42,1 độ, vượt kỷ lục 41,3 độ năm 1983; Đà Nẵng 41,5, vượt mức 40,5 năm 1983 hay Thủ Dầu Một (Bình Dương) 38,9 độ ngày 2/5, vượt mức 38,7 năm 2016,…'
print(text)

Khu vực Nam Bộ có nắng nóng và nắng nóng gay gắt trong các ngày từ 1-8/5; sau đó từ 9/5, nắng nóng chủ yếu tập trung tại các tỉnh miền Đông Nam Bộ. Trên cả nước đã ghi nhận nhiều trạm khí tượng xảy ra giá trị nhiệt độ cao nhất ngày vượt giá trị lịch sử. Cụ thể, cùng trong ngày 1/5, ở Đông Hà (Quảng Trị) đạt đến 43,2 độ, vượt mức 42,3 độ năm 2023; Huế (Thừa Thiên Huế) 42,1 độ, vượt kỷ lục 41,3 độ năm 1983; Đà Nẵng 41,5, vượt mức 40,5 năm 1983 hay Thủ Dầu Một (Bình Dương) 38,9 độ ngày 2/5, vượt mức 38,7 năm 2016,…


In [40]:
summarize_text(text, model, tokenizer, max_length=1000)

'Trong nắng nóng chủ yếu tập trung tại các tỉnh miên Đông Nam Bộ. Trên cả nước đã ghi nhận nhiêu trạm khí tượng xảy ra giá trị nhiệt độ cao nhất ngày vượt giá trị lịch sử. Cụ thể, cùng trong ngày 1/5, ở Đông Hà (Quảng Trị) đạt đến 43,2 độ, vượt mức 38,7 năm 1983; Đà Nẵng 41,5.'

In [58]:
import json

# Đọc file JSON
with open('result666/checkpoint-10000/trainer_state.json', 'r') as file:
    data = json.load(file)

# Lấy từ điển có trường 'log_history'
log_history = data.get('log_history', [])


filtered_logs = [log for log in log_history if log.get('step', 0) % 400 == 0 and 'eval_rouge1' in log]

# In ra các từ điển đã lọc được
for log in filtered_logs:
    print(log)



{'epoch': 0.2, 'eval_gen_len': 42.2295, 'eval_loss': 0.47695666551589966, 'eval_rouge1': 0.6112, 'eval_rouge2': 0.31, 'eval_rougeL': 0.4628, 'eval_runtime': 93.3387, 'eval_samples_per_second': 21.427, 'eval_steps_per_second': 5.357, 'step': 400}
{'epoch': 0.4, 'eval_gen_len': 43.0795, 'eval_loss': 0.4514653980731964, 'eval_rouge1': 0.6171, 'eval_rouge2': 0.3189, 'eval_rougeL': 0.4718, 'eval_runtime': 91.8426, 'eval_samples_per_second': 21.776, 'eval_steps_per_second': 5.444, 'step': 800}
{'epoch': 0.6, 'eval_gen_len': 43.1525, 'eval_loss': 0.43480512499809265, 'eval_rouge1': 0.6147, 'eval_rouge2': 0.3189, 'eval_rougeL': 0.472, 'eval_runtime': 92.6325, 'eval_samples_per_second': 21.591, 'eval_steps_per_second': 5.398, 'step': 1200}
{'epoch': 0.8, 'eval_gen_len': 43.152, 'eval_loss': 0.42746350169181824, 'eval_rouge1': 0.6266, 'eval_rouge2': 0.3268, 'eval_rougeL': 0.4793, 'eval_runtime': 93.2788, 'eval_samples_per_second': 21.441, 'eval_steps_per_second': 5.36, 'step': 1600}
{'epoch': 1.

In [63]:
import pandas as pd

# Chuyển danh sách từ điển thành DataFrame
df = pd.DataFrame(filtered_logs)

# Tạo một bản sao của DataFrame
styled_df = df.style

# Tùy chỉnh màu sắc cho cột 'eval_rouge1'
styled_df = styled_df.apply(lambda row: ['color: blue' if val is None else '' for val in row], axis=1, subset=['eval_rouge1'])

# Hiển thị DataFrame đã tùy chỉnh với màu sắc
styled_df


Unnamed: 0,epoch,eval_gen_len,eval_loss,eval_rouge1,eval_rouge2,eval_rougeL,eval_runtime,eval_samples_per_second,eval_steps_per_second,step
0,0.2,42.2295,0.476957,0.6112,0.31,0.4628,93.3387,21.427,5.357,400
1,0.4,43.0795,0.451465,0.6171,0.3189,0.4718,91.8426,21.776,5.444,800
2,0.6,43.1525,0.434805,0.6147,0.3189,0.472,92.6325,21.591,5.398,1200
3,0.8,43.152,0.427464,0.6266,0.3268,0.4793,93.2788,21.441,5.36,1600
4,1.0,43.1525,0.42179,0.6339,0.3297,0.4808,97.2819,20.559,5.14,2000
5,1.2,43.1525,0.414433,0.6297,0.3294,0.4816,96.6116,20.701,5.175,2400
6,1.4,43.1525,0.410623,0.642,0.3363,0.4863,96.8715,20.646,5.161,2800
7,1.6,43.1525,0.404621,0.641,0.3369,0.4879,98.6568,20.272,5.068,3200
8,1.8,43.1525,0.40118,0.6488,0.3405,0.4911,95.5244,20.937,5.234,3600
9,2.0,43.1525,0.396773,0.6502,0.3416,0.4929,94.0076,21.275,5.319,4000


In [64]:
import gradio as gr
 
from transformers import T5ForConditionalGeneration, T5Tokenizer

In [65]:
def summarize_text(text):
    # Preprocess the text
    inputs = tokenizer.encode(
        "summarize: " + text,
        return_tensors='pt',
        max_length=512,
        truncation=True,
        padding='max_length'
    )
 
    # Generate the summary
    summary_ids = model.generate(
        inputs,
        max_length=50,
        num_beams=5,
        # early_stopping=True
    )
 
    # Decode and return the summary
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [66]:
model_path = "result666/checkpoint-10000" 
model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = T5Tokenizer('models/trained_spiece.model')

In [67]:
interface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(lines=10, placeholder='Enter Text Here...', label='Input text'),
    outputs=gr.Textbox(label='Summarized Text'),
    title='Text Summarizer using T5'
)
interface.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


