In [1]:
import pandas as pd
import torch
from tqdm import tqdm
import torch.nn.functional as F
from tqdm.auto import tqdm
from datetime import datetime
import wandb
import time
import os
from transformers import get_cosine_schedule_with_warmup

from torch.optim import AdamW

from torch.utils.data import DataLoader, Dataset
from rouge import Rouge



In [2]:
NUM_EPOCHS = 50
BATCH_SIZE = 8
FRAC_SAMPLE = 0.01
MAX_LENGTH_ARTICLE = 512
MIN_LENGTH_ARTICLE = 50
MAX_LENGTH_SUMMARY = 128
MIN_LENGTH_SUMMARY = 20
HIDDEN_DIM = 128
LEARNING_RATE = 1e-5
MAX_PLATEAU_COUNT = 7
WEIGHT_DECAY = 1e-4
CLIP = 1
NUM_CYCLES = 5


model_dir = "../Model"
datafilter = "../dataft"
save_dir = "fine_tuned_t5_small"
output_path = os.path.join(datafilter, "test_pred_t5_small_0.csv")
os.makedirs(datafilter, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
os.makedirs(save_dir, exist_ok=True)

In [3]:
# train_data = pd.read_csv("../dataset/train.csv")
# validation_data = pd.read_csv("../dataset/validation.csv")
# test_data = pd.read_csv("../dataset/test.csv")

# # add col
# train_data.rename(columns={"highlights": "summaries", "article":"articles"}, inplace=True)
# validation_data.rename(columns={"highlights": "summaries","article":"articles"}, inplace=True)
# test_data.rename(columns={"highlights": "summaries", "article":"articles"}, inplace=True)

# train_data["article_word_count"] = train_data["articles"].astype(str).apply(lambda x: len(x.split()))
# train_data["summary_word_count"] = train_data["summaries"].astype(str).apply(lambda x: len(x.split()))

# validation_data["article_word_count"] = validation_data["articles"].astype(str).apply(lambda x: len(x.split()))
# validation_data["summary_word_count"] = validation_data["summaries"].astype(str).apply(lambda x: len(x.split()))

# test_data["article_word_count"] = test_data["articles"].astype(str).apply(lambda x: len(x.split()))
# test_data["summary_word_count"] = test_data["summaries"].astype(str).apply(lambda x: len(x.split()))

# # filter range
# train_data = train_data[
#     (train_data["article_word_count"] <= MAX_LENGTH_ARTICLE) & 
#     (train_data["article_word_count"] >= MIN_LENGTH_ARTICLE) &
#     (train_data["summary_word_count"] <= MAX_LENGTH_SUMMARY) &
#     (train_data["summary_word_count"] >= MIN_LENGTH_SUMMARY)
# ]

# validation_data = validation_data[
#     (validation_data["article_word_count"] <= MAX_LENGTH_ARTICLE) & 
#     (validation_data["article_word_count"] >= MIN_LENGTH_ARTICLE) &
#     (validation_data["summary_word_count"] <= MAX_LENGTH_SUMMARY) &
#     (validation_data["summary_word_count"] >= MIN_LENGTH_SUMMARY)
# ]
# test_data = test_data[
#     (test_data["article_word_count"] <= MAX_LENGTH_ARTICLE) & 
#     (test_data["article_word_count"] >= MIN_LENGTH_ARTICLE) &
#     (test_data["summary_word_count"] <= MAX_LENGTH_SUMMARY) &
#     (test_data["summary_word_count"] >= MIN_LENGTH_SUMMARY)
# ]

# train_sample = train_data.sample(frac=FRAC_SAMPLE, random_state=1)
# validation_sample = validation_data.sample(frac=FRAC_SAMPLE, random_state=1)
# test_sample = test_data.sample(frac=1, random_state=1)
# train_sample.info()
# print("\n")
# validation_sample.info()
# train_sample.to_csv(os.path.join(datafilter,"train_sample.csv"), index=False)
# test_sample.to_csv(os.path.join(datafilter,"test_sample.csv"), index=False)
# validation_sample.to_csv(os.path.join(datafilter,"validation_sample.csv"), index=False)


In [4]:
train_sample = pd.read_csv("../dataft/train_sample.csv")
validation_sample = pd.read_csv("../dataft/validation_sample.csv")
test_sample = pd.read_csv("../dataft/test_sample.csv")
train_sample.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 960 entries, 0 to 959
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id                  960 non-null    object
 1   articles            960 non-null    object
 2   summaries           960 non-null    object
 3   article_word_count  960 non-null    int64 
 4   summary_word_count  960 non-null    int64 
dtypes: int64(2), object(3)
memory usage: 37.6+ KB


In [5]:
# Custom Dataset
class SummarizationDataset(Dataset):
    def __init__(self, data, tokenizer, max_input_length=MAX_LENGTH_ARTICLE, max_output_length=MAX_LENGTH_SUMMARY):
        self.data = data
        self.tokenizer = tokenizer
        self.max_input_length = max_input_length
        self.max_output_length = max_output_length
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        article = self.data.iloc[index]["articles"]
        summary = self.data.iloc[index]["summaries"]
        
        # T5 need prefix:
        input_text = "summarize: " + article
        inputs = self.tokenizer(
            input_text,
            max_length=self.max_input_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt"
        )
        outputs = self.tokenizer(
            summary,
            max_length=self.max_output_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt"
        )
        
        return {
            "input_ids": inputs.input_ids.squeeze(),
            "attention_mask": inputs.attention_mask.squeeze(),
            "labels": outputs.input_ids.squeeze()
        }
train_df = train_sample
val_df = validation_sample
test_df = test_sample


In [6]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(device)

2025-05-08 02:10:17.846310: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-08 02:10:17.862334: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746645017.879991   57698 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746645017.884698   57698 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746645017.902541   57698 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [7]:
train_dataset = SummarizationDataset(train_df, tokenizer)
val_dataset = SummarizationDataset(val_df, tokenizer)
test_dataset = SummarizationDataset(test_df, tokenizer)

In [8]:
# DataLoader
def collate_fn(batch):
    input_ids = torch.stack([item["input_ids"] for item in batch])
    attention_mask = torch.stack([item["attention_mask"] for item in batch])
    labels = torch.stack([item["labels"] for item in batch])
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }

In [9]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn)

In [10]:
# Optimizer and Scheduler
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
num_training_steps = len(train_loader) * NUM_EPOCHS
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * num_training_steps),
    num_training_steps=num_training_steps,
    num_cycles=NUM_CYCLES
)

In [11]:
wandb.init(
    project="Finetune-Summarization",
    name=f"t5small-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
    config={
        "model": "t5-small",
        "batch_size": BATCH_SIZE,
        "learning_rate": LEARNING_RATE,
        "weight_decay": WEIGHT_DECAY,
        "num_epochs": NUM_EPOCHS,
        "num_cycles": NUM_CYCLES,
        "data_ratio": FRAC_SAMPLE,
        "warm_up": "Cosine"
    }
)


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mvubkk67[0m ([33mvubkk67-hanoi-university-of-science-and-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [12]:
# Training loop
best_val_loss = float("inf")
plateau_count = 0
wandb.watch(model)

for epoch in range(NUM_EPOCHS):
    start_time = time.time()
    model.train()
    train_loss = 0.0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1} [Train]"):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss

        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in tqdm(val_loader, desc=f"Epoch {epoch+1} [Val]"):
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            val_loss += outputs.loss.item()
        val_loss /= len(val_loader)
        current_lr = scheduler.get_last_lr()[0]

    # W&B log
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "val_loss": val_loss,
        "lr": current_lr,
        "best_val_loss": best_val_loss
    })

    # Save best model or increment plateau counter
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        plateau_count = 0  # Reset plateau counter
        model.save_pretrained(save_dir)
        tokenizer.save_pretrained(save_dir)
        print(f"Saved best model to `{save_dir}` at epoch {epoch+1}")
    else:
        plateau_count += 1
        print(f"No improvement in val_loss. Plateau count: {plateau_count}/{MAX_PLATEAU_COUNT}")

    # Early stopping check
    if plateau_count >= MAX_PLATEAU_COUNT:
        print(f"Early stopping triggered at epoch {epoch+1} due to {plateau_count} consecutive non-improvements.")
        break

    print(
        f"Epoch {epoch+1:02d} | "
        f"Train Loss: {train_loss:.4f} | "
        f"Val Loss: {val_loss:.4f} | "
        f"LR: {current_lr:.6f} | "
        f"Time: {time.time() - start_time:.2f}s"
    )

wandb.finish()

Epoch 1 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch 1 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 1
Epoch 01 | Train Loss: 10.5806 | Val Loss: 9.3234 | LR: 0.000002 | Time: 122.48s


Epoch 2 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 2 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 2
Epoch 02 | Train Loss: 8.8096 | Val Loss: 6.8115 | LR: 0.000004 | Time: 121.12s


Epoch 3 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 3 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 3
Epoch 03 | Train Loss: 5.2138 | Val Loss: 2.8093 | LR: 0.000006 | Time: 118.80s


Epoch 4 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 4 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 4
Epoch 04 | Train Loss: 2.3648 | Val Loss: 1.3045 | LR: 0.000008 | Time: 115.81s


Epoch 5 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 5 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 5
Epoch 05 | Train Loss: 1.7397 | Val Loss: 1.1897 | LR: 0.000010 | Time: 120.69s


Epoch 6 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 6 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 6
Epoch 06 | Train Loss: 1.5217 | Val Loss: 1.1625 | LR: 0.000009 | Time: 114.45s


Epoch 7 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 7 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 7
Epoch 07 | Train Loss: 1.4034 | Val Loss: 1.1480 | LR: 0.000006 | Time: 116.54s


Epoch 8 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 8 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 8
Epoch 08 | Train Loss: 1.3274 | Val Loss: 1.1427 | LR: 0.000003 | Time: 119.35s


Epoch 9 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 9 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 9
Epoch 09 | Train Loss: 1.2856 | Val Loss: 1.1414 | LR: 0.000000 | Time: 121.29s


Epoch 10 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 10 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 10
Epoch 10 | Train Loss: 1.2770 | Val Loss: 1.1412 | LR: 0.000000 | Time: 118.33s


Epoch 11 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 11 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 11
Epoch 11 | Train Loss: 1.2763 | Val Loss: 1.1402 | LR: 0.000002 | Time: 124.92s


Epoch 12 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 12 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 12
Epoch 12 | Train Loss: 1.2328 | Val Loss: 1.1361 | LR: 0.000006 | Time: 126.47s


Epoch 13 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 13 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 13
Epoch 13 | Train Loss: 1.1665 | Val Loss: 1.1305 | LR: 0.000009 | Time: 126.91s


Epoch 14 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 14 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 14
Epoch 14 | Train Loss: 1.1085 | Val Loss: 1.1154 | LR: 0.000010 | Time: 121.78s


Epoch 15 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 15 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 15
Epoch 15 | Train Loss: 1.0625 | Val Loss: 1.0978 | LR: 0.000009 | Time: 119.63s


Epoch 16 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 16 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 16
Epoch 16 | Train Loss: 1.0280 | Val Loss: 1.0840 | LR: 0.000006 | Time: 120.33s


Epoch 17 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 17 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 17
Epoch 17 | Train Loss: 1.0183 | Val Loss: 1.0758 | LR: 0.000003 | Time: 123.58s


Epoch 18 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 18 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 18
Epoch 18 | Train Loss: 1.0060 | Val Loss: 1.0735 | LR: 0.000000 | Time: 117.68s


Epoch 19 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 19 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 19
Epoch 19 | Train Loss: 1.0088 | Val Loss: 1.0733 | LR: 0.000000 | Time: 112.59s


Epoch 20 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 20 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 20
Epoch 20 | Train Loss: 1.0094 | Val Loss: 1.0706 | LR: 0.000002 | Time: 116.46s


Epoch 21 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 21 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 21
Epoch 21 | Train Loss: 0.9969 | Val Loss: 1.0616 | LR: 0.000006 | Time: 127.60s


Epoch 22 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 22 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 22
Epoch 22 | Train Loss: 0.9841 | Val Loss: 1.0485 | LR: 0.000009 | Time: 121.52s


Epoch 23 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 23 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 23
Epoch 23 | Train Loss: 0.9670 | Val Loss: 1.0388 | LR: 0.000010 | Time: 130.58s


Epoch 24 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 24 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 24
Epoch 24 | Train Loss: 0.9529 | Val Loss: 1.0295 | LR: 0.000009 | Time: 115.24s


Epoch 25 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 25 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 25
Epoch 25 | Train Loss: 0.9420 | Val Loss: 1.0240 | LR: 0.000006 | Time: 116.56s


Epoch 26 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 26 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 26
Epoch 26 | Train Loss: 0.9318 | Val Loss: 1.0218 | LR: 0.000003 | Time: 116.84s


Epoch 27 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 27 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 27
Epoch 27 | Train Loss: 0.9268 | Val Loss: 1.0211 | LR: 0.000000 | Time: 117.36s


Epoch 28 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 28 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 28
Epoch 28 | Train Loss: 0.9317 | Val Loss: 1.0211 | LR: 0.000000 | Time: 116.16s


Epoch 29 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 29 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 29
Epoch 29 | Train Loss: 0.9263 | Val Loss: 1.0203 | LR: 0.000002 | Time: 112.24s


Epoch 30 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 30 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 30
Epoch 30 | Train Loss: 0.9246 | Val Loss: 1.0180 | LR: 0.000006 | Time: 111.29s


Epoch 31 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 31 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 31
Epoch 31 | Train Loss: 0.9178 | Val Loss: 1.0141 | LR: 0.000009 | Time: 112.02s


Epoch 32 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 32 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 32
Epoch 32 | Train Loss: 0.9090 | Val Loss: 1.0102 | LR: 0.000010 | Time: 111.03s


Epoch 33 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 33 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 33
Epoch 33 | Train Loss: 0.8978 | Val Loss: 1.0069 | LR: 0.000009 | Time: 112.04s


Epoch 34 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 34 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 34
Epoch 34 | Train Loss: 0.8941 | Val Loss: 1.0058 | LR: 0.000006 | Time: 111.12s


Epoch 35 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 35 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 35
Epoch 35 | Train Loss: 0.8892 | Val Loss: 1.0038 | LR: 0.000003 | Time: 112.03s


Epoch 36 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 36 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 36
Epoch 36 | Train Loss: 0.8874 | Val Loss: 1.0033 | LR: 0.000000 | Time: 111.02s


Epoch 37 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 37 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 37
Epoch 37 | Train Loss: 0.8833 | Val Loss: 1.0032 | LR: 0.000000 | Time: 112.10s


Epoch 38 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 38 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 38
Epoch 38 | Train Loss: 0.8885 | Val Loss: 1.0027 | LR: 0.000002 | Time: 111.19s


Epoch 39 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 39 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 39
Epoch 39 | Train Loss: 0.8823 | Val Loss: 1.0018 | LR: 0.000006 | Time: 112.11s


Epoch 40 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 40 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 40
Epoch 40 | Train Loss: 0.8771 | Val Loss: 0.9997 | LR: 0.000009 | Time: 111.06s


Epoch 41 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 41 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 41
Epoch 41 | Train Loss: 0.8726 | Val Loss: 0.9958 | LR: 0.000010 | Time: 112.16s


Epoch 42 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 42 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 42
Epoch 42 | Train Loss: 0.8703 | Val Loss: 0.9941 | LR: 0.000009 | Time: 111.30s


Epoch 43 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 43 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 43
Epoch 43 | Train Loss: 0.8631 | Val Loss: 0.9925 | LR: 0.000006 | Time: 112.11s


Epoch 44 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 44 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 44
Epoch 44 | Train Loss: 0.8600 | Val Loss: 0.9916 | LR: 0.000002 | Time: 111.20s


Epoch 45 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 45 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 45
Epoch 45 | Train Loss: 0.8557 | Val Loss: 0.9915 | LR: 0.000000 | Time: 112.09s


Epoch 46 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 46 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 46
Epoch 46 | Train Loss: 0.8514 | Val Loss: 0.9914 | LR: 0.000000 | Time: 114.07s


Epoch 47 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 47 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 47
Epoch 47 | Train Loss: 0.8564 | Val Loss: 0.9913 | LR: 0.000002 | Time: 114.50s


Epoch 48 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 48 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 48
Epoch 48 | Train Loss: 0.8563 | Val Loss: 0.9907 | LR: 0.000006 | Time: 113.24s


Epoch 49 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 49 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 49
Epoch 49 | Train Loss: 0.8520 | Val Loss: 0.9903 | LR: 0.000009 | Time: 113.77s


Epoch 50 [Train]:   0%|          | 0/120 [00:00<?, ?it/s]

Epoch 50 [Val]:   0%|          | 0/7 [00:00<?, ?it/s]

Saved best model to `fine_tuned_t5_small` at epoch 50
Epoch 50 | Train Loss: 0.8497 | Val Loss: 0.9890 | LR: 0.000010 | Time: 113.00s


VBox(children=(Label(value='0.229 MB of 0.229 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
best_val_loss,█▆▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,▂▄▅▇█▅▃▁▁▃▇█▇▅▁▃▅▇█▇▃▁▁▃▅█▇▅▃▁▃▅▇▇▅▁▁▃▅█
train_loss,█▇▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_val_loss,0.99031
epoch,50.0
lr,1e-05
train_loss,0.84965
val_loss,0.98904


In [13]:
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained(save_dir).to(device)


In [14]:
len(test_loader)

528

In [15]:
# Generate predictions
model.eval()
predictions = []

with torch.no_grad():
    for step, batch in enumerate(tqdm(test_loader, desc="Generating summaries")):
        batch = {k: v.to(device) for k, v in batch.items()}

        output_ids = model.generate(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            max_length=MAX_LENGTH_SUMMARY,
            num_beams=4,
            length_penalty=2.0,
            early_stopping=True
        )

        batch_preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        predictions.extend(batch_preds)

# Save predictions
test_sample = test_df.iloc[:len(predictions)].copy()
test_sample["predicted_summary"] = predictions
test_sample.to_csv(output_path, index=False)

print(f"✅ File has been saved at: {output_path}")

Generating summaries:   0%|          | 0/528 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
test_pred = pd.read_csv(output_path)
# Tính điểm ROUGE
if "summaries" in test_pred.columns:
    rouge = Rouge()
    scores = rouge.get_scores(predictions, test_sample["summaries"].tolist(), avg=True)

    print("ROUGE scores:")
    print(f"ROUGE-1: {scores['rouge-1']['f']:.4f}")
    print(f"ROUGE-2: {scores['rouge-2']['f']:.4f}")
    print(f"ROUGE-L: {scores['rouge-l']['f']:.4f}")
else:
    print("⚠️ Không tìm thấy cột 'summaries' để tính ROUGE.")