In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
import torch.optim as optim
import random 
from tqdm import tqdm
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
from tqdm.auto import tqdm
from datetime import datetime
import wandb
import time
import os
import re
import math
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, get_linear_schedule_with_warmup
from torch.optim import AdamW

from torch.utils.data import DataLoader, Dataset
from rouge import Rouge


2025-04-13 10:30:52.868710: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-13 10:30:52.917592: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-13 10:30:52.946018: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-13 10:30:52.953860: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-13 10:30:52.990781: I tensorflow/core/platform/cpu_feature_guar

In [2]:
NUM_EPOCHS = 50
BATCH_SIZE = 8
FRAC_SAMPLE = 0.02
MAX_LENGTH_ARTICLE = 512
MIN_LENGTH_ARTICLE = 50
MAX_LENGTH_SUMMARY = 128
MIN_LENGTH_SUMMARY = 20
HIDDEN_DIM = 128
LEARNING_RATE = 1e-5
MAX_PLATEAU_COUNT = 7
WEIGHT_DECAY = 1e-5
CLIP = 1
USE_PRETRAINED_EMB = True
USE_SCHEDULER = True
SCHEDULER_TYPE = "plateau"  # hoặc cosine, linear
NUM_CYCLES = 7


model_dir = "../Model"
datafilter = "../dataft"
save_dir = "fine_tuned_bart_cosine_3"
output_path = os.path.join(datafilter, "test_pred_3.csv")
os.makedirs(datafilter, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
os.makedirs(save_dir, exist_ok=True)

In [3]:
train_data = pd.read_csv("../dataset/train.csv")
validation_data = pd.read_csv("../dataset/validation.csv")
test_data = pd.read_csv("../dataset/test.csv")

# add col
train_data.rename(columns={"highlights": "summaries", "article":"articles"}, inplace=True)
validation_data.rename(columns={"highlights": "summaries","article":"articles"}, inplace=True)
test_data.rename(columns={"highlights": "summaries", "article":"articles"}, inplace=True)

train_data["article_word_count"] = train_data["articles"].astype(str).apply(lambda x: len(x.split()))
train_data["summary_word_count"] = train_data["summaries"].astype(str).apply(lambda x: len(x.split()))

validation_data["article_word_count"] = validation_data["articles"].astype(str).apply(lambda x: len(x.split()))
validation_data["summary_word_count"] = validation_data["summaries"].astype(str).apply(lambda x: len(x.split()))

test_data["article_word_count"] = test_data["articles"].astype(str).apply(lambda x: len(x.split()))
test_data["summary_word_count"] = test_data["summaries"].astype(str).apply(lambda x: len(x.split()))

# filter range
train_data = train_data[
    (train_data["article_word_count"] <= MAX_LENGTH_ARTICLE) & 
    (train_data["article_word_count"] >= MIN_LENGTH_ARTICLE) &
    (train_data["summary_word_count"] <= MAX_LENGTH_SUMMARY) &
    (train_data["summary_word_count"] >= MIN_LENGTH_SUMMARY)
]

validation_data = validation_data[
    (validation_data["article_word_count"] <= MAX_LENGTH_ARTICLE) & 
    (validation_data["article_word_count"] >= MIN_LENGTH_ARTICLE) &
    (validation_data["summary_word_count"] <= MAX_LENGTH_SUMMARY) &
    (validation_data["summary_word_count"] >= MIN_LENGTH_SUMMARY)
]
test_data = test_data[
    (test_data["article_word_count"] <= MAX_LENGTH_ARTICLE) & 
    (test_data["article_word_count"] >= MIN_LENGTH_ARTICLE) &
    (test_data["summary_word_count"] <= MAX_LENGTH_SUMMARY) &
    (test_data["summary_word_count"] >= MIN_LENGTH_SUMMARY)
]

train_sample = train_data.sample(frac=FRAC_SAMPLE, random_state=1)
validation_sample = validation_data.sample(frac=FRAC_SAMPLE, random_state=1)
test_sample = test_data.sample(frac=1, random_state=1)
train_sample.info()
print("\n")
validation_sample.info()
train_sample.to_csv(os.path.join(datafilter,"train_sample.csv"), index=False)
test_sample.to_csv(os.path.join(datafilter,"test_sample.csv"), index=False)
validation_sample.to_csv(os.path.join(datafilter,"validation_sample.csv"), index=False)


<class 'pandas.core.frame.DataFrame'>
Index: 1920 entries, 144417 to 175369
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id                  1920 non-null   object
 1   articles            1920 non-null   object
 2   summaries           1920 non-null   object
 3   article_word_count  1920 non-null   int64 
 4   summary_word_count  1920 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 90.0+ KB


<class 'pandas.core.frame.DataFrame'>
Index: 99 entries, 8901 to 10574
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id                  99 non-null     object
 1   articles            99 non-null     object
 2   summaries           99 non-null     object
 3   article_word_count  99 non-null     int64 
 4   summary_word_count  99 non-null     int64 
dtypes: int64(2), object(3)
memory usage: 4.6+ KB


In [4]:
train_sample = pd.read_csv("../dataft/train_sample.csv")
validation_sample = pd.read_csv("../dataft/validation_sample.csv")
test_sample = pd.read_csv("../dataft/test_sample.csv")
train_sample.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1920 entries, 0 to 1919
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id                  1920 non-null   object
 1   articles            1920 non-null   object
 2   summaries           1920 non-null   object
 3   article_word_count  1920 non-null   int64 
 4   summary_word_count  1920 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 75.1+ KB


In [5]:
class SummarizationDataset(Dataset):
    def __init__(self, data, tokenizer, max_input_length=MAX_LENGTH_ARTICLE, max_output_length=MAX_LENGTH_SUMMARY):
        self.data = data
        self.tokenizer = tokenizer
        self.max_input_length = max_input_length
        self.max_output_length = max_output_length
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        article = self.data.iloc[index]["articles"]
        summary = self.data.iloc[index]["summaries"]
        input_ids = self.tokenizer.encode(article, max_length=self.max_input_length, truncation=True, padding="max_length")
        output_ids = self.tokenizer.encode(summary, max_length=self.max_output_length, truncation=True, padding="max_length")
        return {"input_ids": input_ids, "attention_mask": [int(token_id != 0) for token_id in input_ids], "decoder_input_ids": output_ids[:-1], "decoder_attention_mask": [1] * (len(output_ids) - 1), "labels": output_ids[1:]}
train_df = train_sample
test_df = test_sample
val_df = validation_sample

In [6]:
from transformers import BartTokenizer, BartForConditionalGeneration

tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base").to(device)


In [7]:
train_dataset = SummarizationDataset(train_df, tokenizer)
val_dataset = SummarizationDataset(val_df, tokenizer)

In [8]:
def collate_fn(batch):
    input_ids = [item["input_ids"] for item in batch]
    attention_mask = [item["attention_mask"] for item in batch]
    decoder_input_ids = [item["decoder_input_ids"] for item in batch]
    decoder_attention_mask = [item["decoder_attention_mask"] for item in batch]
    labels = [item["labels"] for item in batch]
    max_input_length = max(len(ids) for ids in input_ids)
    max_output_length = max(len(ids) for ids in decoder_input_ids)
    input_ids = [ids + [0] * (max_input_length - len(ids)) for ids in input_ids]
    attention_mask = [mask + [0] * (max_input_length - len(mask)) for mask in attention_mask]
    decoder_input_ids = [ids + [0] * (max_output_length - len(ids)) for ids in decoder_input_ids]
    decoder_attention_mask = [mask + [0] * (max_output_length - len(mask)) for mask in decoder_attention_mask]
    labels = [ids + [-100] * (max_output_length - len(ids)) for ids in labels]
    return {"input_ids": torch.tensor(input_ids), "attention_mask": torch.tensor(attention_mask), "decoder_input_ids": torch.tensor(decoder_input_ids), "decoder_attention_mask": torch.tensor(decoder_attention_mask), "labels": torch.tensor(labels)}

In [9]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,collate_fn=collate_fn)

In [10]:
from transformers import get_cosine_schedule_with_warmup

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
num_training_steps = (len(train_loader) * NUM_EPOCHS)

scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=500,
    num_training_steps=num_training_steps,
    num_cycles=NUM_CYCLES 
)


In [11]:
len(train_loader),len(val_loader)

(240, 13)

In [12]:
wandb.init(
    project="Finetune-Summarization",
    name=f"bartbase-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
    config={
        "model": "Bartbase_cosine_3",
        "batch_size": BATCH_SIZE,
        "learning_rate": LEARNING_RATE,
        "weight_decay": WEIGHT_DECAY,
        "num_epochs": NUM_EPOCHS,
        "num_cycles": NUM_CYCLES,
        "data_ratio": FRAC_SAMPLE,
        
    }
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mvubkk67[0m ([33mvubkk67-hanoi-university-of-science-and-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [13]:
# Save best model
best_val_loss = float("inf")
# W&B setup
wandb.watch(model)
# Training loop
for epoch in range(NUM_EPOCHS):
    start_time = time.time()
    model.train()
    train_loss = 0.0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1} [Train]"):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss

        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in tqdm(val_loader, desc=f"Epoch {epoch+1} [Val]"):
            batch = {k: v.to(device) for k, v in batch.items()}  # Thêm dòng này
            outputs = model(**batch)
            val_loss += outputs.loss.item()
        val_loss /= len(val_loader)
        current_lr = scheduler.get_last_lr()[0]

    # W&B log
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "val_loss": val_loss,
        "lr": current_lr,
        "best_val_loss": best_val_loss
    })

    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        model.save_pretrained(save_dir)
        tokenizer.save_pretrained(save_dir)
        print(f"Saved best model to `{save_dir}` at epoch {epoch+1}")

    print(
        f"Epoch {epoch+1:02d} | "
        f"Train Loss: {train_loss:.4f} | "
        f"Val Loss: {val_loss:.4f} | "
        f"LR: {current_lr:.6f} | "
        f"Time: {time.time() - start_time:.2f}s"
    )

# W&B end
wandb.finish()


Epoch 1 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 1 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]



Saved best model to `fine_tuned_bart_cosine_3` at epoch 1
Epoch 01 | Train Loss: 9.5626 | Val Loss: 4.3324 | LR: 0.000005 | Time: 438.31s


Epoch 2 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 2 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 2
Epoch 02 | Train Loss: 3.4929 | Val Loss: 1.6314 | LR: 0.000010 | Time: 431.81s


Epoch 3 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 3 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 3
Epoch 03 | Train Loss: 1.5320 | Val Loss: 1.2059 | LR: 0.000008 | Time: 432.06s


Epoch 4 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 4 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 4
Epoch 04 | Train Loss: 1.2309 | Val Loss: 1.1404 | LR: 0.000004 | Time: 431.68s


Epoch 5 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 5 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 5
Epoch 05 | Train Loss: 1.1736 | Val Loss: 1.1324 | LR: 0.000001 | Time: 432.61s


Epoch 6 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 6 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 6
Epoch 06 | Train Loss: 1.1615 | Val Loss: 1.1318 | LR: 0.000001 | Time: 432.08s


Epoch 7 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 7 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 7
Epoch 07 | Train Loss: 1.1534 | Val Loss: 1.1230 | LR: 0.000004 | Time: 432.23s


Epoch 8 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 8 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 8
Epoch 08 | Train Loss: 1.1208 | Val Loss: 1.0870 | LR: 0.000008 | Time: 431.96s


Epoch 9 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 9 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 9
Epoch 09 | Train Loss: 1.0689 | Val Loss: 1.0611 | LR: 0.000010 | Time: 457.28s


Epoch 10 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 10 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 10 | Train Loss: 1.0199 | Val Loss: 1.0612 | LR: 0.000008 | Time: 460.30s


Epoch 11 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 11 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 11
Epoch 11 | Train Loss: 0.9772 | Val Loss: 1.0466 | LR: 0.000003 | Time: 455.09s


Epoch 12 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 12 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 12
Epoch 12 | Train Loss: 0.9598 | Val Loss: 1.0457 | LR: 0.000000 | Time: 451.70s


Epoch 13 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 13 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 13
Epoch 13 | Train Loss: 0.9501 | Val Loss: 1.0453 | LR: 0.000001 | Time: 434.17s


Epoch 14 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 14 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 14 | Train Loss: 0.9507 | Val Loss: 1.0457 | LR: 0.000005 | Time: 431.82s


Epoch 15 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 15 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 15
Epoch 15 | Train Loss: 0.9395 | Val Loss: 1.0342 | LR: 0.000009 | Time: 433.46s


Epoch 16 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 16 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Saved best model to `fine_tuned_bart_cosine_3` at epoch 16
Epoch 16 | Train Loss: 0.9146 | Val Loss: 1.0332 | LR: 0.000010 | Time: 432.77s


Epoch 17 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 17 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 17 | Train Loss: 0.8839 | Val Loss: 1.0415 | LR: 0.000007 | Time: 432.73s


Epoch 18 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 18 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 18 | Train Loss: 0.8501 | Val Loss: 1.0371 | LR: 0.000003 | Time: 431.75s


Epoch 19 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 19 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 19 | Train Loss: 0.8297 | Val Loss: 1.0360 | LR: 0.000000 | Time: 432.08s


Epoch 20 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 20 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 20 | Train Loss: 0.8270 | Val Loss: 1.0346 | LR: 0.000001 | Time: 431.98s


Epoch 21 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 21 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 21 | Train Loss: 0.8314 | Val Loss: 1.0339 | LR: 0.000005 | Time: 432.69s


Epoch 22 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 22 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 22 | Train Loss: 0.8231 | Val Loss: 1.0427 | LR: 0.000009 | Time: 432.36s


Epoch 23 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 23 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 23 | Train Loss: 0.7986 | Val Loss: 1.0370 | LR: 0.000010 | Time: 432.50s


Epoch 24 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 24 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 24 | Train Loss: 0.7707 | Val Loss: 1.0373 | LR: 0.000006 | Time: 432.04s


Epoch 25 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 25 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 25 | Train Loss: 0.7414 | Val Loss: 1.0441 | LR: 0.000002 | Time: 432.41s


Epoch 26 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 26 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 26 | Train Loss: 0.7275 | Val Loss: 1.0474 | LR: 0.000000 | Time: 431.91s


Epoch 27 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 27 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 27 | Train Loss: 0.7242 | Val Loss: 1.0491 | LR: 0.000002 | Time: 431.97s


Epoch 28 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 28 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 28 | Train Loss: 0.7245 | Val Loss: 1.0527 | LR: 0.000006 | Time: 431.84s


Epoch 29 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 29 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 29 | Train Loss: 0.7201 | Val Loss: 1.0493 | LR: 0.000010 | Time: 431.88s


Epoch 30 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 30 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 30 | Train Loss: 0.6998 | Val Loss: 1.0710 | LR: 0.000009 | Time: 432.61s


Epoch 31 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 31 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 31 | Train Loss: 0.6718 | Val Loss: 1.0695 | LR: 0.000006 | Time: 432.46s


Epoch 32 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 32 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 32 | Train Loss: 0.6446 | Val Loss: 1.0727 | LR: 0.000002 | Time: 432.43s


Epoch 33 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 33 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 33 | Train Loss: 0.6288 | Val Loss: 1.0752 | LR: 0.000000 | Time: 432.83s


Epoch 34 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 34 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 34 | Train Loss: 0.6281 | Val Loss: 1.0734 | LR: 0.000002 | Time: 432.84s


Epoch 35 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 35 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 35 | Train Loss: 0.6314 | Val Loss: 1.0802 | LR: 0.000007 | Time: 432.34s


Epoch 36 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 36 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 36 | Train Loss: 0.6231 | Val Loss: 1.0790 | LR: 0.000010 | Time: 432.02s


Epoch 37 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 37 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 37 | Train Loss: 0.6033 | Val Loss: 1.0812 | LR: 0.000009 | Time: 431.79s


Epoch 38 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 38 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 38 | Train Loss: 0.5767 | Val Loss: 1.0951 | LR: 0.000005 | Time: 432.40s


Epoch 39 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 39 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 39 | Train Loss: 0.5482 | Val Loss: 1.0972 | LR: 0.000001 | Time: 431.90s


Epoch 40 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 40 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 40 | Train Loss: 0.5382 | Val Loss: 1.0994 | LR: 0.000000 | Time: 432.28s


Epoch 41 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 41 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 41 | Train Loss: 0.5377 | Val Loss: 1.1034 | LR: 0.000003 | Time: 431.99s


Epoch 42 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 42 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 42 | Train Loss: 0.5550 | Val Loss: 1.1058 | LR: 0.000007 | Time: 432.41s


Epoch 43 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 43 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 43 | Train Loss: 0.5419 | Val Loss: 1.1159 | LR: 0.000010 | Time: 432.03s


Epoch 44 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 44 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 44 | Train Loss: 0.5160 | Val Loss: 1.1126 | LR: 0.000009 | Time: 432.10s


Epoch 45 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 45 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 45 | Train Loss: 0.5006 | Val Loss: 1.1155 | LR: 0.000004 | Time: 431.97s


Epoch 46 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 46 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 46 | Train Loss: 0.4812 | Val Loss: 1.1301 | LR: 0.000001 | Time: 432.72s


Epoch 47 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 47 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 47 | Train Loss: 0.4739 | Val Loss: 1.1309 | LR: 0.000000 | Time: 431.88s


Epoch 48 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 48 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 48 | Train Loss: 0.4731 | Val Loss: 1.1285 | LR: 0.000004 | Time: 432.30s


Epoch 49 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 49 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 49 | Train Loss: 0.4743 | Val Loss: 1.1335 | LR: 0.000008 | Time: 431.93s


Epoch 50 [Train]:   0%|          | 0/240 [00:00<?, ?it/s]

Epoch 50 [Val]:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch 50 | Train Loss: 0.4658 | Val Loss: 1.1480 | LR: 0.000010 | Time: 432.72s


VBox(children=(Label(value='0.430 MB of 0.430 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
best_val_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
lr,▄█▇▄▁▄▇█▆▃▂▄▇█▆▁▂▅▇█▁▂▅█▅▁▃▆█▇▂▁▃▆█▄▁▁▄█
train_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_val_loss,1.03315
epoch,50.0
lr,1e-05
train_loss,0.46583
val_loss,1.14797


In [14]:
# tokenizer = PegasusTokenizer.from_pretrained(save_dir)
# model = PegasusForConditionalGeneration.from_pretrained(save_dir).to(device)
from transformers import BartTokenizer, BartForConditionalGeneration

tokenizer = BartTokenizer.from_pretrained(save_dir)
model = BartForConditionalGeneration.from_pretrained(save_dir).to(device)


In [15]:
test_dataset = SummarizationDataset(test_df, tokenizer)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,collate_fn=collate_fn)
len(test_loader)

528

In [16]:
model.to(device)
model.eval()

predictions = []

with torch.no_grad():
    for step, batch in enumerate(tqdm(test_loader, desc="Generating summaries")):
        batch = {k: v.to(device) for k, v in batch.items()}

        output_ids = model.generate(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            max_length=128,
            decoder_start_token_id=tokenizer.pad_token_id,
            num_beams=4,
            length_penalty=2.0,
            early_stopping=True
        )

        batch_preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        predictions.extend(batch_preds)
test_sample = test_sample.iloc[:len(predictions)].copy()
test_sample["predicted_summary"] = predictions
test_sample.to_csv(output_path, index=False)

print(f"✅ File has been saved at: {output_path}")

Generating summaries:   0%|          | 0/528 [00:00<?, ?it/s]

✅ File has been saved at: ../dataft/test_pred_3.csv


In [17]:
test_pred = pd.read_csv(output_path)

In [18]:
display(test_pred[["articles","summaries", "predicted_summary"]].head(2))


Unnamed: 0,articles,summaries,predicted_summary
0,A Florida bus passenger was arrested for throw...,"Joel Parker, 33, was riding the bus in St John...","Joel Parker, 33, was about to get off the Sun..."
1,Aston Villa may be able to sign Cordoba strike...,Aston Villa have held talks over Cordoba strik...,ston Villa may be able to sign Cordoba striker...


In [19]:
# Tính điểm ROUGE
if "summaries" in test_pred.columns:
    rouge = Rouge()
    scores = rouge.get_scores(predictions, test_sample["summaries"].tolist(), avg=True)

    print("ROUGE scores:")
    print(f"ROUGE-1: {scores['rouge-1']['f']:.4f}")
    print(f"ROUGE-2: {scores['rouge-2']['f']:.4f}")
    print(f"ROUGE-L: {scores['rouge-l']['f']:.4f}")
else:
    print("⚠️ Không tìm thấy cột 'summaries' để tính ROUGE.")

ROUGE scores:
ROUGE-1: 0.3807
ROUGE-2: 0.1660
ROUGE-L: 0.3589
