### **1. Chạy ZERO-SHOT đánh giá các mô hình T5, BART, GPT-3 trên bộ dữ liệu CNN-Daily Mail sử dụng evaluation metrics (ROUGE-1, ROUGE-2, ROUGE-L)**
### **2. Fine-tuning các model kể trên 5-10 epoches (lựa chọn số epoches phù hợp dựa trên tài nguyên của bạn) và đánh giá hiệu năng của các model kể trên**

In [None]:
!pip install transformers datasets rouge-score

In [15]:
from datasets import load_dataset

# Nạp dữ liệu CNN/Daily Mail
dataset = load_dataset("cnn_dailymail", '3.0.0')
train_data = dataset['train']
test_data = dataset['test']

In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Nạp mô hình và tokenizer của T5 và BART
t5_tokenizer = AutoTokenizer.from_pretrained("t5-small")
t5_model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

bart_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
bart_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")



In [9]:
bart_model.to("cuda")
t5_model.to("cuda")

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [12]:
from rouge_score import rouge_scorer
from transformers import pipeline

# Hàm tạo tóm tắt
def generate_summary(model, tokenizer, article, max_input_length=512, max_output_length=150):
    inputs = tokenizer(article, return_tensors="pt", truncation=True, padding="max_length", max_length=max_input_length).to("cuda")
    summary_ids = model.generate(inputs.input_ids, max_length=max_output_length, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Hàm tính điểm ROUGE
def compute_rouge(predictions, references):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = [scorer.score(p, r) for p, r in zip(predictions, references)]
    avg_rouge = {
        'rouge1': sum([s['rouge1'].fmeasure for s in scores]) / len(scores),
        'rouge2': sum([s['rouge2'].fmeasure for s in scores]) / len(scores),
        'rougeL': sum([s['rougeL'].fmeasure for s in scores]) / len(scores),
    }
    return avg_rouge

In [13]:
# Sử dụng khoảng 100 mẫu từ bộ test để thử nghiệm
num_samples = 100
references = [sample for sample in test_data[:num_samples]['highlights']]
articles = [sample for sample in test_data[:num_samples]['article']]

# Đánh giá với T5
t5_summaries = [generate_summary(t5_model, t5_tokenizer, article) for article in articles]
t5_rouge_scores = compute_rouge(t5_summaries, references)

# Đánh giá với BART
bart_summaries = [generate_summary(bart_model, bart_tokenizer, article) for article in articles]
bart_rouge_scores = compute_rouge(bart_summaries, references)

print(f"T5 Rouge Scores: {t5_rouge_scores}")
print(f"BART Rouge Scores: {bart_rouge_scores}")


T5 Rouge Scores: {'rouge1': 0.2941181768181674, 'rouge2': 0.10573059919957957, 'rougeL': 0.2124864171346702}
BART Rouge Scores: {'rouge1': 0.3691310626378165, 'rouge2': 0.1644411449128128, 'rougeL': 0.2810656346292879}


In [22]:
train_data.select(range(100))

Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 100
})

In [30]:
def preprocess_data(tokenizer, data, max_input_length=512, max_output_length=150):
    def preprocess_function(examples):
        inputs = examples['article']
        targets = examples['highlights']
        model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True, padding="max_length", return_tensors="pt")
        labels = tokenizer(targets, max_length=max_output_length, truncation=True, padding="max_length", return_tensors="pt").input_ids
        labels[labels == tokenizer.pad_token_id] = -100  # Đặt giá trị pad_token thành -100 để bỏ qua trong loss
        model_inputs['labels'] = labels
        return model_inputs
    return data.map(preprocess_function, batched=True)

train_data_t5 = preprocess_data(t5_tokenizer, train_data.select(range(100)))
validation_data_t5 = preprocess_data(t5_tokenizer, test_data.select(range(100)))

train_data_bart = preprocess_data(bart_tokenizer, train_data.select(range(100)))
validation_data_bart = preprocess_data(bart_tokenizer, test_data.select(range(100)))

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [31]:
from transformers import Seq2SeqTrainer, TrainingArguments
from transformers import Seq2SeqTrainingArguments # Importing the necessary class

In [32]:
training_args_t5 = Seq2SeqTrainingArguments(
    output_dir="./results_t5",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    predict_with_generate=True
)

training_args_bart = Seq2SeqTrainingArguments(
    output_dir="./results_bart",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    predict_with_generate=True
)



In [33]:
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

# Huấn luyện T5
trainer_t5 = Seq2SeqTrainer(
    model=t5_model,
    args=training_args_t5,
    train_dataset=train_data_t5,
    eval_dataset=validation_data_t5,
    tokenizer=t5_tokenizer
)

trainer_t5.train()

# Huấn luyện BART
trainer_bart = Seq2SeqTrainer(
    model=bart_model,
    args=training_args_bart,
    train_dataset=train_data_bart,
    eval_dataset=validation_data_bart,
    tokenizer=bart_tokenizer
)

trainer_bart.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss
1,No log,2.495358
2,No log,2.421298
3,No log,2.396831


Epoch,Training Loss,Validation Loss
1,No log,2.007286
2,No log,1.879884
3,No log,1.854416


Non-default generation parameters: {'max_length': 142, 'min_length': 56, 'early_stopping': True, 'num_beams': 4, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


TrainOutput(global_step=39, training_loss=1.4844587277143428, metrics={'train_runtime': 193.0358, 'train_samples_per_second': 1.554, 'train_steps_per_second': 0.202, 'total_flos': 325065690316800.0, 'train_loss': 1.4844587277143428, 'epoch': 3.0})

In [36]:
from rouge_score import rouge_scorer

# Tính điểm ROUGE cho các mô hình đã fine-tune
def compute_rouge_for_finetuned_model(model, tokenizer, test_data):
    predictions = []
    references = test_data['highlights']

    for sample in test_data:
        article = sample['article']
        inputs = tokenizer(article, return_tensors="pt", truncation=True, padding="max_length", max_length=512).to("cuda")
        summary_ids = model.generate(inputs.input_ids, max_length=150, num_beams=4, early_stopping=True)
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        predictions.append(summary)

    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = [scorer.score(p, r) for p, r in zip(predictions, references)]
    avg_rouge = {
        'rouge1': sum([s['rouge1'].fmeasure for s in scores]) / len(scores),
        'rouge2': sum([s['rouge2'].fmeasure for s in scores]) / len(scores),
        'rougeL': sum([s['rougeL'].fmeasure for s in scores]) / len(scores),
    }
    return avg_rouge

# Đánh giá mô hình T5 và BART
t5_test_scores = compute_rouge_for_finetuned_model(t5_model, t5_tokenizer, test_data.select(range(100)))
bart_test_scores = compute_rouge_for_finetuned_model(bart_model, bart_tokenizer, test_data.select(range(100)))

print(f"T5 Rouge Scores after fine-tuning: {t5_test_scores}")
print(f"BART Rouge Scores after fine-tuning: {bart_test_scores}")


T5 Rouge Scores after fine-tuning: {'rouge1': 0.30604715036387514, 'rouge2': 0.1152985515762151, 'rougeL': 0.22024207117007347}
BART Rouge Scores after fine-tuning: {'rouge1': 0.3627152530304758, 'rouge2': 0.15484770243857685, 'rougeL': 0.2586687196641008}
