In [1]:
pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install trl

Collecting trl
  Downloading trl-0.11.4-py3-none-any.whl.metadata (12 kB)
Collecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.8.14-py3-none-any.whl.metadata (8.4 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)
  Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)
Downloading trl-0.11.4-py3-none-any.whl (316 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.6/316.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hDownloading tyro-0.8.14-py3-none-any.whl (109 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.8/109.8 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)
Installing collected packages: shtab, tyro, trl
Successfully installed shtab-1.7.1 trl-0.11.4 tyro-0.8.14
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=665664ca1f9471e6a925bb0328f2e25d9a9a5ff3f0581c85d36f4899f540c480
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Note: you may need to restart the kernel to use updated packages.


In [4]:
from datasets import load_dataset
import random 
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorWithPadding, TrainingArguments
import numpy as np
import evaluate
from trl import SFTTrainer

rouge = evaluate.load("rouge")

data_files = {"train": "train.csv", "test": "test.csv", "validation": "validation.csv"}
dataset = load_dataset("/kaggle/input/newspaper-text-summarization-cnn-dailymail/cnn_dailymail", data_files=data_files)

num_train_samples = 27000
train_select = random.sample(range(len(dataset["train"])), k=num_train_samples)

num_validation_samples = 100
validation_select = random.sample(range(len(dataset["validation"])), k=num_validation_samples)

num_test_samples = 100
test_select = random.sample(range(len(dataset["test"])), k=num_test_samples)

dataset_train = dataset["train"].select(train_select)
dataset_validation = dataset["validation"].select(validation_select)
dataset_test = dataset["test"].select(test_select)

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

for name, param in model.named_parameters():
    if "transformer.h." in name:  
        param.requires_grad = False  
    elif "transformer.lm_head." in name:  
        param.requires_grad = True  

prefix = "summarize: "

def preprocess_function(examples):
    inputs = [prefix + doc for doc in examples["article"]]
    model_inputs = tokenizer(inputs, max_length=400, truncation=True, padding='max_length')
    labels = tokenizer(text_target=examples["highlights"], max_length=400, truncation=True, padding='max_length')
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_train = dataset_train.map(preprocess_function, batched=True)
tokenized_validation = dataset_validation.map(preprocess_function, batched=True)
tokenized_test = dataset_test.map(preprocess_function, batched=True)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=-1)
    
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

training_args = TrainingArguments(
    output_dir="gpt2_test",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=6,
    per_device_eval_batch_size=1,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=20,
    report_to="none",
    fp16=True,
    eval_accumulation_steps=8
)

tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_train, 
    eval_dataset=tokenized_validation,
    tokenizer=tokenizer,
    args=training_args,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)

trainer.train()


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



Map:   0%|          | 0/27000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,1.4454,1.751391,0.0579,0.0011,0.0462,0.0556,30.45
2,1.3726,1.666207,0.0382,0.0012,0.0333,0.0378,41.15
3,1.353,1.623973,0.0372,0.0014,0.0343,0.0372,42.34
4,1.3314,1.602583,0.0319,0.0004,0.0285,0.0318,39.46
5,1.3161,1.590284,0.0274,0.0005,0.024,0.0275,37.9
6,1.3028,1.584727,0.0321,0.0003,0.0283,0.0313,36.46
7,1.3135,1.581346,0.0445,0.0003,0.0398,0.0434,37.49
8,1.303,1.585655,0.0339,0.0002,0.0315,0.0329,37.73
9,1.2884,1.578732,0.0305,0.0,0.0282,0.0296,37.13
10,1.2934,1.572558,0.0319,0.0,0.0301,0.0314,37.84


TrainOutput(global_step=90000, training_loss=1.3591586283365886, metrics={'train_runtime': 30087.5502, 'train_samples_per_second': 17.948, 'train_steps_per_second': 2.991, 'total_flos': 1.10232576e+17, 'train_loss': 1.3591586283365886, 'epoch': 20.0})