In [32]:
!pip install accelerate transformers datasets peft rouge

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


# **Importing Necessary Libraries**

In [37]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from datasets import load_dataset
from peft import get_peft_model, LoraConfig
from rouge import Rouge

# **Pre-Trained Model using Google FLAN T5**

In [3]:
dataset = load_dataset("cnn_dailymail", "3.0.0")
model_name = "google/flan-t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Downloading readme:   0%|          | 0.00/15.6k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/257M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/257M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/259M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

# **Training and Fine-Tuning**

In [4]:
def preprocess_function(examples):
   inputs = [doc for doc in examples['article']]
   model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
   with tokenizer.as_target_tokenizer():
       labels = tokenizer(examples['highlights'], max_length=128, truncation=True, padding="max_length")
   model_inputs["labels"] = labels["input_ids"]
   return model_inputs
encoded_dataset = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/287113 [00:00<?, ? examples/s]



Map:   0%|          | 0/13368 [00:00<?, ? examples/s]

Map:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [5]:
train_dataset = encoded_dataset["train"].shuffle(seed=42).select(range(2000))
test_dataset = encoded_dataset["validation"].shuffle(seed=42).select(range(1000))

In [6]:
peft_config=LoraConfig(task_type="CAUSAL_LM",
                       r=32,
                       lora_alpha=64,
                       lora_dropout=0.05,
                       bias='none'
)
model=get_peft_model(model,peft_config)

In [7]:
training_args = TrainingArguments(
   output_dir="./results",
   evaluation_strategy="epoch",
   learning_rate=1e-8,
   per_device_train_batch_size=8,
   per_device_eval_batch_size=8,
   num_train_epochs=3,
   weight_decay=0.01,
   save_total_limit=3,
)

trainer = Trainer(
   model=model,
   args=training_args,
   train_dataset=train_dataset,
   eval_dataset=test_dataset,
)

trainer.train()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss
1,2.5652,1.036545
2,1.1435,1.024374
3,1.1189,1.020131


TrainOutput(global_step=1500, training_loss=1.6091946614583332, metrics={'train_runtime': 707.9438, 'train_samples_per_second': 8.475, 'train_steps_per_second': 2.119, 'total_flos': 4173773930496000.0, 'train_loss': 1.6091946614583332, 'epoch': 3.0})

In [24]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
def generate_summary(example):
    input_ids = tokenizer.encode(example["article"], return_tensors="pt", max_length=2048, truncation=True).to(device)
    output = model.generate(input_ids, max_length=300, min_length=0, length_penalty=1.0, early_stopping=True)
    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"summary": summary}

summaries = test_dataset.map(generate_summary, batched=False)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]



# **Final Result**

In [43]:
print("Article:", test_dataset[1]["article"])
print("\n-----------------------------------------------------------------------------------------\n")
print("Generated Summary:", summaries[1]["summary"])
print("\n-----------------------------------------------------------------------------------------\n")
print("Reference Summary:", test_dataset[1]['highlights'])

Article: An anorexic teenager whose weight dropped to just five stone is fighting back from the condition by setting up a catering business. Faith March, 18 from Maldon, Essex, was surviving on nothing other than coffee when she dropped to her lowest weight in March of last year. After several ill-fated attempts to fight the illness, Faith collapsed in her bathroom where she was found by her boyfriend - and her family told her they feared for her life if she didn't get help. Scroll down for video . Faith March's weight dropped to just five stone when she was suffering from anorexia (left) but she is now in recovery and has set up her own patisserie business (right) After treatment at the Priory Hospital in Chelmsford, Faith is now at a healthier weight and credits the starting of her patisserie business, Whisk of Faith, as kick-starting her recovery. Faith said: 'This business has helped me get out of a massive hole. If I'm honest, it was a hole I never thought I would get out of. It j

# **Evaluation using ROUGE Score**

In [38]:
def calculate_rouge(reference_list, generated_list):
   rouge = Rouge()
   scores = rouge.get_scores(generated_list, reference_list)
   rouge_1 = sum(score['rouge-1']['f'] for score in scores) / len(scores)
   rouge_2 = sum(score['rouge-2']['f'] for score in scores) / len(scores)
   rouge_l = sum(score['rouge-l']['f'] for score in scores) / len(scores)
   return rouge_1, rouge_2, rouge_l


In [44]:
reference_summaries = [example["highlights"] for example in test_dataset]
generated_summaries = [example["summary"] for example in summaries]


rouge_1, rouge_2, rouge_l = calculate_rouge(reference_summaries, generated_summaries)

print("ROUGE-1:", rouge_1)
print("ROUGE-2:", rouge_2)
print("ROUGE-L:", rouge_l)

ROUGE-1: 0.3728658283963305
ROUGE-2: 0.1587970539985485
ROUGE-L: 0.35389217770768905


ROUGE-1, on average, signifies there is a 37.29% match in unigrams between generated and reference summaries

ROUGE-2, on average, signifies there is a 15.88% match in bigrams betweeen generated and reference summaries

ROUGE-L, on average, signifies that 35.39% of the longest common subsequence of terms in the generated summaries matches that in the reference summaries.