In [None]:
!pip install transformers datasets torch evaluate rouge-score sacrebleu bert_score

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments

# One checkpoint name drives both the tokenizer and the model.
checkpoint = "distilgpt2"

# Load the corpus, then rename its text columns to the names the
# preprocessing and evaluation code reads.
dataset = load_dataset("Bilal-Mamji/Medical-summary")
for old_name, new_name in (("input", "input_text"), ("output", "target_text")):
    dataset = dataset.rename_column(old_name, new_name)

model = AutoModelForCausalLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

def preprocess_data(batch, max_length=512, max_target_length=256):
    """Turn a batch of source/summary pairs into causal-LM training features.

    A causal language model is trained on a single token sequence: the model
    shifts ``labels`` by one position against its own ``input_ids``.  The
    source text and its summary are therefore joined into one sequence
    (``source tokens + summary tokens + EOS``) instead of being tokenised
    into two unrelated, separately padded sequences.

    Label positions that belong to the source text or to padding are set to
    ``-100`` so the loss is computed on the summary (and its closing EOS)
    only.  Masking is done by position rather than by token id because the
    padding token is the same id as EOS in this script.

    Args:
        batch: Mapping with ``"input_text"`` and ``"target_text"`` lists of
            equal length, as passed by ``Dataset.map(..., batched=True)``.
        max_length: Fixed length every returned sequence is padded to.
        max_target_length: Maximum number of tokens (including the closing
            EOS) reserved for the summary.

    Returns:
        dict with ``"input_ids"``, ``"attention_mask"`` and ``"labels"``,
        each a list of ``max_length``-long integer lists.

    Raises:
        ValueError: If ``max_target_length`` is not in ``[2, max_length]``.
    """
    if not 2 <= max_target_length <= max_length:
        raise ValueError("max_target_length must be between 2 and max_length")

    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        # No dedicated padding token configured; padded positions are masked
        # out below anyway, so any valid id works.
        pad_id = eos_id

    prompts = tokenizer(
        batch["input_text"],
        add_special_tokens=False,
        truncation=True,
        max_length=max_length,
    )["input_ids"]
    summaries = tokenizer(
        batch["target_text"],
        add_special_tokens=False,
        truncation=True,
        max_length=max_target_length - 1,  # leave one slot for EOS
    )["input_ids"]

    features = {"input_ids": [], "attention_mask": [], "labels": []}
    for prompt_ids, summary_ids in zip(prompts, summaries):
        # The closing EOS teaches the model where a summary ends.
        summary_ids = summary_ids + [eos_id]
        # The summary is always kept whole; the source gives up the space.
        prompt_ids = prompt_ids[: max_length - len(summary_ids)]

        n_real = len(prompt_ids) + len(summary_ids)
        n_pad = max_length - n_real

        features["input_ids"].append(prompt_ids + summary_ids + [pad_id] * n_pad)
        features["attention_mask"].append([1] * n_real + [0] * n_pad)
        features["labels"].append(
            [-100] * len(prompt_ids) + summary_ids + [-100] * n_pad
        )
    return features

# Tokenise every split in batches; the columns of the "train" split are
# dropped so only the features returned by preprocess_data remain.
tokenized_dataset = dataset.map(
    preprocess_data,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

# Hold out 10% of the training split for validation.  The seed is fixed so
# that repeated runs are scored against the same held-out examples.
train_test_split = tokenized_dataset["train"].train_test_split(test_size=0.1, seed=42)
train_dataset = train_test_split["train"]
val_dataset = train_test_split["test"]

# Hyper-parameters for the fine-tuning run: evaluate once per epoch,
# checkpoint every 100 steps (keeping the two most recent), log every 10.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    save_steps=100,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10,
    # Request half-precision training only when a CUDA device is present, so
    # the script also runs on CPU-only machines instead of being rejected.
    fp16=torch.cuda.is_available(),
    remove_unused_columns=False,
)

import os
# NOTE(review): the library reports this variable as deprecated (removal
# announced for v5) and points to `report_to` as the replacement.
os.environ["WANDB_DISABLED"] = "true"


# Gather the trainer inputs in one place, then build the trainer from them.
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": val_dataset,
    "tokenizer": tokenizer,
}
trainer = Trainer(**trainer_kwargs)

# Run the fine-tuning loop.
trainer.train()

# Write the model first and the tokenizer second, into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./fine_tuned_distilgpt2")


from evaluate import load

# Load the three evaluation metrics in the order ROUGE, BLEU, BERTScore.
rouge, bleu, bertscore = (
    load(metric_name) for metric_name in ("rouge", "sacrebleu", "bertscore")
)


def generate_predictions(
    test_dataset,
    model,
    tokenizer,
    max_input_length=512,
    max_new_tokens=128,
    num_beams=4,
):
    """Generate one summary per example and pair it with its reference.

    Each prompt is encoded on its own and without padding, so no padding
    tokens sit between the prompt and the text being generated.  The output
    budget is expressed with ``max_new_tokens``; a total-length cap equal to
    the prompt length would leave no room for any new token.  Only the
    tokens produced after the prompt are decoded, so the returned
    predictions contain the generated continuation and not an echo of the
    source text.

    Args:
        test_dataset: Iterable of mappings with ``"input_text"`` and
            ``"target_text"`` entries.
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        max_input_length: Prompts longer than this many tokens are truncated.
        max_new_tokens: Maximum number of tokens generated per example.
        num_beams: Beam width used for beam search.

    Returns:
        ``(predictions, references)``: two lists of strings of equal length,
        in the iteration order of ``test_dataset``.
    """
    predictions = []
    references = []
    for example in test_dataset:
        encoded = tokenizer(
            example["input_text"],
            return_tensors="pt",
            max_length=max_input_length,
            truncation=True,
        )
        # Follow the model to whichever device it lives on rather than
        # assuming it was moved to CUDA.
        encoded = {name: tensor.to(model.device) for name, tensor in encoded.items()}
        prompt_length = encoded["input_ids"].shape[1]

        generated = model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            num_beams=num_beams,
            pad_token_id=tokenizer.pad_token_id,
        )

        # Drop the prompt tokens that generate() returns ahead of the
        # continuation; only the new tokens are the prediction.
        continuation = generated[0, prompt_length:]
        predictions.append(tokenizer.decode(continuation, skip_special_tokens=True))
        references.append(example["target_text"])
    return predictions, references

# Materialise the test split as a plain list of examples and run generation.
test_dialogues = list(dataset["test"])
predictions, references = generate_predictions(test_dialogues, model, tokenizer)

# ROUGE takes one reference string per prediction.
rouge_scores = rouge.compute(references=references, predictions=predictions)
print("ROUGE Scores:", rouge_scores)

# BLEU is given a list of references per prediction, so each single
# reference is wrapped in its own list.
bleu_scores = bleu.compute(
    references=[[reference] for reference in references],
    predictions=predictions,
)
print("BLEU Scores:", bleu_scores)

# BERTScore, computed with the "bert-base-uncased" model.
bert_scores = bertscore.compute(
    references=references,
    predictions=predictions,
    model_type="bert-base-uncased",
)
print("BERT Scores:", bert_scores)




Map:   0%|          | 0/250 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Epoch,Training Loss,Validation Loss


Epoch,Training Loss,Validation Loss
1,4.8907,4.918923
