In [None]:
# !pip install evaluate accelerate
# !pip install rouge_score

# Also install datasets, transformers

### Load the libraries here

In [6]:
from datasets import load_dataset
from transformers import BartTokenizer, BartForConditionalGeneration, Trainer, TrainingArguments
import evaluate

### Load the datasets and BART_Large

In [2]:
# Load the DialogueSum dataset
dataset = load_dataset("knkarthick/dialogsum")

# Load pre-trained BART tokenizer and model
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

In [21]:
dataset['train']

Dataset({
    features: ['id', 'dialogue', 'summary', 'topic'],
    num_rows: 12460
})

In [19]:
from transformers import BartForConditionalGeneration, BartTokenizer

# Load the model and tokenizer
model_name = "facebook/bart-large-cnn"
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

# Tokenize input (dialogues) and output (summaries)
inputs = tokenizer(dataset['test']["dialogue"][:10], max_length=1024, truncation=True, padding="max_length", return_tensors="pt")
labels = tokenizer(dataset['test']["summary"][:10], max_length=128, truncation=True, padding="max_length", return_tensors="pt").input_ids

# Forward pass (to compute loss during fine-tuning)
outputs = model(**inputs, labels=labels)
loss = outputs.loss
print(loss)


tensor(9.8428, grad_fn=<NllLossBackward0>)


### Preprocess the dataset

In [3]:
# Tokenize the dataset
def preprocess_data(examples):
    inputs = tokenizer(
        examples["dialogue"], 
        max_length=1024, 
        truncation=True, 
        padding="max_length"
    )
    labels = tokenizer(
        examples["summary"], 
        max_length=128, 
        truncation=True, 
        padding="max_length"
    )
    inputs["labels"] = labels["input_ids"]
    return inputs

tokenized_dataset = dataset.map(preprocess_data, batched=True, remove_columns=["dialogue", "summary", "topic"])

### Function to compute metrics and evaluate the model

In [7]:
# Define evaluation metric
rouge = evaluate.load("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    # Optional: Add averages for rouge-1, rouge-2, and rouge-L
    result = {key: value.mid.fmeasure for key, value in result.items()}
    return result

AttributeError: 'DownloadConfig' object has no attribute 'use_auth_token'

### BART Training

In [None]:
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

# Replace TrainingArguments with Seq2SeqTrainingArguments
training_args = Seq2SeqTrainingArguments(
    output_dir="./bart-dialogsum",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=1,  # Try reducing batch size
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,  # Accumulate gradients to simulate larger batch size
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    fp16=True,  # Use mixed precision
    generation_max_length=128,
    generation_num_beams=4,
    logging_dir="./logs",
    logging_steps=100,
)

# Initialize the Seq2SeqTrainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    # compute_metrics=compute_metrics  # Your custom evaluation function
    
)

  trainer = Seq2SeqTrainer(


In [13]:
# Start training
trainer.train()

# Save the model
model.save_pretrained("./fine_tuned_bart")
tokenizer.save_pretrained("./fine_tuned_bart")

  0%|          | 0/4671 [00:00<?, ?it/s]

KeyboardInterrupt: 