In [1]:
!pip install datasets



In [2]:
from datasets import load_dataset

samsum_train_dataset = load_dataset("csv", data_files={"train": "/kaggle/input/samsumnew/samsum-train.csv"})
samsum_test_dataset = load_dataset("csv", data_files={"test": "/kaggle/input/samsumnew/samsum-test.csv"})
samsum_validate_dataset = load_dataset("csv", data_files={"validation": "/kaggle/input/samsumnew/samsum-validation.csv"})

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

In [3]:
# Check the number of rows in each split of the dataset
print(f"Training dataset size: {samsum_train_dataset['train'].num_rows}")
print(f"Test dataset size: {samsum_test_dataset['test'].num_rows}")
print(f"Validation dataset size: {samsum_validate_dataset['validation'].num_rows}")

Training dataset size: 14732
Test dataset size: 819
Validation dataset size: 818


In [4]:
samsum_train_dataset["train"][17]

{'id': '13729191',
 'dialogue': 'Betty: Please remind me next time that too much wine isn\'t good for me and me surrounding\r\nBetty: Hangover is one thing\r\nBetty: But I feel like never touching wine again\r\nAmber: Hahaha. You were so drunk with wine last night, you couldn\'t walk straight even 5 steps\r\nAmber: You took off your weeding ring and shouted "My precious"\r\nBetty: Uhh. I have no blackouts so I know exactly what to feel embarassed about....',
 'summary': 'Betty feels remorse she got drunk last night and went out of control.'}

In [5]:
print(samsum_test_dataset.keys())

dict_keys(['test'])


In [6]:
from transformers import pipeline

text_summarizer = pipeline("summarization", model="facebook/bart-base", device=0)


config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [7]:
text_summarizer(samsum_train_dataset["train"][128]["dialogue"], max_length=20, min_length=10, do_sample= False )

[{'summary_text': 'Dorothy: Hi! You know what? Ron messaged me again, and'}]

In [8]:
# Fine tune the SamSUM model to improve the summarize performance
# Add the BART tokenizer and model
from transformers import BartForConditionalGeneration, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base", dropout=0.3)

In [9]:
# Remove the icon tag like =), :v
# Guess the word meaning for the missing character of a word
import re

def preprocess_missingchar_and_icon(sample):
    def clean_text(text):
        # Convert to string explicitly
        text = str(text)

        # Remove icon tags, including the characters inside angled brackets (e.g., <photo>, <emoji>)
        text = re.sub(r'<.*?>', '', text)

        # Remove common emoticons or icons (e.g., :D, :P, :v, <3)
        text = re.sub(r'(:\)|:-\)|:\(|:-\(|:D|:P|:v|<3|;\)|;D)', '', text)

        # Replace escaped single quotes (\') with single quotes (')
        text = re.sub(r"\\'", "", text)

        # Remove duplicate periods (e.g., ".." or ". .")
        text = re.sub(r'\.\s*\.', '.', text)

        # Ensure each speaker turn ends with a period (but not after ! or ?)
        text = re.sub(r'([^\.\!\?])(\s+[A-Za-z]+:)', r'\1.\2', text)

        # Remove redundant periods after speaker turns
        text = re.sub(r'(\.\s*\.)+', '.', text)

        # Ensure the last sentence ends with a period if it doesn't already
        text = re.sub(r'([^\.\!\?])$', r'\1.', text)

        # Remove unwanted extra periods after exclamation or question marks
        text = re.sub(r'([!\?])\s*\.', r'\1', text)

        # Normalize whitespace
        text = re.sub(r'\s+', ' ', text).strip()

        return text


    # Apply cleaning to the text and summary fields
    sample["dialogue"] = clean_text(sample["dialogue"])
    sample["summary"] = clean_text(sample["summary"])
    return sample

samsum_train_dataset_clean = samsum_train_dataset.map(preprocess_missingchar_and_icon)
samsum_test_dataset_clean = samsum_test_dataset.map(preprocess_missingchar_and_icon)
samsum_validate_dataset_clean = samsum_validate_dataset.map(preprocess_missingchar_and_icon)

Map:   0%|          | 0/14732 [00:00<?, ? examples/s]

Map:   0%|          | 0/819 [00:00<?, ? examples/s]

Map:   0%|          | 0/818 [00:00<?, ? examples/s]

In [10]:
samsum_train_dataset_clean["train"][5]


{'id': '13716343',
 'dialogue': "Neville: Hi there, does anyone remember what date I got married on? Don: Are you serious? Neville: Dead serious. We're on vacation, and Tina's mad at me about something. I have a strange suspicion that this might have something to do with our wedding anniversary, but I have nowhere to check. Wyatt: Hang on, I'll ask my wife. Don: Haha, someone's in a lot of trouble. Wyatt: September 17. I hope you remember the year .",
 'summary': "Wyatt reminds Neville his wedding anniversary is on the 17th of September. Neville's wife is upset and it might be because Neville forgot about their anniversary."}

In [11]:
def preprocessData(records, tokenizer, max_length_preprocess=128):
    sources = records["dialogue"]
    targets = records["summary"]

    input_encoding = tokenizer(sources, max_length=max_length_preprocess*8, padding="max_length", truncation=True)
    with tokenizer.as_target_tokenizer():
        output_encoding = tokenizer(targets, max_length=max_length_preprocess, padding="max_length", truncation=True)

    # Return as lists to ensure compatibility with DataLoader
    return {
        "input_ids": input_encoding["input_ids"],
        "attention_mask": input_encoding["attention_mask"],
        "labels": output_encoding["input_ids"],
    }

train_dataset = samsum_train_dataset_clean["train"].map(lambda x: preprocessData(x, tokenizer), batched=True)
validation_dataset = samsum_validate_dataset_clean["validation"].map(lambda x: preprocessData(x, tokenizer), batched=True)
test_dataset = samsum_test_dataset_clean["test"].map(lambda x: preprocessData(x, tokenizer), batched=True)

Map:   0%|          | 0/14732 [00:00<?, ? examples/s]



Map:   0%|          | 0/818 [00:00<?, ? examples/s]

Map:   0%|          | 0/819 [00:00<?, ? examples/s]

In [12]:
!pip install evaluate

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [13]:
!pip install rouge_score


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=b895ca5ab32612e3059b193d24db2900b207b031365d815e30d0ff1f653e6c94
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [14]:
!pip install torch

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [15]:
!pip install rouge

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [16]:
import evaluate
def compute_metrics(pred):
    rouge = evaluate.load("rouge")
    labels_ids = pred.label_ids
    pred_ids = pred.predictions[0]

    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    labels_ids[labels_ids == -100] = tokenizer.pad_token_id
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    rouge_output = rouge.compute(
        predictions=pred_str,
        references=label_str,
        rouge_types=["rouge1", "rouge2", "rougeL", "rougeLsum"],
    )

    return {
        "R1": round(rouge_output["rouge1"], 4),
        "R2": round(rouge_output["rouge2"], 4),
        "RL": round(rouge_output["rougeL"], 4),
        "RLsum": round(rouge_output["rougeLsum"], 4),
    }

In [17]:
def preprocess_logits_for_metrics(logits, labels):
    """
    Original Trainer may have a memory leak.
    This is a workaround to avoid storing too many tensors that are not needed.
    """
    pred_ids = torch.argmax(logits[0], dim=-1)
    return pred_ids, labels

In [18]:
from transformers import BartForConditionalGeneration, TrainingArguments, Trainer, EarlyStoppingCallback
import torch
import rouge
import numpy as np
import evaluate
# Define training arguments
training_args = TrainingArguments(
    output_dir="./results/pre-trained-model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_steps=500,
    learning_rate=5e-5,
    weight_decay= 0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_R1",
    greater_is_better=True,
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# Train the model
trainer.train()

# Save the fine-tuned model and tokenizer
model.save_pretrained("./finetuned_bart_samsum")
tokenizer.save_pretrained("./finetuned_bart_samsum")

  trainer = Trainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,R1,R2,Rl,Rlsum
1,1.0891,0.377189,0.5914,0.3255,0.5596,0.5594
2,0.4131,0.365755,0.6008,0.3387,0.5716,0.5716
3,0.3735,0.354133,0.6115,0.3501,0.5832,0.5828
4,0.3508,0.352411,0.6127,0.3528,0.5826,0.5826
5,0.3383,0.349734,0.617,0.3569,0.5877,0.5874


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].


('./finetuned_bart_samsum/tokenizer_config.json',
 './finetuned_bart_samsum/special_tokens_map.json',
 './finetuned_bart_samsum/vocab.json',
 './finetuned_bart_samsum/merges.txt',
 './finetuned_bart_samsum/added_tokens.json',
 './finetuned_bart_samsum/tokenizer.json')

In [19]:
# Evaluate the model
results = trainer.evaluate(eval_dataset=validation_dataset)
print(results)


{'eval_loss': 0.34973397850990295, 'eval_R1': 0.617, 'eval_R2': 0.3569, 'eval_RL': 0.5877, 'eval_RLsum': 0.5874, 'eval_runtime': 21.9773, 'eval_samples_per_second': 37.22, 'eval_steps_per_second': 2.366, 'epoch': 5.0}


In [20]:
# Model evaluating using ROUGE
from evaluate import load
import torch

# Load ROUGE metric
rouge = load("rouge")

# Function to generate predictions
def generate_predictions(model, tokenizer, dataset):
    predictions = []
    references = []

    for example in dataset:
        # Prepare the input dialogue
        inputs = tokenizer(
            example["dialogue"],
            return_tensors="pt",
            max_length=512,
            truncation=True,
            padding="max_length"
        )

        # Move inputs to GPU if available
        inputs = {k: v.to("cuda") for k, v in inputs.items()} if torch.cuda.is_available() else inputs

        # Generate summary
        with torch.no_grad():
            outputs = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=128,
                min_length=30,
                do_sample=False
            )

        # Decode the generated summary
        generated_summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Append generated summary and reference summary
        predictions.append(generated_summary)
        references.append(example["summary"])

    return predictions, references

# Generate predictions and references
test_predictions, test_references = generate_predictions(model, tokenizer, validation_dataset)

# Compute ROUGE scores
rouge_results = rouge.compute(predictions=test_predictions, references=test_references)

# Print ROUGE scores
print("ROUGE Scores:")
for key, value in rouge_results.items():
    print(f"{key}: {value:.4f}")


ROUGE Scores:
rouge1: 0.4612
rouge2: 0.2285
rougeL: 0.3680
rougeLsum: 0.3680


In [2]:
#Load the TweetSum dataset
from datasets import load_dataset

tweetsum_train = load_dataset("csv", data_files={"train": "/kaggle/input/tweetsum/tweetsum_train.csv"})
tweetsum_test = load_dataset("csv", data_files={"test": "/kaggle/input/tweetsum/tweetsum_test.csv"})
tweetsum_validate = load_dataset("csv", data_files={"validation": "/kaggle/input/tweetsum/tweetsum_valid.csv"})

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

In [3]:
tweetsum_train

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 879
    })
})

In [4]:
tweetsum_test

DatasetDict({
    test: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 110
    })
})

In [5]:
tweetsum_validate

DatasetDict({
    validation: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 110
    })
})

In [22]:
tweetsum_train["train"][3]

{'id': 4,
 'dialogue': ' customer:  Wow, expected 4 packages yesterday, but only 2 showed up. 50% failure rate-not impressed. Glad I paid for fast shipping. customer:  Last month driver delivered box, sat in van for 10 min on phone, and drove off. Then got notice that package was "lost in transit". support: I\'m sorry you only received two of the orders. Is this happening with the same carrier each time? We can see what options are available for the lost items, reach us by phone or chat here:  ^MG customer: Problems are mostly with AMZL US delivery, but this time it\'s UPS. UPS is usually pretty good. customer: Also, the item isn\'t lost. I watched them deliver it. Status still says "lost in transit", though. No big deal, but it makes Amzn look silly support: Okay, thanks for that info, Dave. Have you received any updates via e-mail for the last two packages you were expecting? Let us know. We\'d like to help in any way we can! ^SM customer: Got shipping refunded via chat, and packages

In [23]:
tweetsum_test["test"][0]

{'id': 1,
 'dialogue': " customer: My watchlist is not updating with new episodes (past couple days).  Any idea why? support: Apologies for the trouble, Norlene! We're looking into this. In the meantime, try navigating to the season / episode manually. customer: Tried logging out/back in, that didn’t help support: Sorry! 😔 We assure you that our team is working hard to investigate, and we hope to have a fix ready soon! customer: Thank you! Some shows updated overnight, but others did not... support: We definitely understand, Norlene. For now, we recommend checking the show page for these shows as the new eps will be there customer: As of this morning, the problem seems to be resolved. Watchlist updated overnight with all new episodes. Thank you for your attention to this matter! I love Hulu 💚 support: Awesome! That's what we love to hear. If you happen to need anything else, we'll be here to support! 💚",
 'summary': 'Customer is complaining that the watchlist is not updated with new ep

In [24]:
tweetsum_validate["validation"][0]

{'id': 1,
 'dialogue': ' customer: hey, any explanation why the "Create similar playlist" function doesn\'t work anymore for me? MacBook, v1.0.64.399.g4637b02a. support: Hi there, the cavalry\'s here! Does logging out, restarting your device, and logging back into Spotify help? Keep us in the loop /JI customer: no, it didn\'t :( tried everything but I still can\'t create the playlist. it\'s not even greyed out but nothing happens after clicking on it. support: Okay. Can we have you try reinstalling the app? To do so, just follow the steps at  Let us know how it goes /JI customer: i tried and it\'s still the same... moreover, my song history is always empty, so I can\'t find songs from previous Discover playlists :( support: Does restarting your computer help at all? Also, is the song history you\'re referring to the History tab on your Play Queue? /MT customer: no, I tried that as well and just reinstalled again - didn\'t help. yes, that\'s what I mean. support: Could you DM us your ac

In [25]:
# Preprocessing with the TweetSUM dataset
tweetsum_train_clean = tweetsum_train.map(preprocess_missingchar_and_icon)
tweetsum_test_clean = tweetsum_test.map(preprocess_missingchar_and_icon)
tweetsum_validate_clean = tweetsum_validate.map(preprocess_missingchar_and_icon)

Map:   0%|          | 0/879 [00:00<?, ? examples/s]

Map:   0%|          | 0/110 [00:00<?, ? examples/s]

Map:   0%|          | 0/110 [00:00<?, ? examples/s]

In [26]:
tweetsum_train_clean["train"][10]

{'id': 11,
 'dialogue': "customer: these biscuits a couple of weeks ago, only just opened and looked at the best before date. . support: We can't see a picture of the best before date or biscuits attached to your tweets here. Please can you tweet or DM it to us again? Thanks. customer: . customer: . customer: . support: We'd certainly like to take a closer look into this. Please DM us a picture of your full receipt. customer: Don’t have a receipt as they were bought for my grandparents 3 weeks ago and have only just realised the date on them. customer: The biscuits were purchased at the Marks and Spencer’s Store at the Ricoh Arena, Coventry. support: Did yo use a sparks card on your transaction, Cian? customer: Yes. support: Hi Cian. I'm really sorry to see this, especially when it was such a lovely gesture too! No worries though - we got your back 1/3. support: Is there a barcode on the tin anywhere? If there is, could you take a pic and DM it to us please &amp; also let us know how m

In [27]:
from transformers import BartForConditionalGeneration, BartTokenizer

model_pretrained = BartForConditionalGeneration.from_pretrained("./finetuned_bart_samsum")
tokenizer_pretrained = BartTokenizer.from_pretrained("./finetuned_bart_samsum")
model_pretrained.resize_token_embeddings(len(tokenizer_pretrained))

BartScaledWordEmbedding(50265, 768, padding_idx=1)

In [28]:
tweetsum_train_dataset = tweetsum_train_clean["train"].map(lambda x: preprocessData(x, tokenizer), batched=True)
tweetsum_validation_dataset = tweetsum_validate_clean["validation"].map(lambda x: preprocessData(x, tokenizer), batched=True)
tweetsum_test_dataset = tweetsum_test_clean["test"].map(lambda x: preprocessData(x, tokenizer), batched=True)

Map:   0%|          | 0/879 [00:00<?, ? examples/s]



Map:   0%|          | 0/110 [00:00<?, ? examples/s]

Map:   0%|          | 0/110 [00:00<?, ? examples/s]

In [29]:
def compute_metrics_finetune(pred):
    rouge = evaluate.load("rouge")
    labels_ids = pred.label_ids
    pred_ids = pred.predictions[0]

    pred_str = tokenizer_pretrained.batch_decode(pred_ids, skip_special_tokens=True)
    labels_ids[labels_ids == -100] = tokenizer_pretrained.pad_token_id
    label_str = tokenizer_pretrained.batch_decode(labels_ids, skip_special_tokens=True)

    rouge_output = rouge.compute(
        predictions=pred_str,
        references=label_str,
        rouge_types=["rouge1", "rouge2", "rougeL", "rougeLsum"],
    )

    return {
        "R1": round(rouge_output["rouge1"], 4),
        "R2": round(rouge_output["rouge2"], 4),
        "RL": round(rouge_output["rougeL"], 4),
        "RLsum": round(rouge_output["rougeLsum"], 4),
    }

In [30]:
from transformers import BartForConditionalGeneration, TrainingArguments, Trainer, EarlyStoppingCallback

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results/fine-tuned-model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_steps=500,
    learning_rate=0.5*(5e-5),
    weight_decay= 0.01,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=10,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_R1",
    greater_is_better=True,
)

# Initialize the Trainer
trainer = Trainer(
    model=model_pretrained,
    args=training_args,
    train_dataset=tweetsum_train_dataset,
    eval_dataset=tweetsum_validation_dataset,
    compute_metrics=compute_metrics_finetune,
    tokenizer=tokenizer_pretrained,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# Train the model
trainer.train()

# Save the fine-tuned model and tokenizer
model.save_pretrained("./finetuned_bart_tweetsum")
tokenizer.save_pretrained("./finetuned_bart_tweetsum")

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,R1,R2,Rl,Rlsum
1,No log,0.684559,0.5873,0.2994,0.5458,0.5457
2,No log,0.644904,0.5949,0.3124,0.5538,0.5537
3,No log,0.641341,0.5973,0.3255,0.5578,0.5584
4,No log,0.639831,0.5947,0.3223,0.5606,0.5607
5,0.698800,0.624113,0.5994,0.3258,0.5617,0.5623
6,0.698800,0.620119,0.597,0.3248,0.5587,0.5596
7,0.698800,0.622525,0.5993,0.3298,0.5633,0.5644
8,0.698800,0.622855,0.6016,0.3314,0.5639,0.5649
9,0.698800,0.623293,0.6006,0.3295,0.5629,0.5636
10,0.574700,0.62303,0.6012,0.3301,0.5642,0.5652


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].


('./finetuned_bart_tweetsum/tokenizer_config.json',
 './finetuned_bart_tweetsum/special_tokens_map.json',
 './finetuned_bart_tweetsum/vocab.json',
 './finetuned_bart_tweetsum/merges.txt',
 './finetuned_bart_tweetsum/added_tokens.json',
 './finetuned_bart_tweetsum/tokenizer.json')

In [31]:
# Model evaluating using ROUGE
from evaluate import load
import torch

# Load ROUGE metric
rouge = load("rouge")

# Function to generate predictions
def generate_predictions(model, tokenizer, dataset):
    predictions = []
    references = []

    for example in dataset:
        # Prepare the input dialogue
        inputs = tokenizer(
            example["dialogue"],
            return_tensors="pt",
            max_length=512,
            truncation=True,
            padding="max_length"
        )

        # Move inputs to GPU if available
        inputs = {k: v.to("cuda") for k, v in inputs.items()} if torch.cuda.is_available() else inputs

        # Generate summary
        with torch.no_grad():
            outputs = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=128,
                min_length=30,
                do_sample=False
            )

        # Decode the generated summary
        generated_summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Append generated summary and reference summary
        predictions.append(generated_summary)
        references.append(example["summary"])

    return predictions, references

# Generate predictions and references
test_predictions, test_references = generate_predictions(model_pretrained, tokenizer_pretrained, tweetsum_validation_dataset)

# Compute ROUGE scores
rouge_results = rouge.compute(predictions=test_predictions, references=test_references)

# Print ROUGE scores
print("ROUGE Scores:")
for key, value in rouge_results.items():
    print(f"{key}: {value:.4f}")

ROUGE Scores:
rouge1: 0.4339
rouge2: 0.2081
rougeL: 0.3727
rougeLsum: 0.3733


In [36]:
!pip install zip_files
!zip-folder --auto-root --outfile /kaggle/working/run.zip /kaggle/working/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [37]:
from IPython.display import FileLink
FileLink(r'run.zip')