In [1]:
import torch
from transformers import Trainer, TrainingArguments
from transformers import BartForConditionalGeneration, BartTokenizer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import os
from datasets import load_dataset,load_metric
# from evaluate import load_metric
from sklearn.model_selection import train_test_split

device = 'cuda:0'
import emoji
emoji_list = emoji.EMOJI_DATA.keys()
emoji_descriptions = [emoji.demojize(e, delimiters=("<", ">")) for e in emoji_list]

### Loading the base models from Huggingface


In [None]:
model_name = "facebook/bart-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)


In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

new_tokens = ['<file_photo>','<file_picture>','<file_other>','<file_video>','<file_image>','<file_gif>']
new_tokens = new_tokens+emoji_descriptions
tokenizer.add_special_tokens({'additional_special_tokens': new_tokens})
model.resize_token_embeddings(len(tokenizer))

### Loading the SAMSum dataset

In [2]:
dataset = load_dataset("samsum")

train_data = dataset["train"]
validation_data = dataset["validation"] 
test_data = dataset["test"]

### Tokenizing Function

In [5]:
def tokenize_function(tokenizer,examples):
    input_dialogues = ["Summarize dialogue >>\n " + emoji.demojize(dialogue, delimiters=("<", ">")) for dialogue in examples["dialogue"]]
    inputs = tokenizer(input_dialogues, padding="max_length", truncation=True, max_length=1000)
    targets = tokenizer(examples["summary"], padding="max_length", truncation=True, max_length=100)
    return {"input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"], "labels": targets["input_ids"]}


In [6]:
train_data_tokenized = train_data.map(lambda examples: tokenize_function(tokenizer, examples), batched=True)
validation_data_tokenized = validation_data.map(lambda examples: tokenize_function(tokenizer, examples), batched=True)
test_data_tokenized = test_data.map(lambda examples: tokenize_function(tokenizer, examples), batched=True)

### Sample summarization code

In [7]:
def summarize(tokenizer,model,text):
    print(f"Summarize dialogue >>\n {emoji.demojize(text, delimiters=('<', '>'))}")
    inputs = tokenizer(f"Summarize dialogue >>\n {emoji.demojize(text, delimiters=('<', '>'))}", return_tensors="pt", max_length=1000, truncation=True, padding="max_length").to(device)
    
    # Generate summary
    summary_ids = model.generate(inputs.input_ids, num_beams=4, max_length=100, early_stopping=True)
    # Decode the summary
    summary = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids]
    
    return summary[0]


### Model - BART

In [8]:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Trainable parameters:", trainable_params)        
#247577856/142329600

Trainable parameters: 142329600


### Freezing all layers and keeping encoder weights as trainable

In [9]:
# Freeze all parameters first
for param in model.parameters():
    param.requires_grad = False

# Unfreeze encoder layers
for param in model.model.encoder.parameters():
    param.requires_grad = True

# Check trainable parameters 
for name, param in model.named_parameters():
    print(f"{name} is {'trainable' if param.requires_grad else 'frozen'}")


model.shared.weight is trainable
model.encoder.embed_positions.weight is trainable
model.encoder.layers.0.self_attn.k_proj.weight is trainable
model.encoder.layers.0.self_attn.k_proj.bias is trainable
model.encoder.layers.0.self_attn.v_proj.weight is trainable
model.encoder.layers.0.self_attn.v_proj.bias is trainable
model.encoder.layers.0.self_attn.q_proj.weight is trainable
model.encoder.layers.0.self_attn.q_proj.bias is trainable
model.encoder.layers.0.self_attn.out_proj.weight is trainable
model.encoder.layers.0.self_attn.out_proj.bias is trainable
model.encoder.layers.0.self_attn_layer_norm.weight is trainable
model.encoder.layers.0.self_attn_layer_norm.bias is trainable
model.encoder.layers.0.fc1.weight is trainable
model.encoder.layers.0.fc1.bias is trainable
model.encoder.layers.0.fc2.weight is trainable
model.encoder.layers.0.fc2.bias is trainable
model.encoder.layers.0.final_layer_norm.weight is trainable
model.encoder.layers.0.final_layer_norm.bias is trainable
model.encoder

#### Training

In [10]:
output_dir = "./bart_base_encoder_finetune_emoji"


In [11]:
training_args = TrainingArguments(
    overwrite_output_dir = False,
    output_dir=output_dir,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
#     evaluation_strategy="steps",
    logging_dir=f"{output_dir}/logs",
    logging_steps=500,
    save_strategy="epoch",
    save_total_limit=3,
    num_train_epochs=10,
    learning_rate=1e-4,
)

rouge_metric = load_metric("rouge")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data_tokenized,
    eval_dataset=validation_data_tokenized,
    compute_metrics=lambda pred: rouge_metric.compute(predictions=pred.predictions, references=pred.label_ids)
)


  rouge_metric = load_metric("rouge")
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [12]:
trainer.train(resume_from_checkpoint = True)



Step,Training Loss
33500,0.2096
34000,0.2036
34500,0.2011
35000,0.2043
35500,0.2021
36000,0.2093
36500,0.2093


TrainOutput(global_step=36830, training_loss=0.020547674481000647, metrics={'train_runtime': 1340.8394, 'train_samples_per_second': 109.871, 'train_steps_per_second': 27.468, 'total_flos': 8.77210693632e+16, 'train_loss': 0.020547674481000647, 'epoch': 10.0})

In [None]:
trainer.save_model('./bart_base_encoder_finetune_emoji_save')

model_tok_save_directory = "./bart_base_encoder_finetune_model_tokenizer"
model.save_pretrained(model_tok_save_directory)
tokenizer.save_pretrained(model_tok_save_directory)

tokenizer.save_pretrained('./tokenizer-emoji')


eval_trainer = Trainer(
    model=model,
    eval_dataset=validation_data_tokenized,
    compute_metrics=lambda pred: rouge_metric.compute(predictions=pred.predictions, references=pred.label_ids)
)

# Evaluate the model on the test set
results = eval_trainer.evaluate(test_data)
print("ROUGE scores:", results)

In [15]:
model_name = "facebook/bart-base"
orignal_model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
orignal_model_tok = AutoTokenizer.from_pretrained(model_name)


### ROUGE SCORE -- inference


In [3]:
def summarize(tokenizer,model,text):
    """
    Summarizes the given text using the provided tokenizer and model.

    Args:
        tokenizer (Tokenizer): The tokenizer used to tokenize the input text.
        model (Model): The model used for summarization.
        text (str): The text to be summarized.

    Returns:
        list: A list containing the summarized text.    
    """    
#     print(f"Summarize dialogue >>\n {emoji.demojize(text, delimiters=('<', '>'))}")
    inputs = tokenizer(f"Summarize dialogue >>\n {emoji.demojize(text, delimiters=('<', '>'))}", return_tensors="pt", max_length=1000, truncation=True, padding="max_length").to(device)
    
    # Generate summary
    summary_ids = model.generate(inputs.input_ids, num_beams=4, max_length=100, early_stopping=True)
    # Decode the summary
    summary = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids]
    
    return summary


generated_summaries = []
actual_summaries = []
generated_summary_orignal = []
dialogue_list = []
SAVED_MODEL_PATH = './bart_base_encoder_finetune_model_tokenizer'
SAVED_TOK_PATH = 'tokenizer-emoji'
SAVED_MODEL_TOK = BartTokenizer.from_pretrained(SAVED_MODEL_PATH)#.to(device)
SAVED_MODEL = AutoModelForSeq2SeqLM.from_pretrained(SAVED_MODEL_PATH).to(device)


model_name = "facebook/bart-large-cnn"
orignal_model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
orignal_model_tok = AutoTokenizer.from_pretrained(model_name)


Some weights of BartForConditionalGeneration were not initialized from the model checkpoint at facebook/bart-large-cnn and are newly initialized: ['model.shared.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
sum(p.numel() for p in bart_base_model.parameters() if p.requires_grad),sum(p.numel() for p in SAVED_MODEL.parameters() if p.requires_grad)

(139420416, 139420416)

### Inference loop to generate summaries on test dataset for ROUGE metrics

In [None]:
import time
start_time = time.time()
i=1
j=0
for example in test_data:
    if i%10==0:
#         print()
        j+=10
        print(f"samples summarized:{j}\ttime:{time.time()-start_time}")
        
    generated_summary = summarize(SAVED_MODEL_TOK,SAVED_MODEL,example['dialogue'])
    generated_summaries.append(generated_summary[0])
    generated_summary_o = summarize(orignal_model_tok,orignal_model,example['dialogue'])
    generated_summary_orignal.append(generated_summary_o[0])
    actual_summaries.append(example["summary"])
    dialogue_list.append(example['dialogue'])
    
    i+=1
    
end_time = time.time()
total_time = end_time - start_time
print(f"Total time taken: {total_time} seconds")

samples summarized:10	time:13.758821725845337
samples summarized:20	time:28.998841762542725
samples summarized:30	time:44.66907620429993
samples summarized:40	time:59.83872199058533
samples summarized:50	time:75.43314599990845
samples summarized:60	time:89.36456394195557
samples summarized:70	time:103.62737607955933
samples summarized:80	time:118.56200432777405
samples summarized:90	time:133.41441798210144
samples summarized:100	time:147.89073586463928
samples summarized:110	time:161.93118524551392
samples summarized:120	time:176.86285734176636
samples summarized:130	time:191.47789239883423
samples summarized:140	time:206.33463406562805
samples summarized:150	time:220.77889227867126
samples summarized:160	time:236.15363025665283
samples summarized:170	time:249.60998916625977
samples summarized:180	time:264.09212470054626
samples summarized:190	time:279.32600688934326
samples summarized:200	time:294.1669371128082
samples summarized:210	time:309.57530903816223
samples summarized:220	time

## ROUGE for orignal base model

In [6]:
from rouge_score import rouge_scorer

start_time = time.time()

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
scores = [scorer.score(actual_summary, gen_summary) for actual_summary, gen_summary in zip(actual_summaries, generated_summary_orignal)]
end_time = time.time()
total_time = end_time - start_time
print(f"Total time taken: {total_time} seconds")

start_time = time.time()

# To calculate average scores
average_scores = {}
for key in scores[0].keys():
    average_scores[key] = sum(score[key].fmeasure for score in scores) / len(scores)

    
end_time = time.time()
total_time = end_time - start_time
print(f"Total time taken: {total_time} seconds")    
print("Average ROUGE scores:")
for key, value in average_scores.items():
    print(f"{key}: {value}")


Total time taken: 1.0033118724822998 seconds
Total time taken: 0.0003552436828613281 seconds
Average ROUGE scores:
rouge1: 0.30412814809078337
rouge2: 0.10309376001724774
rougeL: 0.22742836389781787


In [7]:
import pandas as pd


temp_df = pd.DataFrame({'finetune_summary':generated_summaries,'original_summary':generated_summary_orignal,'human_summary':actual_summaries})

In [1]:
import pandas as pd
df = pd.read_csv("BART_BASE_encoder_only.csv")

In [8]:
temp_df.to_csv('BART_BASE_encoder_only.csv')#.loc[3][0]

### ROUGE for fine-tuned model

In [2]:
generated_summaries = df['finetune_summary']
actual_summaries = df['human_summary']
from rouge_score import rouge_scorer
import time
start_time = time.time()

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
scores = [scorer.score(actual_summary, gen_summary) for actual_summary, gen_summary in zip(actual_summaries, generated_summaries)]
end_time = time.time()
total_time = end_time - start_time
print(f"Total time taken: {total_time} seconds")

start_time = time.time()

# To calculate average scores
average_scores = {}
for key in scores[0].keys():
    average_scores[key] = sum(score[key].fmeasure for score in scores) / len(scores)

    
end_time = time.time()
total_time = end_time - start_time
print(f"Total time taken: {total_time} seconds")    
print("Average ROUGE scores:")
for key, value in average_scores.items():
    print(f"{key}: {value}")


Total time taken: 0.6223886013031006 seconds
Total time taken: 0.00037479400634765625 seconds
Average ROUGE scores:
rouge1: 0.4990108195083427
rouge2: 0.2529090060055536
rougeL: 0.41599119708693794
