## Improve Dialogue Summary using Prompt Engineering

### 1. Import Packages

In [1]:
import os
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig

  from .autonotebook import tqdm as notebook_tqdm


### 2. Import DialogSum Dataset from Hugging Face

In [2]:
huggingface_dataset_name = "knkarthick/dialogsum"

dataset = load_dataset(huggingface_dataset_name)

Print sample dialogs with human-generated summaries

In [5]:
idx = 0

dash_line = '-'.join('' for x in range(100))

print(dash_line)
print('Example ', idx + 1)
print(dash_line)
print('INPUT DIALOGUE:')
print(dataset['test'][idx]['dialogue'])
print(dash_line)
print('BASELINE HUMAN SUMMARY:')
print(dataset['test'][idx]['summary'])
print(dash_line)
print()

---------------------------------------------------------------------------------------------------
Example  1
---------------------------------------------------------------------------------------------------
INPUT DIALOGUE:
#Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir, many employees use Instant Messaging to c

### 3. Load Pre-trained Large Language Model (LLM) FLAN-T5 from Hugging Face

In [6]:
model_name='google/flan-t5-base'

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)



Check the number of parameters of the LLM Model

In [18]:
def num_model_parameters(model):
    """
    Print the number of model parameters
    
    Argument: model
    Return: number of trainable paramters
    """

    trainable_model_params = 0
    all_model_params = 0

    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()

    return trainable_model_params, all_model_params

In [19]:
train_param, all_param = num_model_parameters(model)
print(f"Total number of parameters: {all_param}")
print(f"Total number of trainable parameters: {train_param}")

Total number of parameters: 247577856
Total number of trainable parameters: 247577856


Download the tokenizer useed in pretrained model

In [20]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)



Test some encoding and decoding

In [21]:
sentence = "Have a nice day"

sentence_encoded = tokenizer(sentence, return_tensors='pt')

sentence_decoded = tokenizer.decode(
        sentence_encoded["input_ids"][0], 
        skip_special_tokens=True
    )

print('ENCODED SENTENCE:')
print(sentence_encoded["input_ids"][0])
print('\nDECODED SENTENCE:')
print(sentence_decoded)

ENCODED SENTENCE:
tensor([2114,    3,    9, 1245,  239,    1])

DECODED SENTENCE:
Have a nice day


### 4. Use the Pre-trained Model for Summarization Task

In [51]:
def summarize_dialogue(dataset, model, tokenizer, idx):
    dialogue = dataset['test'][idx]['dialogue']
    summary = dataset['test'][idx]['summary']

    inputs = tokenizer(dialogue, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )

    print(dash_line)
    print('Example ', idx)
    print(dash_line)
    print(f'INPUT PROMPT:\n{dialogue}')
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}')
    print(dash_line)
    print(f'MODEL GENERATION - WITHOUT PROMPT ENGINEERING:\n{output}\n')

In [53]:
summarize_dialogue(dataset, model, tokenizer, idx=40)

---------------------------------------------------------------------------------------------------
Example  40
---------------------------------------------------------------------------------------------------
INPUT PROMPT:
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
---------------------------------------------------------------------------------------------------
MODEL GENERATION - WITHOUT PROMPT ENGINEERING:
Person1: It's ten to nine.



### 5. Summarize Dialogue with Instruction Prompt

### 5.1. Zero-shot Inference
Here we added the text "Summarize the following conversation" and "Summary:" to the prompt to help imporve the summary.

In [47]:
def zero_shot_inference(dataset, model, tokenizer, idx):
    dialogue = dataset['test'][idx]['dialogue']
    summary = dataset['test'][idx]['summary']

    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
    """

    # Input constructed prompt instead of the dialogue.
    inputs = tokenizer(prompt, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )
    
    print(dash_line)
    print('Example ', idx)
    print(dash_line)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}')
    print(dash_line)    
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')

In [54]:
zero_shot_inference(dataset, model, tokenizer, idx=40)

---------------------------------------------------------------------------------------------------
Example  40
---------------------------------------------------------------------------------------------------
INPUT PROMPT:

Summarize the following conversation.

#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

Summary:
    
---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
---------------------------------------------------------------------------------------------------
MODEL GENERATION - ZERO SHOT:
The train is about t

### 5.2. One-Shot Inference

In [76]:
def make_prompt(idx_1, idx_2):
    prompt = ''
    dialogue = dataset['test'][idx_1]['dialogue']
    summary = dataset['test'][idx_1]['summary']

    prompt += f"""
Dialogue:

{dialogue}

What was going on?
{summary}


"""
    
    dialogue = dataset['test'][idx_2]['dialogue']
    
    prompt += f"""
Dialogue:

{dialogue}

What was going on?
"""
        
    return prompt

In [80]:
def one_shot_inference(dataset, model, tokenizer, idx_1, idx_2):

    summary = dataset['test'][idx_2]['summary']

    one_shot_prompt = make_prompt(idx_1, idx_2)
    inputs = tokenizer(one_shot_prompt, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
    print(dash_line)
    print(f'MODEL GENERATION - FEW SHOT:\n{output}')

Print one-shot inference prompt

In [81]:
print(make_prompt(50, 40))


Dialogue:

#Person1#: Yeah. Just pull on this strip. Then peel off the back.
#Person2#: You might make a few enemies this way.
#Person1#: If they don't think this is fun, they're not meant to be our friends.
#Person2#: You mean your friends. I think it's cruel.
#Person1#: Yeah. But it's fun. Look at those two ugly old ladies. . . or are they men?
#Person2#: Hurry! Get a shot!. . . Hand it over!
#Person1#: I knew you'd come around. . .

What was going on?
#Person1# is about to make a prank. #Person2# thinks it's cruel at first but then joins.



Dialogue:

#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

What was going on?



Print human and model summary

In [82]:
one_shot_inference(dataset, model, tokenizer, idx_1=50, idx_2=40)

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.

---------------------------------------------------------------------------------------------------
MODEL GENERATION - FEW SHOT:
Tom is late for the train. He has to catch it at 9:30.


### 5.3. Few-Shot Inference

In [88]:
def make_prompt2(idx_array, idx_2):
    prompt = ''
    for index in idx_array:
        dialogue = dataset['test'][index]['dialogue']
        summary = dataset['test'][index]['summary']

        prompt += f"""
Dialogue:

{dialogue}

What was going on?
{summary}


"""
    
    dialogue = dataset['test'][idx_2]['dialogue']
    
    prompt += f"""
Dialogue:

{dialogue}

What was going on?
"""
        
    return prompt

In [89]:
def few_shot_inference(dataset, model, tokenizer, idx_array, idx_2):

    summary = dataset['test'][idx_2]['summary']

    one_shot_prompt = make_prompt(idx_array, idx_2)
    inputs = tokenizer(one_shot_prompt, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
    print(dash_line)
    print(f'MODEL GENERATION - FEW SHOT:\n{output}')

In [90]:
idx_array = [50, 60]
idx_2 = 40

Print the few-shot inference prompt

In [92]:
print(make_prompt2(idx_array, idx_2))


Dialogue:

#Person1#: Yeah. Just pull on this strip. Then peel off the back.
#Person2#: You might make a few enemies this way.
#Person1#: If they don't think this is fun, they're not meant to be our friends.
#Person2#: You mean your friends. I think it's cruel.
#Person1#: Yeah. But it's fun. Look at those two ugly old ladies. . . or are they men?
#Person2#: Hurry! Get a shot!. . . Hand it over!
#Person1#: I knew you'd come around. . .

What was going on?
#Person1# is about to make a prank. #Person2# thinks it's cruel at first but then joins.



Dialogue:

#Person1#: Hey, Frank. I heard you got a new job.
#Person2#: Yeah, Judy. I will be working for the Post Office. It's not a bad job.
#Person1#: Is it true that you have a heavy work schedule?
#Person2#: That's right. I am supposed to work at 5am everyday, and I only get 45 minutes for lunch.
#Person1#: So, why did you apply for such a demanding job?
#Person2#: Well, the government offers its employees excellent health insurance benefi

In [93]:
few_shot_inference(dataset, model, tokenizer, idx_array, idx_2)

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.

---------------------------------------------------------------------------------------------------
MODEL GENERATION - FEW SHOT:
Tom is late for the train. He has to catch it at 9:30.
