In [1]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig

In [2]:
huggingface_dataset_name = "knkarthick/dialogsum"

dataset = load_dataset(huggingface_dataset_name)

Downloading readme:   0%|          | 0.00/4.65k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/442k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

### Sample Data

In [11]:

for i in range(10):
    print("conversation:\n",dataset['train']['dialogue'][i])
    print("\n\nsummary:\n",dataset['train']['summary'][i])
    print("=".center(100))

conversation:
 #Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.


summary:
 Mr. Smith's getting a check-up, and Docto

## Loading Model

In [17]:
model_name='google/flan-t5-base'

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [18]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

## Sample tokens

In [23]:
sentence = "this is testing"

tokenized = tokenizer(sentence, return_tensors="pt")

print(f"sentence : {sentence}")
print(f"tokenized sentence : {tokenized.get('input_ids')}")

tokens_decoded = tokenizer.decode(tokenized.get("input_ids")[0])
print(f"decoded : {tokens_decoded}")

sentence : this is testing
tokenized sentence : tensor([[  48,   19, 2505,    1]])
decoded : this is testing</s>


## [ZERO SHOT] Predicting without any prompt engineering

In [100]:
def predict(text, config=None):
    inputs = tokenizer(text, return_tensors="pt")
    inputs = inputs.get("input_ids")
    pred = model.generate(inputs, max_new_tokens=50, generation_config=config)[0]
    
    tokens_decoded = tokenizer.decode(pred, skip_special_tokens=True)
    print(f"output : {tokens_decoded}")

In [85]:
def get_prompt(text):
    return f"""
        Summerize below conversation:
        {text}

        Summary:
    """


def get_prompt2(text):
    return f"""
        Summerize below conversation:
        {text}

        what is summary of above conversation between person1 and person2
    """

def get_n_shot_prompt(index, n=1):

    prompt = ""
    for i in range(n):
            
        temp = f"""
        Summerize below conversation:
        
        {dataset['train']['dialogue'][i]}
    
        SUMMERY:
        {dataset['train']['summary'][i]}
            
        """
        prompt += temp

    
    
    sentence = f"""
        Summerize below conversation:
        
        {dataset['train']['dialogue'][index]}
    
        SUMMERY:
        
    """
    
    prompt += sentence
    return prompt

In [59]:
prompt = get_prompt(
    dataset['train']['dialogue'][10]
)
predict(prompt)

output : #Person1#: I need some sugar, oranges, and milk. #Person2#: Okay.


## [ZERO SHOT] Predicting with prompt engineering

In [43]:
prompt = get_prompt(
    dataset['train']['dialogue'][1]
)
predict(prompt)

output : <pad> Ricky is coming to the doctor's office for his vaccinations.</s>


## [ONE SHOT] Prediction

In [46]:
dataset['train']['summary'][0]

"Mr. Smith's getting a check-up, and Doctor Hawkins advises him to have one every year. Hawkins'll give some information about their classes and medications to help Mr. Smith quit smoking."

In [92]:

prompt = get_n_shot_prompt(25, 1)
print(prompt)


        Summerize below conversation:
        
        #Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.
    
       

In [93]:
predict(prompt)

output : The bar is a good place to meet people.


In [94]:

prompt = get_n_shot_prompt(55, 1)
print(prompt)


        Summerize below conversation:
        
        #Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.
    
       

In [96]:
predict(prompt)

output : #Person1#: Bye!


In [98]:

prompt = get_n_shot_prompt(205, 1)
print(prompt)

predict(prompt)


        Summerize below conversation:
        
        #Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.
    
       

## [ONE SHOT] Prediction - Temperature EXP

In [107]:
model_config = GenerationConfig(max_new_tokens=50, temperature=1.2)
model_config

GenerationConfig {
  "max_new_tokens": 50,
  "temperature": 1.2,
  "transformers_version": "4.31.0"
}

In [108]:

prompt = get_n_shot_prompt(25, 1)
print(prompt)
predict(prompt, model_config)


        Summerize below conversation:
        
        #Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.
    
       

In [109]:

prompt = get_n_shot_prompt(21, 1)
print(prompt)
predict(prompt, model_config)


        Summerize below conversation:
        
        #Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.
    
       

In [114]:
model_config = GenerationConfig(max_new_tokens=50, temperature=0.8, do_sample=True)

prompt = get_n_shot_prompt(21, 1)
print(prompt)
predict(prompt, model_config)


        Summerize below conversation:
        
        #Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.
    
       

## [FEW SHOT] Prediction

In [86]:

prompt = get_n_shot_prompt(29, 3)
print(prompt)


        Summerize below conversation:
        
        #Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.
    
       

In [87]:
predict(prompt)

output : #Person1# will help Person2 with the laundry.


In [90]:

prompt = get_n_shot_prompt(856, 3)
print(prompt)


        Summerize below conversation:
        
        #Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.
#Person2#: Ok, thanks doctor.
    
       

In [91]:
predict(prompt)

output : #Person1# has a game on May 26 and May 30.
