# Experiment overview
- LLM: Flan-T5
- Task: dialogue summarization
- Experiment: zero-shot, one-shot, and few-shot inference (in-context learning)


## Installing and importing necessary libraries

In [1]:
# Install pinned versions of the libraries used below; --quiet trims pip's output.
# NOTE(review): the recorded run reported a dependency conflict — the preinstalled
# torchaudio 2.0.2+cu118 requires torch==2.0.1, but torch 1.13.1 is pinned here.
# Confirm torchaudio is not needed by this notebook.
%pip install \
    torch==1.13.1 \
    torchdata==0.5.1 --quiet
%pip install \
    transformers==4.27.2 \
    datasets==2.11.0  --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m893.7 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.6/4.6 MB[0m [31m64.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m42.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.1/317.1 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.0/21.0 MB[0m [31m60.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchaudio 2.0.2+cu118 requires torch==2.0.1, but you have torch 1.13.1 which is incompati

In [2]:
import random

from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig
from transformers import set_seed

### Dialogues dataset
[DialogSum](https://paperswithcode.com/dataset/dialogsum) is a large-scale dialogue summarization dataset, consisting of 13,460 dialogues with corresponding manually labeled summaries and topics.

In [3]:
# Hugging Face Hub identifier of the DialogSum dataset.
dataset_name = "knkarthick/dialogsum"

# Download the dataset (later calls reuse the local cache) and keep all of its splits.
dataset = load_dataset(path=dataset_name)

Downloading readme:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

Downloading and preparing dataset csv/knkarthick--dialogsum to /root/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-c8fac5d84cd35861/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/442k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-c8fac5d84cd35861/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

### Sample dialogues from the dataset

In [4]:
# Draw the sample from a dedicated, seeded generator so that `example` -- which the
# later baseline and zero-shot cells reuse -- is identical on every Restart & Run All
# and the global `random` state is left untouched.
EXAMPLE_SEED = 42
EXAMPLE_POOL_SIZE = 400  # only the first 400 test-split rows are candidates
NUM_EXAMPLES = 2

example = random.Random(EXAMPLE_SEED).sample(range(EXAMPLE_POOL_SIZE), NUM_EXAMPLES)

# Horizontal rule used to separate sections in the printed output of this notebook.
dash = '_'*100

# Show each sampled dialogue next to its human-written reference summary.
for i, index in enumerate(example):
    record = dataset['test'][index]

    print(dash)
    print(f'Example {i + 1}')
    print(dash)
    print('INPUT DIALOGUE:')
    print(record['dialogue'])
    print(dash)
    print('BASELINE HUMAN SUMMARY:')
    print(record['summary'])
    print(dash)
    print()

____________________________________________________________________________________________________
Example 1
____________________________________________________________________________________________________
INPUT DIALOGUE:
#Person1#: Have you ever been invited to a Chinese feast?
#Person2#: Sure. Beijingers are distinguished for their warmth and hospitality.
#Person1#: As far as you could see, is dining at a Chinese table a distinct experience? To what extent does it differ from that in your homeland?
#Person2#: To begin with, I have to use chopsticks.
#Person1#: But you look quite proficient in using them now.
#Person2#: Certainly. After all, I've been here for two years. It's not much uphill work for a foreigner to get used to Chinese dinner utensils. Though, I do feel hazy about how to behave appropriately at a Chinese table.
#Person1#: Yes, table etiquette. There are a multitude of rules that foreigners might never know until they were told. Some practices even appear somewhat

#### Loading Flan-T5 model

In [5]:
# Checkpoint identifier on the Hugging Face Hub.
model_name = "google/flan-t5-base"

# Load the pretrained sequence-to-sequence model for that checkpoint.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

#### Loading tokenizer of Flan-T5 model

In [6]:
# Load the tokenizer for the same checkpoint, requesting the fast implementation.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=True,
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

#### Let's try to summarize the dialogues using Flan-T5 without any prompt engineering.

In [7]:
# Baseline: pass each sampled dialogue to the model verbatim, with no instruction text.
for i, index in enumerate(example):
    record = dataset['test'][index]
    dialogue = record['dialogue']
    summary = record['summary']

    inputs = tokenizer(dialogue, return_tensors='pt')
    # Only one prompt is passed in, so the first generated sequence is the one we want.
    generated_ids = model.generate(inputs["input_ids"], max_new_tokens=50)[0]
    output = tokenizer.decode(generated_ids, skip_special_tokens=True)

    print(dash)
    print('Example ', i + 1)
    print(dash)
    print(f'INPUT PROMPT:\n{dialogue}')
    print(dash)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}')
    print(dash)
    print(f'MODEL GENERATION - WITHOUT PROMPT ENGINEERING:\n{output}\n')

____________________________________________________________________________________________________
Example  1
____________________________________________________________________________________________________
INPUT PROMPT:
#Person1#: Have you ever been invited to a Chinese feast?
#Person2#: Sure. Beijingers are distinguished for their warmth and hospitality.
#Person1#: As far as you could see, is dining at a Chinese table a distinct experience? To what extent does it differ from that in your homeland?
#Person2#: To begin with, I have to use chopsticks.
#Person1#: But you look quite proficient in using them now.
#Person2#: Certainly. After all, I've been here for two years. It's not much uphill work for a foreigner to get used to Chinese dinner utensils. Though, I do feel hazy about how to behave appropriately at a Chinese table.
#Person1#: Yes, table etiquette. There are a multitude of rules that foreigners might never know until they were told. Some practices even appear somewhat 

### Zero shot Inference

In [8]:
# Zero-shot: wrap each sampled dialogue in an instruction-style prompt, with no worked examples.
for i, index in enumerate(example):
    record = dataset['test'][index]
    dialogue = record['dialogue']
    summary = record['summary']

    # Same text as the multi-line template: blank line, "Dialogue:" header, the
    # dialogue itself, then the question the model is expected to answer.
    prompt = f"\nDialogue:\n\n{dialogue}\n\nWhat was going on?\n"

    inputs = tokenizer(prompt, return_tensors='pt')
    # Only one prompt is passed in, so the first generated sequence is the one we want.
    generated_ids = model.generate(inputs["input_ids"], max_new_tokens=50)[0]
    output = tokenizer.decode(generated_ids, skip_special_tokens=True)

    print(dash)
    print('Example ', i + 1)
    print(dash)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
    print(dash)
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')

____________________________________________________________________________________________________
Example  1
____________________________________________________________________________________________________
INPUT PROMPT:

Dialogue:

#Person1#: Have you ever been invited to a Chinese feast?
#Person2#: Sure. Beijingers are distinguished for their warmth and hospitality.
#Person1#: As far as you could see, is dining at a Chinese table a distinct experience? To what extent does it differ from that in your homeland?
#Person2#: To begin with, I have to use chopsticks.
#Person1#: But you look quite proficient in using them now.
#Person2#: Certainly. After all, I've been here for two years. It's not much uphill work for a foreigner to get used to Chinese dinner utensils. Though, I do feel hazy about how to behave appropriately at a Chinese table.
#Person1#: Yes, table etiquette. There are a multitude of rules that foreigners might never know until they were told. Some practices even appe

### One shot Inference






In [9]:
def make_prompt(example_indices_full, example_index_to_summarize, split='test', data=None):
    """Build a one-/few-shot summarization prompt from dataset rows.

    Each index in ``example_indices_full`` contributes a fully worked example
    (dialogue, the question, and its reference summary). The dialogue at
    ``example_index_to_summarize`` is appended last with the question left
    unanswered so that the model completes it.

    Args:
        example_indices_full: Iterable of row indices used as worked examples.
            An empty iterable yields a zero-shot prompt.
        example_index_to_summarize: Row index of the dialogue to be summarized.
        split: Name of the split the rows are read from. Defaults to ``'test'``.
        data: Mapping from split name to an indexable collection of rows, each row
            having ``'dialogue'`` and ``'summary'`` keys. Defaults to the
            notebook-level ``dataset``.

    Returns:
        The assembled prompt string.
    """
    if data is None:
        # Fall back to the dataset loaded earlier in the notebook.
        data = dataset
    rows = data[split]

    prompt = ''
    for index in example_indices_full:
        row = rows[index]
        dialogue = row['dialogue']
        summary = row['summary']

        # The stop sequence '{summary}\n\n\n' is important for FLAN-T5. Other models may have their own preferred stop sequence.
        prompt += (
            f"\nDialogue:\n\n{dialogue}\n\n"
            f"What was going on?\n{summary}\n\n\n"
        )

    dialogue = rows[example_index_to_summarize]['dialogue']

    # The final block ends right after the question, leaving the answer to the model.
    prompt += f"\nDialogue:\n\n{dialogue}\n\nWhat was going on?\n"

    return prompt

In [10]:
# One-shot setup: a single worked example (test row 40) precedes the dialogue
# to be summarized (test row 200).
example_indices_full = [40]
example_index_to_summarize = 200

one_shot_prompt = make_prompt(
    example_indices_full=example_indices_full,
    example_index_to_summarize=example_index_to_summarize,
)

print(one_shot_prompt)


Dialogue:

#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

What was going on?
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.



Dialogue:

#Person1#: Have you considered upgrading your system?
#Person2#: Yes, but I'm not sure what exactly I would need.
#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
#Person2#: That would be a definite bonus.
#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.
#Person2#: How can we do that?
#Person1#: You'd probably need a faster processor, to begin with. And you also ne

In [11]:
# Human-written reference summary for the dialogue being summarized.
summary = dataset['test'][example_index_to_summarize]['summary']

# Tokenize the one-shot prompt, generate up to 50 new tokens, and decode the result.
inputs = tokenizer(one_shot_prompt, return_tensors='pt')
generated_ids = model.generate(inputs["input_ids"], max_new_tokens=50)[0]
output = tokenizer.decode(generated_ids, skip_special_tokens=True)

print(dash)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash)
print(f'MODEL GENERATION - ONE SHOT:\n{output}')

____________________________________________________________________________________________________
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.

____________________________________________________________________________________________________
MODEL GENERATION - ONE SHOT:
#Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to add a CD-ROM drive.


### Few shot Inference

In [12]:
# Few-shot setup: three worked examples (test rows 40, 80, 120) precede the
# dialogue to be summarized (test row 200).
example_indices_full = [40, 80, 120]
example_index_to_summarize = 200

few_shot_prompt = make_prompt(
    example_indices_full=example_indices_full,
    example_index_to_summarize=example_index_to_summarize,
)

print(few_shot_prompt)


Dialogue:

#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

What was going on?
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.



Dialogue:

#Person1#: May, do you mind helping me prepare for the picnic?
#Person2#: Sure. Have you checked the weather report?
#Person1#: Yes. It says it will be sunny all day. No sign of rain at all. This is your father's favorite sausage. Sandwiches for you and Daniel.
#Person2#: No, thanks Mom. I'd like some toast and chicken wings.
#Person1#: Okay. Please take some fruit salad and crackers for me.
#Person2#: Done. Oh, don't forget to take napkins disposable plates, cups and picnic blanket.
#Person1#: All set. 

In [13]:
# Human-written reference summary for the dialogue being summarized.
summary = dataset['test'][example_index_to_summarize]['summary']

# NOTE(review): on the recorded run the tokenizer warned that this prompt is 819 tokens,
# above the model's specified maximum sequence length of 512 -- confirm the model handles
# the over-length input as intended before drawing conclusions from this output.
inputs = tokenizer(few_shot_prompt, return_tensors='pt')
generated_ids = model.generate(inputs["input_ids"], max_new_tokens=50)[0]
output = tokenizer.decode(generated_ids, skip_special_tokens=True)

print(dash)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash)
print(f'MODEL GENERATION - FEW SHOT:\n{output}')

Token indices sequence length is longer than the specified maximum sequence length for this model (819 > 512). Running this sequence through the model will result in indexing errors


____________________________________________________________________________________________________
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.

____________________________________________________________________________________________________
MODEL GENERATION - FEW SHOT:
#Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to upgrade his hardware.


In [17]:
# Sampling (do_sample=True) picks tokens at random, so seed the random number
# generators first; otherwise this cell prints a different summary on every run.
SAMPLING_SEED = 42
set_seed(SAMPLING_SEED)

# Decoding settings under test. Other settings tried during this experiment:
# non-sampling decoding with max_new_tokens=50 or max_new_tokens=10, and sampling
# with temperature=0.1 or temperature=0.5.
generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)

# Look the reference summary up here so this cell does not rely on a `summary`
# variable left behind by an earlier cell.
summary = dataset['test'][example_index_to_summarize]['summary']

inputs = tokenizer(few_shot_prompt, return_tensors='pt')
# Only one prompt is passed in, so the first generated sequence is the one we want.
generated_ids = model.generate(
    inputs["input_ids"],
    generation_config=generation_config,
)[0]
output = tokenizer.decode(generated_ids, skip_special_tokens=True)

print(dash)
print(f'MODEL GENERATION - FEW SHOT:\n{output}')
print(dash)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')

____________________________________________________________________________________________________
MODEL GENERATION - FEW SHOT:
Adding a painting program to a software is a good idea, but it would cost a bit more money.
____________________________________________________________________________________________________
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.



## Result
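As we can see, even after trying different temperatures, few-shot inference does not noticeably improve the model's output compared to one-shot inference. Note that the few-shot prompt was 819 tokens long, above the model's specified maximum sequence length of 512, which may also limit what the extra examples can contribute. Further improvement in model performance would have to come from fine-tuning, which we will cover later.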