In [1]:
import sys
sys.path.insert(0, '../../')

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from typing import List, Tuple, Union
import pandas as pd
from IPython.display import display, Markdown

import candle
import experiments.textgenutils as gutils

## (1) Initialize Model with Pre-trained Weights

In [None]:
# One of ['gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl']
#    gpt2:         124M params
#    gpt2-medium:  354M params
#    gpt2-large:   774M params
#    gpt2-xl:    1,557M params

model = candle.models.gpt.GPT.from_pretrained('gpt2-large')

## (2) Compare Quality of Sampling Techniques

In [4]:
SAMPLING_TECHNIQUES = [
    # (technique, (beam_size, top_k, top_p))
    ('Beam Size = 2, P = 0.95, Top K = 100', (3, 100, 0.95)),
    ('P = 0.95, Top K = 100', (1, 100, 0.95)),
    ('Nucleus Sampling P = 0.95', (1, None, 0.95)),
    ('Top K = 40', (1, 40, None)),
    ('Beam Size = 10', (10, None, None)),
    ('Pure Sampling', (1, None, None)),
]


def compare_responses_of_sampling_techniques(prompt: str,
                                             n_tokens_to_generate: int = 50,
                                             trials: int = 2,
                                             sampling_techniques: list = SAMPLING_TECHNIQUES):
    tokenizer = candle.models.gpt.GPT2BPETokenizer()

    responses_by_config_df = pd.DataFrame(columns=range(trials), index=[i[0] for i in sampling_techniques])

    for config in sampling_techniques:
        for trial in range(trials):
            (label, (beam_size, top_k, top_p)) = config
            
            model.clear_kv_cache()
            generator = gutils.generate_text(model, tokenizer, prompt,
                                             n_tokens_to_generate=n_tokens_to_generate,
                                             beam_size=beam_size,
                                             top_k=top_k,
                                             top_p=top_p,
                                             use_kv_cache=True)

            responses_by_config_df.loc[label, trial] = ''.join(list(generator))

    responses_by_config_df = responses_by_config_df.stack().reset_index().set_index(['level_0', 'level_1'])
    responses_by_config_df.index.names = ['Sampling Technique', 'Trial']
    responses_by_config_df.columns = ['Response']

    prompt_df = pd.DataFrame([prompt], columns=[''], index=['Prompt'])
    properties = {'text-align': 'left', 'white-space': 'pre-wrap'}
    display(prompt_df.style.set_properties(**properties))
    display(responses_by_config_df.style.set_properties(**properties))

In [5]:
compare_responses_of_sampling_techniques(
    'Once upon a time, there was a cat whose name was Maukoo. He loves eating and cuddling. '
)

Unnamed: 0,Unnamed: 1
Prompt,"Once upon a time, there was a cat whose name was Maukoo. He loves eating and cuddling."


Unnamed: 0_level_0,Unnamed: 1_level_0,Response
Sampling Technique,Trial,Unnamed: 2_level_1
"Beam Size = 2, P = 0.95, Top K = 100",0,"He is a friendly, cuddly, and cuddly cat. Maukoo is the kind of cat that will let you cuddle him while he's eating. Maukoo is a cat that loves"
"Beam Size = 2, P = 0.95, Top K = 100",1,And he's a happy cat. But a lot of people are not as fond of him as they used to be. He is not a friendly cat. He is not a playful cat. He is a cat whose
"P = 0.95, Top K = 100",0,"He loves to be petted and he loves to be stroked. In all the kitties I've seen Maukoo, he is the only one who looks at me and loves me. So whenever you see a large number"
"P = 0.95, Top K = 100",1,"Now it is the year 2017, and Maukoo is still eating. What was once a year-long retreat has become a two-week long, three-day-a-week, 24/7, never-ending, daily"
Nucleus Sampling P = 0.95,0,"In fact, he loves all of life, but especially so when he is invited to a workplace for breakfast or lunch. He's pretty happy to be there, but a lot of his former owners and co-workers notice that he has"
Nucleus Sampling P = 0.95,1,"He used to get all crowded in his cage, but He would then quickly shut the door and run home. After a day at the parks, he'd get stuck in the fence and then burn his feet and bowels."
Top K = 40,0,He will sit on your lap and cuddle up to you while you write your memoir. It's really easy to make your own: Maukoo is very soft and friendly. You may also want to try these:
Top K = 40,1,"When he first met the other cats, they did not trust him because Maukoo was so large. So, they all began to make plans to play together as they were always together. That is when the ""Mau"
Beam Size = 10,0,"He's a bit of a loner, but he does get along well with other cats. He is very affectionate and loves to cuddle. He is very affectionate and loves to cuddle. He loves"
Beam Size = 10,1,He loves to be petted and cuddled. He loves to be scratched. He loves to be petted and cuddled. He loves to be petted and cuddled. He loves to be pet


In [6]:
compare_responses_of_sampling_techniques(
    'It\'s Black Friday, and Min wants to buy a new ski jacket. However, she doesn\'t know '
    'what to buy. '
)

Unnamed: 0,Unnamed: 1
Prompt,"It's Black Friday, and Min wants to buy a new ski jacket. However, she doesn't know what to buy."


Unnamed: 0_level_0,Unnamed: 1_level_0,Response
Sampling Technique,Trial,Unnamed: 2_level_1
"Beam Size = 2, P = 0.95, Top K = 100",0,"She has no money. She's not a skier. She doesn't even know how to use the internet. She doesn't know how to shop for clothes. She's a waitress at a local diner, and"
"Beam Size = 2, P = 0.95, Top K = 100",1,"She's got no idea what to buy. She's been on the hunt for a long time. She's on the phone with her friend, and she's looking for a new jacket. She's on the phone"
"P = 0.95, Top K = 100",0,"A friend had recommended BOSS, and she just had to be one of those ""bros"" who likes BOSS. So, like a young woman, she goes to BOSS and she picks out an awesome looking jacket. She put"
"P = 0.95, Top K = 100",1,"Why not just the new one? Maybe she could just buy the cheaper version that had the same logo, but cost less? Wait, what's Min doing anyway? Is she heading to the store now?"
Nucleus Sampling P = 0.95,0,"She has a total of $5,180 left to invest (after bonus expenses). It's currently 58,140 in your account - and prices have gone up by 10%. What else could she do? He can use savings to give"
Nucleus Sampling P = 0.95,1,"""My heart tells me that there's a best fit,"" she says. ""My instincts are telling me, 'Go buy a skis.'"" She begins shopping for a new one. For those who say, ""and for the love of the"
Top K = 40,0,"Why doesn't she just go online and research the brand she wants? After some research, she decides to go to eBay and look for a pair of jeans. ""So I'm searching through eBay, and I'm trying to"
Top K = 40,1,"When she comes back from her shopping trip, she finds a mysterious box on her doorstep. It's written on it, ""I'm sorry I'm late, but I'm coming over right now. If you come, bring your jacket."""
Beam Size = 10,0,She has no idea what to buy. She has no idea what to buy. She has no idea what to buy. She has no idea what to buy. She has no idea what to buy.
Beam Size = 10,1,"She's used to shopping at the mall, but this is the first time she's been to a department store. She's also used to shopping at the mall, but this is the first time she's been to a department store."


In [7]:
compare_responses_of_sampling_techniques(
    'In the heart of a whimsical forest, where trees whispered secrets to the wind and '
    'flowers danced in the sunlight, there lived a tiny ladybug named Bella. '
)

Unnamed: 0,Unnamed: 1
Prompt,"In the heart of a whimsical forest, where trees whispered secrets to the wind and flowers danced in the sunlight, there lived a tiny ladybug named Bella."


Unnamed: 0_level_0,Unnamed: 1_level_0,Response
Sampling Technique,Trial,Unnamed: 2_level_1
"Beam Size = 2, P = 0.95, Top K = 100",0,"Bella and her friends were the only creatures in the forest to speak their native language. A few of Bella's friends were a bit odd, but Bella was always happy to help them out. One day, Bella's friend"
"Beam Size = 2, P = 0.95, Top K = 100",1,"Bella had been abandoned as a baby by her mother who had left the world before she was born. After a year and a half of wandering the forest, Bella had found a place to call home. There she lived with"
"P = 0.95, Top K = 100",0,"With no home to call her own, Bella had to live out her life on the streets. One day Bella and her girlfriend were playing around on a nearby park bench when they spotted a sign, which seemed to be a good time to go."
"P = 0.95, Top K = 100",1,"One day she found a mysterious gem inside a green box. Her joy at finding this special treasure was unending. But one day when she was walking around the world, she ran into a big black moth. Bella knew her chances of"
Nucleus Sampling P = 0.95,0,"Bella was tied to a stretch of tree by a piece of wire. It was as if Bella were a child caught in mid-air from the air. Bella, this tiny insect, was trapped in this tree; and so it was that"
Nucleus Sampling P = 0.95,1,"The tale of her oddities and blissful life was told to newcomers in a spinoff novel in the time before YuYu Hakusho , but, in order to keep things short, I'll let the reader fill the background here. Bella"
Top K = 40,0,"This is the story of her story. You can read Bella's story here. She lives with her parents and their pet beetle, which they named Fluffy, in the quiet house at the top of a hill. There is a"
Top K = 40,1,"As the only one of her kind among her kind, Bella could live happily in her small space, but she wanted to go outside and see the world. Her friends and classmates at the school suggested, and she agreed, for the same reason so"
Beam Size = 10,0,"Bella was a bit of a tomboy. She liked to play with her friends, but she didn't like to be told what to do. She liked to do her own thing. She liked to make her"
Beam Size = 10,1,"Bella was a shy and shy girl, but she liked to be the center of attention. She loved to be on display, and she loved to be the center of attention. She loved to be the center of attention."


In [8]:
compare_responses_of_sampling_techniques(
    'A hush fell over the crowded auditorium as the maestro raised his baton, signaling '
    'the commencement of the symphony. '
)

Unnamed: 0,Unnamed: 1
Prompt,"A hush fell over the crowded auditorium as the maestro raised his baton, signaling the commencement of the symphony."


Unnamed: 0_level_0,Unnamed: 1_level_0,Response
Sampling Technique,Trial,Unnamed: 2_level_1
"Beam Size = 2, P = 0.95, Top K = 100",0,"The audience was silent as the music was played and then the audience was once again hushed as the music began. It was a beautiful piece of music, a beautiful piece of music. The music was so beautiful, so beautiful"
"Beam Size = 2, P = 0.95, Top K = 100",1,"The orchestra had been scheduled to play a pre-recorded portion of the piece, but the conductor and pianist opted to perform the piece in its entirety, with the orchestra following suit. The entire audience was enthralled by the music"
"P = 0.95, Top K = 100",0,"The audience stood up and the great ensemble, led by the Orchestra of Saint John of Jerusalem (New York), began its march through the air. The concert was, to be brief, a great success. I was very impressed"
"P = 0.95, Top K = 100",1,"I couldn't see the conductor's face, but I knew that he was wearing the full-length cape that was worn at the start of the concert. So I took a deep breath and was at the baton's end just before"
Nucleus Sampling P = 0.95,0,"The audience roared, and the men sprung to their feet. Music and dance! Socrates went straight to the banquet halls, and mingled in the caterers that offered endless morsels of delicacies, pigeons, grapes"
Nucleus Sampling P = 0.95,1,"The conductor needed only to listen, and then soundly embrace, The Master, and he was marked with the mark of the Master on his back as the buzz, a collective and reverberating hum, became widespread. In later times, there was"
Top K = 40,0,"The orchestra slowly started to play the melody: ""Kazurui. O Taki. Momo-chan."" As the piece started, the entire audience stood up and began to cheer. As the"
Top K = 40,1,"A few moments later, the first notes of the orchestra's second movement were projected on the front row of the hall, prompting applause and cheers. As the second movement was about to begin, a young young man came in, followed by"
Beam Size = 10,0,"For a few moments, it seemed as if the entire room would rise to its feet and applaud. Then, just as the orchestra began to play, the crowd fell silent. It was as if the audience had been waiting for"
Beam Size = 10,1,"A moment later, the hall erupted in applause. In the second movement of the symphony, the orchestra began to play the theme from the movie, ""Star Wars."" The audience clapped and cheered as the orchestra began to"


In [9]:
compare_responses_of_sampling_techniques(
    'Justine grew up in Alaska, and worked alongside her family on a small commercial '
    'salmon fishing boat during the summers until she graduated from college. '
)

Unnamed: 0,Unnamed: 1
Prompt,"Justine grew up in Alaska, and worked alongside her family on a small commercial salmon fishing boat during the summers until she graduated from college."


Unnamed: 0_level_0,Unnamed: 1_level_0,Response
Sampling Technique,Trial,Unnamed: 2_level_1
"Beam Size = 2, P = 0.95, Top K = 100",0,She was a member of the United States Coast Guard for four years. She was a member of the Alaska State Troopers for four years. She was a member of the United States Coast Guard for four years. She was
"Beam Size = 2, P = 0.95, Top K = 100",1,"She loves to travel and has traveled extensively. She and her husband have two children, an 11-year-old daughter and a 5-year-old son. She loves to travel, and has traveled extensively, including a"
"P = 0.95, Top K = 100",0,"For her 22nd birthday, she went to a local diner to enjoy lunch, and happened to stay there for a couple of hours. And that's where she met Jason. She saw his face. As she said"
"P = 0.95, Top K = 100",1,"When she was 21, she took a job on a private fishing boat for a young lady named Aliana Johnson, and they were married soon after. They have a son, Sean, the eldest of the Johnson children, and daughter A"
Nucleus Sampling P = 0.95,0,"She did not believe in climate change, but she loved living in Alaska and saw it as an adventure that enabled her to travel the world and learn about different cultures. She moved to London in 2007 to go to the University of London to"
Nucleus Sampling P = 0.95,1,"She decided to pursue a PhD and did much research on bats. I do not want to allow this episode of Laguna nothing bad to happen to my friend back in Alaska. So, here is some amazing news for you guys. A"
Top K = 40,0,"By the time she and her friends got to the mainland it was well too late for the salmon fishery, but it did allow her to discover her passion for film making. Her first film, Wild Tales , was a documentary about the"
Top K = 40,1,"Her mother told her that she and her sister were not the only ones with a passion for her father's work. She said that it would be best if she decided to go to school and take a second job. Unfortunately,"
Beam Size = 10,0,"During this time, she was able to spend more time with her family. After college, she moved to Portland, Oregon, where she worked for a few years as a full-time stay-at-home mom. While"
Beam Size = 10,1,"After graduating, she moved to New York City where she worked as a freelance graphic designer. After a few years of freelancing, she decided to pursue her dream of becoming a professional illustrator. She now lives in New York"


In [10]:
compare_responses_of_sampling_techniques(
    'A team of scientists has made an astonishing discovery in the depths of the Amazon '
    'Rainforest – a new species of giant turtle. '
)

Unnamed: 0,Unnamed: 1
Prompt,A team of scientists has made an astonishing discovery in the depths of the Amazon Rainforest – a new species of giant turtle.


Unnamed: 0_level_0,Unnamed: 1_level_0,Response
Sampling Technique,Trial,Unnamed: 2_level_1
"Beam Size = 2, P = 0.95, Top K = 100",0,"The turtle was discovered by a team of researchers from the University of São Paulo, Brazil and the National Institute of Biological Sciences (INFN) in São Paulo. The turtle was discovered near the town of Tocantins,"
"Beam Size = 2, P = 0.95, Top K = 100",1,"It's been dubbed the ""largest living turtle in the world"" after its enormous size and the fact that it's the biggest living turtle in the world. The new species is named after the Brazilian town where it was found, and is"
"P = 0.95, Top K = 100",0,"They may not look like anything we've seen before, but they may just be the first to survive in a cold, unforgiving place where life is hard to find. The Giant Toto is a freshwater turtle that lives in dense forest vegetation"
"P = 0.95, Top K = 100",1,"When the species was first spotted, scientists were astonished and thought they had found a new species. However, they were fooled into thinking that the new species, named Ptychodontis and its new found relative, Diplod"
Nucleus Sampling P = 0.95,0,"(NatGeo) In the 1980s, the US Department of Agriculture's Natural Resources Conservation Service launched a US-funded scientific expedition to collect DNA samples from five (3 captive, 2 wild) turtle species that inhabited the Amazon basin at"
Nucleus Sampling P = 0.95,1,"Fresh Bristlecone Pine (Quercus-hircus) turtles, which are also found in Africa, last once inside North America. They are the largest members of the up to 50-kilogram order (120-pound) group"
Top K = 40,0,"Its scientific name is Ochotlkwania jacobsoni, and it belongs to a bizarre new 'turtle-like' suborder that includes crocodiles, giant turtles, and snakes. Although it doesn't seem"
Top K = 40,1,"Â This species is known as the Iguanodon (Spanish for 'the turtle who lives deep'). Â The findings will be presented at the recent meeting of the American Association of Physical Anthropologists. The turtle, known as I."
Beam Size = 10,0,"The new species of giant turtle was discovered by a team of scientists from the University of Exeter, UK. The discovery was made when a team of scientists from the University of Exeter, UK, using equipment from the UK's National"
Beam Size = 10,1,It's the first time a new species of giant turtle has been found in the world's largest rainforest. The discovery was made by a team of international scientists led by the University of Exeter in the United Kingdom. It


In [11]:
compare_responses_of_sampling_techniques(
"""In the hush of twilight's gentle embrace,
Where shadows dance and stars find their place,
A symphony of silence fills the air,
As nature's lullaby begins to prepare.

The day's vibrant hues gently fade away,
Replaced by a canvas of silver and gray,"""
)

Unnamed: 0,Unnamed: 1
Prompt,"In the hush of twilight's gentle embrace, Where shadows dance and stars find their place, A symphony of silence fills the air, As nature's lullaby begins to prepare. The day's vibrant hues gently fade away, Replaced by a canvas of silver and gray,"


Unnamed: 0_level_0,Unnamed: 1_level_0,Response
Sampling Technique,Trial,Unnamed: 2_level_1
"Beam Size = 2, P = 0.95, Top K = 100",0,"The sun has set on the mountains, and the moon has set on the sky. The sky is now filled with a golden glow, The sky is now filled with a golden glow, And the day is now"
"Beam Size = 2, P = 0.95, Top K = 100",1,"As the sun's beams are replaced by the moon's. The silence of the night is broken by the sound Of voices, and voices are the only ones that can be heard. The night's silence is broken"
"P = 0.95, Top K = 100",0,"And we return to our lonely chambers, Where night and day are alike so, And every breath is a breath of darkness. Eyes will go blank in a silence that's beyond words, But the"
"P = 0.95, Top K = 100",1,"A landscape of death and decay, and the cold embrace of death. The dark gray of night, an ocean of night, Shines in the darkness of the horizon. It's only a small world, a"
Nucleus Sampling P = 0.95,0,"As shadows trail in that other direction which we find In the dark figure of the martyred saint. It's just the same as the morning's beautiful color, But ever darker and more ominous. Oh,"
Nucleus Sampling P = 0.95,1,"A stone like sand in the air, the same as before. No fragments of rustling leaves or hearth fires remain. Everything fades and grows silent, no more cries, no more Markings on the walls"
Top K = 40,0,"And the night, a pale moon's glow, shines, As it's own light, shines upon the day's pale color. That gentle glow, it's its own light. That gentle glow, it's"
Top K = 40,1,"And once you've stopped singing your music no longer sounds Like a melody you're singing. And in that silence the world is quiet, And there is nothing to see.<|endoftext|>An Australian politician has been jailed for"
Beam Size = 10,0,"As the night's murmur of wind and rain echoes through the air. In the hush of twilight's gentle embrace, Where shadows dance and stars find their place, A symphony of silence fills the air"
Beam Size = 10,1,"A scene of nature's calm and serenity, In the hush of twilight's gentle embrace, A symphony of silence fills the air, As nature's lullaby begins to prepare. As"


In [12]:
compare_responses_of_sampling_techniques(
    """[FADE IN]

EXT. NEW YORK CITY STREETS - NIGHT

A bustling metropolis, New York City comes alive under the cloak of darkness. Skyscrapers 
pierce the sky, their windows glowing like a constellation of stars.

Amidst this urban jungle, a lone figure walks with a determined stride. This is ANNA, a young 
woman in her early twenties, with an air of mystery about her. """
)

Unnamed: 0,Unnamed: 1
Prompt,"[FADE IN] EXT. NEW YORK CITY STREETS - NIGHT A bustling metropolis, New York City comes alive under the cloak of darkness. Skyscrapers pierce the sky, their windows glowing like a constellation of stars. Amidst this urban jungle, a lone figure walks with a determined stride. This is ANNA, a young woman in her early twenties, with an air of mystery about her."


Unnamed: 0_level_0,Unnamed: 1_level_0,Response
Sampling Technique,Trial,Unnamed: 2_level_1
"Beam Size = 2, P = 0.95, Top K = 100",0,ANNA (V.O.) I've been trying to get out of New York for a long time. I've been trying to get out of New York for a long time. I've been
"Beam Size = 2, P = 0.95, Top K = 100",1,"ANNA (V.O.) It was at the World Trade Center. I was on my way to the World Trade Center. ANNA walks to the edge of the plaza, where she looks over"
"P = 0.95, Top K = 100",0,"She is accompanied by a well-dressed man, a handsome man in his mid-thirties. He carries two m yond bags. He smiles as he talks to his companion. ANNA ("
"P = 0.95, Top K = 100",1,"ANNA We've got to go get 'em, Jack. She stops, turns to the man next to her and looks him dead in the eye. ANNA For the price of a day and"
Nucleus Sampling P = 0.95,0,CUT TO: INT. EXT. NEW YORK CENTRAL LOOP - NIGHT A bustling avenue leads to a great city building that most people would not expect to be in the area. INT. CIRCA RAT
Nucleus Sampling P = 0.95,1,"ANNA It's good to be back home. I always missed being away. ANNA has no idea what she just said. She comes across another lone figure who, again, she doesn't"
Top K = 40,0,"UNDER THE GUARDIAN'S GUIDE, an unassuming woman, a woman of the West Wing: NANCY MUELLER: And this is my partner, Amy. AN"
Top K = 40,1,"She wears an innocent smile, her eyes never leaving the road. She reaches the curb. No one follows her into the darkness. INT. SKYSCRAPER WINDOW ON NEW YORK STREETS C"
Beam Size = 10,0,ANNA (to the camera) I'm here. The camera follows her as she walks through the city. ANNA (CONT'D) I'm here. I'm going
Beam Size = 10,1,ANNA (whispering) Don't look at me. Don't look at me. Don't look at me. Don't look at me. Don't look at me. Don't look at me. Don
