# Generate text with zero-shot, one-shot, and few-shot inference

In [112]:
import psutil

notebook_memory = psutil.virtual_memory()
print(notebook_memory)

if notebook_memory.total < 32 * 1000 * 1000 * 1000:
    print('*******************************************')    
    print('YOU ARE NOT USING THE CORRECT INSTANCE TYPE')
    print('PLEASE CHANGE INSTANCE TYPE TO  m5.2xlarge ')
    print('*******************************************')
else:
    correct_instance_type=True

svmem(total=33229979648, available=21551951872, percent=35.1, used=11255922688, free=14773370880, active=13153267712, inactive=4822515712, buffers=0, cached=7200686080, shared=1040384, slab=223895552)


In [113]:
model_checkpoint = 'google/flan-t5-base'
dataset_templates_name = 'amazon_us_reviews/Wireless_v1_00'
prompt_template_name = 'Generate review headline based on review body'

# Create prompts for few-shot, one-shot, zero-shot inference on sample data

In [123]:
import pandas as pd
import csv
file = './data-tsv/amazon_reviews_us_Digital_Video_Games_v1_00.tsv.gz'

# Read the file
df = pd.read_csv(file, delimiter="\t", quoting=csv.QUOTE_NONE, compression="gzip")

df.isna().values.any()
df = df.dropna()
df = df.reset_index(drop=True)    

print("Shape of dataframe {}".format(df.shape))

# Convert Pandas dataframes into Datasets
import datasets
from datasets import Dataset

# Create Dataset objects (Arrow PyTables) from Pandas dataframes
dataset = Dataset.from_pandas(df)
df.head()

Shape of dataframe (145427, 15)


Unnamed: 0,marketplace,customer_id,review_id,product_id,product_parent,product_title,product_category,star_rating,helpful_votes,total_votes,vine,verified_purchase,review_headline,review_body,review_date
0,US,21269168,RSH1OZ87OYK92,B013PURRZW,603406193,Madden NFL 16 - Xbox One Digital Code,Digital_Video_Games,2,2,3,N,N,A slight improvement from last year.,I keep buying madden every year hoping they ge...,2015-08-31
1,US,133437,R1WFOQ3N9BO65I,B00F4CEHNK,341969535,Xbox Live Gift Card,Digital_Video_Games,5,0,0,N,Y,Five Stars,Awesome,2015-08-31
2,US,45765011,R3YOOS71KM5M9,B00DNHLFQA,951665344,Command & Conquer The Ultimate Collection [Ins...,Digital_Video_Games,5,0,0,N,Y,Hail to the great Yuri!,If you are prepping for the end of the world t...,2015-08-31
3,US,113118,R3R14UATT3OUFU,B004RMK5QG,395682204,Playstation Plus Subscription,Digital_Video_Games,5,0,0,N,Y,Five Stars,Perfect,2015-08-31
4,US,22151364,RV2W9SGDNQA2C,B00G9BNLQE,640460561,Saints Row IV - Enter The Dominatrix [Online G...,Digital_Video_Games,5,0,0,N,Y,Five Stars,Awesome!,2015-08-31


In [124]:
from promptsource.templates import DatasetTemplates
prompt_templates = DatasetTemplates(dataset_templates_name) 

print('*** Available prompts:')

for template in prompt_templates.templates.values():
    print(template.get_name())

*** Available prompts:
Generate review headline based on review body
Generate review based on rating and category
Given the review headline return a categorical rating
Generate review headline based on rating
Given the review body return a categorical rating


In [125]:
from pprint import pprint

prompt = prompt_templates[prompt_template_name]
print('** Selected prompt name: {}'.format(prompt_template_name))

** Selected prompt name: Generate review headline based on review body


In [126]:
print('** Available prompt answers: {}'.format(prompt.answer_choices))

** Available prompt answers: None


In [127]:
print('** Selected prompt template:')
pprint(prompt.__dict__)

** Selected prompt template:
{'answer_choices': None,
 'id': '5feaa0d7-e4e0-46cc-8517-e00bfa7fd00e',
 'jinja': 'Give a short sentence describing the following product review:\n'
          '{{review_body}} \n'
          '|||\n'
          '{{review_headline}}',
 'metadata': <promptsource.templates.Template.Metadata object at 0x7fb9ccd9c590>,
 'name': 'Generate review headline based on review body',
 'reference': 'Generate review headline based on review body'}


# Prepare zero-shot, one-shot, and few-shot prompts for inference

In [152]:
dataset = dataset.select([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).map(lambda row : {'prompt': prompt.apply(row)[0], 'label': prompt.apply(row)[1]})

prompt0 = dataset[0]
prompt1 = dataset[1]
prompt2 = dataset[2]
prompt3 = dataset[3]

zero_shot_prompt = prompt2['prompt'] + '\n'
one_shot_prompt = prompt0['prompt'] + '\n' + prompt0['label'] + '\n\n' + prompt2['prompt'] + '\n'
few_shot_prompt = prompt0['prompt'] + '\n' + prompt0['label'] + '\n\n' + prompt1['prompt'] + '\n' + prompt1['label'] + '\n\n' + prompt3['prompt'] + '\n' + prompt3['label'] + '\n\n' + prompt2['prompt'] + '\n'

  0%|          | 0/10 [00:00<?, ?ex/s]

# Perform zero-shot, one-shot, few-shot inference BEFORE fine-tuning

In [153]:
from transformers import AutoTokenizer
    
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [154]:
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# Zero-shot

In [160]:
print(one_shot_prompt)

inputs = tokenizer(zero_shot_prompt, return_tensors='pt')

response = tokenizer.decode(model.generate(inputs["input_ids"], 
                       max_new_tokens=200,
                       do_sample=True, 
                       top_k=50, 
                       top_p=0.9
                      )[0],
                     skip_special_tokens=True)

print('RESPONSE: {}'.format(response))
print('EXPECTED RESPONSE: {}'.format(prompt2['label']))

Give a short sentence describing the following product review:
I keep buying madden every year hoping they get back to football. This years version is a little better than last years -- but that's not saying much.The game looks great. The only thing wrong with the animation, is the way the players are always tripping on each other.<br /><br />The gameplay is still slowed down by the bloated pre-play controls. What used to take two buttons is now a giant PITA to get done before an opponent snaps the ball or the play clock runs out.<br /><br />The turbo button is back, but the player movement is still slow and awkward. If you liked last years version, I'm guessing you'll like this too. I haven't had a chance to play anything other than training and a few online games, so I'm crossing my fingers and hoping the rest is better.<br /><br />The one thing I can recommend is NOT TO BUY THE MADDEN BUNDLE. The game comes as a download. So if you hate it, there's no trading it in at Gamestop.
A sl

# One-shot

In [159]:
# print(one_shot_prompt)

# inputs = tokenizer(one_shot_prompt, return_tensors='pt')

# response = tokenizer.decode(model.generate(inputs["input_ids"], 
#                        max_new_tokens=200,
#                        do_sample=True, 
#                        top_k=50, 
#                        top_p=0.9
#                       )[0],
#                      skip_special_tokens=True)

# print('RESPONSE: {}'.format(response))
# print('EXPECTED RESPONSE: {}'.format(prompt2['label']))

Give a short sentence describing the following product review:
I keep buying madden every year hoping they get back to football. This years version is a little better than last years -- but that's not saying much.The game looks great. The only thing wrong with the animation, is the way the players are always tripping on each other.<br /><br />The gameplay is still slowed down by the bloated pre-play controls. What used to take two buttons is now a giant PITA to get done before an opponent snaps the ball or the play clock runs out.<br /><br />The turbo button is back, but the player movement is still slow and awkward. If you liked last years version, I'm guessing you'll like this too. I haven't had a chance to play anything other than training and a few online games, so I'm crossing my fingers and hoping the rest is better.<br /><br />The one thing I can recommend is NOT TO BUY THE MADDEN BUNDLE. The game comes as a download. So if you hate it, there's no trading it in at Gamestop.
A sl

# Few-shot

In [164]:
# print(few_shot_prompt)

# inputs = tokenizer(few_shot_prompt, return_tensors='pt')

# response = tokenizer.decode(model.generate(inputs["input_ids"], 
#                        max_new_tokens=200,
#                        do_sample=True, 
#                        top_k=50, 
#                        top_p=0.9
#                       )[0],
#                      skip_special_tokens=True)
                            
# print('RESPONSE: {}'.format(response))
# print('EXPECTED RESPONSE: {}'.format(prompt2['label']))

Give a short sentence describing the following product review:
I keep buying madden every year hoping they get back to football. This years version is a little better than last years -- but that's not saying much.The game looks great. The only thing wrong with the animation, is the way the players are always tripping on each other.<br /><br />The gameplay is still slowed down by the bloated pre-play controls. What used to take two buttons is now a giant PITA to get done before an opponent snaps the ball or the play clock runs out.<br /><br />The turbo button is back, but the player movement is still slow and awkward. If you liked last years version, I'm guessing you'll like this too. I haven't had a chance to play anything other than training and a few online games, so I'm crossing my fingers and hoping the rest is better.<br /><br />The one thing I can recommend is NOT TO BUY THE MADDEN BUNDLE. The game comes as a download. So if you hate it, there's no trading it in at Gamestop.
A sl