# Few-shot prompting

In [2]:
# from huggingface_hub import hf_hub_download 
import torch
from datasets import load_dataset, load_from_disk
import evaluate
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
from transformers import TrainingArguments, Trainer
#from peft import LoraConfig, TaskType, get_peft_model
#from peft import PeftConfig, PeftModel
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate

import pandas as pd
import numpy as np
import re
import os

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [3]:
# Record the installed transformers version alongside the results in this notebook.
print(transformers.__version__)

4.35.2


In [4]:
# Report the PyTorch build and whether a CUDA device is usable, one value per line.
print(torch.__version__, torch.cuda.is_available(), sep="\n")

2.3.0+cu118
True


In [5]:
# Ask the shell's nvcc for its version to see which CUDA toolkit is installed.
! nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:41:10_Pacific_Daylight_Time_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [9]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of ``examples`` with the notebook-level ``tokenizer``.

    Truncation is enabled; the tokenizer's return value is passed back unchanged.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True)
    return encoded

def load_tokenizer_llm(AutoModelForClass, model_name, model_path, cache_dir, to_device=True, **kwargs):
    """
    Load a tokenizer and a model, preferring a local snapshot over a download.

    The tokenizer is loaded from ``model_path`` when ``model_path/tokenizer.json``
    exists, and the model is loaded from ``model_path`` when
    ``model_path/model.safetensors`` exists. Whichever piece is missing is
    loaded by ``model_name`` instead.

    if use load_in_4bit=True, do not set to_device=True

    Args:
        AutoModelForClass: class exposing ``from_pretrained`` (e.g. one of the
            transformers ``AutoModelFor...`` classes) used to build the model.
        model_name: identifier used when no local copy is found.
        model_path: folder checked for ``tokenizer.json`` / ``model.safetensors``.
        cache_dir: forwarded as ``cache_dir=`` to every ``from_pretrained`` call.
        to_device: when True, move the model to CUDA if available, else to CPU.
        **kwargs: extra ``from_pretrained`` options, e.g.
            - device_map
            - torch_dtype
            - load_in_4bit
            They are forwarded to the model on both the local and the download
            path, and to the tokenizer on the download path only.

    Returns:
        ``(tokenizer, model)``
    """
    if not os.path.isfile(model_path + '/tokenizer.json'):
        print('no existing tokenizer found. Download from HF')
        # Loading by name also populates cache_dir with the tokenizer files.
        tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                  cache_dir=cache_dir,
                                                  **kwargs)
    else:
        print('existing tokenizer found. Load from local')
        # The keyword must be spelled `local_files_only`; a misspelled name is
        # not recognised as the offline switch.
        tokenizer = AutoTokenizer.from_pretrained(model_path,
                                                  cache_dir=cache_dir,
                                                  local_files_only=True)

    if not os.path.isfile(model_path + '/model.safetensors'):
        print('no existing model found. Download from HF')
        model = AutoModelForClass.from_pretrained(model_name,
                                                  cache_dir=cache_dir,
                                                  **kwargs)
    else:
        print('existing model found. Load from local')
        # Keep the caller's loading options (dtype, device map, quantisation)
        # on the local path too; force the offline flag last so it always wins.
        local_kwargs = {**kwargs, 'local_files_only': True}
        model = AutoModelForClass.from_pretrained(model_path,
                                                  cache_dir=cache_dir,
                                                  **local_kwargs)

    if to_device:
        # Not needed with load_in_4bit, where the model is already placed on
        # the correct devices at load time.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)

    return tokenizer, model

In [20]:
# Notebook configuration: cache locations, device, model choice and dataset paths.
# NOTE(review): this cell's execution count is higher than that of the loading cells
# that consume its variables — re-run top to bottom on a fresh kernel to confirm the order works.

# Local project root.
# NOTE(review): not referenced by any other cell visible in this notebook, and written as a
# Windows drive path while cache_dir below is Cygwin-style — confirm both resolve to the same
# folder for the Python interpreter running this kernel.
llm_repo_dir = 'D:/projects/LLM'
# Cache root: passed as cache_dir= to the loaders and used as the base of every path below.
cache_dir = '/cygdrive/d/projects/LLM/.cache'
# Point the Hugging Face / XDG cache environment variables at the cache root.
# NOTE(review): transformers and datasets are already imported in the first cell; if they read
# these variables at import time the assignments come too late — TODO confirm. The loading code
# in this notebook also passes cache_dir= explicitly.
os.environ['TRANSFORMERS_CACHE'] = cache_dir
os.environ['HF_HOME'] = cache_dir + '/huggingface'
os.environ['XDG_CACHE_HOME'] = cache_dir
os.environ['HF_DATASETS_CACHE'] = cache_dir

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Model selection: keep exactly one model_name / model_path pair active.
# model_path is the local snapshot folder that load_tokenizer_llm checks before downloading.

## mistral
#model_name = "mistralai/Mistral-7B-v0.1"
#model_path = cache_dir + ''

## falcon - decoder only
#model_name = "tiiuae/falcon-7b-instruct"
#model_path = cache_dir + '/models--tiiuae--falcon-7b-instruct/snapshots/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99'

## gpt2 - decoder only
#model_name = 'gpt2'
#model_path = cache_dir + '/models--gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e'

## T5-small - encoder-decoder
#model_name = 'google-t5/t5-small'
#model_path = cache_dir + '/models--google-t5--t5-small/snapshots/df1b051c49625cf57a3d0d8d3863ed4d13564fe4'

## T5-3b
model_name = 'google-t5/t5-3b'
model_path = cache_dir + '/models--google-t5--t5-3b/snapshots/bed96aab9ee46012a5046386105ee5fd0ac572f0'

# Dataset: local folder tried first, and the name used when that folder does not exist.
dataset_path = cache_dir + '/parquet/yelp_polarity' # cache_dir + '/parquet/yelp_review_full-e22176106d6e7534'
dataset_name = 'yelp_polarity' # yelp_review_full
# Location for the tokenized dataset; one path per model so the variants do not collide.
#tokenized_data_path = cache_dir + '/tokenized_dataset_yelp_polarity_gpt2'
#tokenized_data_path = cache_dir + '/tokenized_dataset_yelp_polarity_mistral7b'
#tokenized_data_path = cache_dir + '/tokenized_dataset_yelp_polarity_falcon7b'
#tokenized_data_path = cache_dir + '/tokenized_dataset_yelp_polarity_t5small'
tokenized_data_path = cache_dir + '/tokenized_dataset_yelp_polarity_t53b'

In [13]:
# The configured checkpoint is a T5 model, so it is loaded through the seq2seq auto class.
AutoModelForClass = AutoModelForSeq2SeqLM
# Let load_tokenizer_llm move the model to CUDA when available (CPU otherwise).
to_device = True
tokenizer, model = load_tokenizer_llm(AutoModelForClass, model_name, model_path, cache_dir, to_device=to_device)

existing tokenizer found. Load from local
existing model found. Load from local


In [14]:
# Reuse the dataset folder on disk when it exists; otherwise load the dataset by
# name and cache it under the parquet sub-folder of the cache root.
if os.path.isdir(dataset_path):
    dataset = load_dataset(dataset_path)
else:
    dataset = load_dataset(dataset_name, cache_dir=cache_dir + '/parquet')

In [15]:
# Index the row first, then the column: this reads one example instead of
# materialising the whole 'text' column just to take a single element.
dataset['train'][0]['text']

"Unfortunately, the frustration of being Dr. Goldberg's patient is a repeat of the experience I've had with so many other doctors in NYC -- good doctor, terrible staff.  It seems that his staff simply never answers the phone.  It usually takes 2 hours of repeated calling to get an answer.  Who has time for that or wants to deal with it?  I have run into this problem with many other doctors and I just don't get it.  You have office workers, you have patients with medical needs, why isn't anyone answering the phone?  It's incomprehensible and not work the aggravation.  It's with regret that I feel that I have to give Dr. Goldberg 2 stars."

In [16]:
# Row-first indexing: fetch only the second example rather than the full 'text' column.
dataset['train'][1]['text']

"Been going to Dr. Goldberg for over 10 years. I think I was one of his 1st patients when he started at MHMG. He's been great over the years and is really all about the big picture. It is because of him, not my now former gyn Dr. Markoff, that I found out I have fibroids. He explores all options with you and is very patient and understanding. He doesn't judge and asks all the right questions. Very thorough and wants to be kept in the loop on every aspect of your medical health and your life."

## zero-shot prompting

In [17]:
# Zero-shot prompt: a bare instruction followed by the first training review.
# The review is fetched row-first so only that one example is read, not the whole
# 'text' column. strip() removes the whitespace the triple-quoted template adds.
prompt = """ 
Summarize: {0}
""".format(dataset['train'][0]['text']).strip()

In [18]:
# Display the assembled prompt to check that the template was filled in as intended.
prompt

"Summarize: Unfortunately, the frustration of being Dr. Goldberg's patient is a repeat of the experience I've had with so many other doctors in NYC -- good doctor, terrible staff.  It seems that his staff simply never answers the phone.  It usually takes 2 hours of repeated calling to get an answer.  Who has time for that or wants to deal with it?  I have run into this problem with many other doctors and I just don't get it.  You have office workers, you have patients with medical needs, why isn't anyone answering the phone?  It's incomprehensible and not work the aggravation.  It's with regret that I feel that I have to give Dr. Goldberg 2 stars."

In [21]:
# Greedy decoding baseline: sampling disabled, at most 100 newly generated tokens.
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
input_ids = input_ids.to(device)
generated_ids = model.generate(input_ids, max_new_tokens=100, do_sample=False)
first_sequence = generated_ids[0]
generated_text = tokenizer.decode(first_sequence, skip_special_tokens=True)
print(generated_text)

. to to to.... I'm not sure if I'll be back. I'm not sure if I'll be recommending him to anyone.


In [22]:
# Sampled decoding: low temperature, candidates limited to the 10 most likely tokens.
# Seed PyTorch first so that re-running this cell draws the same sample
# (on the same hardware/software stack) instead of a different text each time.
torch.manual_seed(0)
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
generated_ids = model.generate(input_ids,
                               do_sample=True,
                               temperature=0.3,
                               max_new_tokens=100,
                               top_k=10)
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(generated_text)

. to to to.... I wish I could give him 3. I've been a patient of Dr. Goldberg for over 15 years. He is a great doctor.


In [23]:
# Sampled decoding with nucleus (top-p) filtering.
# top_p is a cumulative-probability threshold and must lie in (0, 1]; a value such as
# 10 is outside that range and cannot act as a nucleus cut-off. 0.9 is a conventional
# starting point — tune as needed.
# Seed PyTorch first so that re-running this cell draws the same sample
# (on the same hardware/software stack).
torch.manual_seed(0)
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
generated_ids = model.generate(input_ids,
                               do_sample=True,
                               temperature=0.3,
                               max_new_tokens=100,
                               top_p=0.9)
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(generated_text)

. to to to.... I would give him a 3 star rating if I could. Dr. Goldberg is a good doctor, but his staff is awful.


In [24]:
%%time

input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
generated_ids = model.generate(input_ids, 
                               do_sample=True, 
                               temperature=0.3,
                               max_new_tokens=100,
                               top_p=5,
                               top_k=5)
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(generated_text)

. to to to.... I've had a great experience with him and I would recommend him to anyone. I would give him 3 stars if I could.
CPU times: total: 35.5 s
Wall time: 53.1 s


## few-shot prompting

## Chain-of-thought (CoT)


## Use case: topic modeling

Use few-shot prompting to determine (up to three) topic(s) of a comment. Use comments with existing topics as example prompts.

**Question**: Will the LLM be able to identify a new topic that falls outside the scope of the topics covered by the examples?

First, experiment with ChatGPT (~125B parameters). If it does a good job, we can then try smaller models (7–70B parameters).