# An Introduction to Prompting for LLMs

https://medium.com/data-science/an-introduction-to-prompting-for-llms-61d36aec2048

Download data from https://drive.google.com/file/d/1ImYUSLk9JbgHXOemfvyiDiirluZHPeQw/view

In [7]:
!pip install -q hf_xet transformers

In [3]:
import zipfile

# Path to your zip file
zip_path = 'data_clean.zip'

# Directory to extract to
extract_to = './data'

# Unzip
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print(f'Extracted to {extract_to}')

Extracted to ./data


In [4]:
import json
import random

random.seed(42)

def read_jsonl_file(file_path):
    """
    Parses a JSONL (JSON Lines) file and returns a list of dictionaries.

    Args:
        file_path (str): The path to the JSONL file to be read.

    Returns:
        list of dict: A list where each element is a dictionary representing
            a JSON object from the file.
    """
    with open(file_path, 'r', encoding="utf-8") as file:
        return [json.loads(line) for line in file]

def write_jsonl_file(dict_list, file_path):
    """
    Write a list of dictionaries to a JSON Lines file.

    Args:
    - dict_list (list): A list of dictionaries to write to the file.
    - file_path (str): The path to the file where the data will be written.
    """
    with open(file_path, 'w') as file:
        for dictionary in dict_list:
            # Convert the dictionary to a JSON string and write it to the file.
            json_line = json.dumps(dictionary)
            file.write(json_line + '\n')

In [5]:
# read the contents of the train and test set
train_set = read_jsonl_file("data/data_clean/questions/US/4_options/phrases_no_exclude_train.jsonl")
test_set = read_jsonl_file("data/data_clean/questions/US/4_options/phrases_no_exclude_test.jsonl")

# subsample test set samples and few-shot samples
test_set_subsampled = random.sample(test_set, 300)
few_shot_examples = random.sample(test_set, 3)

# dump the sampled questions and few-shot samples as jsonl files
write_jsonl_file(test_set_subsampled, "data/USMLE_test_samples_300.jsonl")
write_jsonl_file(few_shot_examples, "data/USMLE_few_shot_samples.jsonl")

Prompting Llama 2 7B-Chat with a Zero-Shot Prompt

In [10]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from google.colab import userdata

HUGGING_FACE_TOKEN = userdata.get('HUGGING_FACE_ACCESS_TOKEN')

questions = read_jsonl_file("data/USMLE_test_samples_300.jsonl")

model_id: str = "meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=HUGGING_FACE_TOKEN)

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, token=HUGGING_FACE_TOKEN).cuda()

model.eval()

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05)
    (rotary_e

In [11]:
def create_query(item: dict)->str:
    return f"{item.get('question')}\noptions:\n:"\
        f"A. {item.get('options').get('A')}\n"\
        f"B. {item.get('options').get('B')}\n"\
        f"C. {item.get('options').get('C')}\n"\
        f"D. {item.get('options').get('D')}"

def build_zero_shot_prompting(system_prompt: str, content: dict)->list[dict]:
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": create_query(content) }
    ]

PROMPT = """You will be provided with a medical or clinical question, along with multiple possible answer choices. Pick the right answer from the choices.
Your response should be in the format "The answer is <correct_choice>". Do not add any other unnecessary content in your response"""

In [54]:
%%time

def get_zero_llama_answers(item: dict)->str:
    zero_shot_prompt_messages =  build_zero_shot_prompting(PROMPT, item)
    input_ids = tokenizer.apply_chat_template(zero_shot_prompt_messages, tokenize=True, return_tensors="pt").to("cuda")
    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids, max_new_tokens=10, do_sample=True)
    cpu_outputs = outputs.detach().cpu()
    gen_text = tokenizer.batch_decode(cpu_outputs[:, input_ids.shape[1]:])[0].strip()
    return gen_text

def generate_answer():
    for question in questions:
        yield get_zero_llama_answers(question)

zero_shot_llama_answers: list[str] = [answer  for answer in generate_answer()]

CPU times: user 1min 51s, sys: 71.1 ms, total: 1min 51s
Wall time: 1min 51s


In [13]:
import re

pattern = re.compile(r"([A-Z])\.\s*(.*)")

def parse_answer(response):
    """
    Extracts the answer option from the predicted string.

    Args:
    - response (str): The string to search for the pattern.

    Returns:
    - str: The matched answer option if found or an empty string otherwise.
    """
    match = re.search(pattern, response)
    if match:
        letter = match.group(1)
    else:
        letter = ""

    return letter

In [55]:
zero_shot_llama_predictions: list[str] = [parse_answer(answer) for answer in zero_shot_llama_answers]
ground_truth: list[str] = [item["answer_idx"] for item in questions]

In [56]:
import numpy as np

def accuracy(target: list[str], predictions: list[str])->float:
    return (np.array(target)==np.array(predictions)).mean().item()

acc: float = accuracy(zero_shot_llama_predictions, ground_truth)

print("Accuracy in Zero-shot: {0:.3f} ({0:.2%})".format(acc))

Accuracy in Zero-shot: 0.227 (22.67%)
