In [96]:
import os
from openai import OpenAI
from dotenv import load_dotenv

# Pull settings from a .env file into the environment before they are read below.
load_dotenv()

# Completions client; the endpoint URL and API key come from the URL / API_KEY
# environment variables.
client = OpenAI(
    api_key=os.getenv("API_KEY"),
    base_url=os.getenv("URL"),
)

# Smoke test: request one line of continuation (generation stops at the first newline)
# and print the text of the first returned choice.
resp = client.completions.create(
    prompt="Shakespeare was",
    model="meta-llama/Meta-Llama-3.1-8B",
    max_tokens=100,
    temperature=0.2,
    stop=["\n"],
)
print(resp.choices[0].text)


 7


In [54]:
from typing import List, Optional


def prompt_zero_shot(problem: str) -> str:
    """Build the zero-shot prompt for a word problem.

    Args:
        problem: The question text.

    Returns:
        The question followed by the cue " Correct Answer: " (the cue ends with
        a trailing space).
    """
    answer_cue = " Correct Answer: "
    return problem + answer_cue


def extract_zero_shot(completion: str) -> Optional[int]:
    """Parse a model completion that should consist solely of an integer.

    Args:
        completion: Raw completion text returned by the model.

    Returns:
        The integer value when the completion, ignoring surrounding whitespace,
        is made up only of decimal digits; otherwise ``None``.
    """
    text = completion.strip()
    # str.isdecimal() only accepts characters that int() can parse. str.isdigit()
    # also accepts characters such as superscript digits, for which int() raises
    # ValueError.
    if not text.isdecimal():
        return None
    return int(text)

def solve_zero_shot(problem: str) -> Optional[int]:
    """Ask the model for a zero-shot answer to a word problem.

    The request is capped at a single generated token (``max_tokens=1``).

    Args:
        problem: The question text.

    Returns:
        Whatever ``extract_zero_shot`` returns for the text of the first choice.
    """
    response = client.completions.create(
        prompt=prompt_zero_shot(problem),
        model="meta-llama/Meta-Llama-3.1-8B",
        max_tokens=1,
        temperature=0.1,
    )
    first_choice = response.choices[0]
    return extract_zero_shot(first_choice.text)

In [87]:
from tqdm.auto import tqdm

# Two hand-written sanity-check problems; each record pairs a question with its
# integer answer.
EASY = [
    dict(question="I ate 2 pears and 1 apple. How many fruit did I eat?", answer=3),
    dict(question="I had ten chocolates but ate one. How many remain?", answer=9),
]

def accuracy_zero_shot(problems: List[dict], num_trials: int = 5) -> List[float]:
    """Measure zero-shot accuracy over several independent trials.

    Every trial sends each problem to the model again via ``solve_zero_shot``,
    so results can differ between trials.

    Args:
        problems: Sized iterable of records with a "question" key (the prompt
            text) and an "answer" key (the expected integer).
        num_trials: How many times to repeat the whole evaluation. Defaults to 5.

    Returns:
        One accuracy value per trial: the fraction of problems answered correctly.

    Raises:
        ValueError: If ``problems`` is empty, since accuracy is then undefined.
    """
    num_problems = len(problems)
    if num_problems == 0:
        raise ValueError("problems must contain at least one item")

    accuracies = []
    for _ in tqdm(range(num_trials)):
        # A missing answer (None) never equals the expected integer, so it
        # simply counts as incorrect.
        total_correct = sum(
            solve_zero_shot(item["question"]) == item["answer"] for item in problems
        )
        accuracies.append(total_correct / num_problems)

    return accuracies

# Sanity check: per-trial zero-shot accuracy on the two EASY problems.
accuracy_zero_shot(EASY)


100%|██████████| 5/5 [00:00<00:00,  6.60it/s]


[1.0, 1.0, 1.0, 1.0, 1.0]

In [88]:
import datasets

# Load the "test" split of the nuprl/llm-systems-math-word-problems dataset and
# print the per-trial zero-shot accuracy on it.
TEST = datasets.load_dataset("nuprl/llm-systems-math-word-problems", split="test")
print(accuracy_zero_shot(TEST))

100%|██████████| 5/5 [00:18<00:00,  3.62s/it]

[0.16, 0.16, 0.16, 0.14, 0.16]





In [110]:
def prompt_few_shot(problem: str) -> str:
    """Build the few-shot prompt: two worked examples followed by the new question.

    The returned text starts with a newline and every non-blank line carries a
    four-space indent; it ends with "Answer:" so the model supplies the answer.

    Args:
        problem: The question text to append after the worked examples.

    Returns:
        The complete prompt string.
    """
    prompt_lines = [
        "",
        "    Question: I ate 2 pears and 1 apple. How many fruit did I eat?",
        "    Answer: 3",
        "",
        "    Question: I had ten chocolates but ate one. How many remain?",
        "    Answer: 9",
        "",
        f"    Question: {problem}",
        "    Answer:",
    ]
    return "\n".join(prompt_lines)

def extract_few_shot(completion: str) -> Optional[int]:
    """Pull an integer out of a completion by concatenating its digit characters.

    Every non-digit character is dropped, so "1,000" yields 1000. Note that
    signs and decimal points are dropped too ("-5" yields 5, "3.5" yields 35).

    Args:
        completion: Raw completion text returned by the model.

    Returns:
        The integer formed by the completion's decimal digits in order, or
        ``None`` when the completion contains no decimal digit at all.
    """
    # str.isdecimal() keeps only characters int() can parse; str.isdigit() would
    # also keep e.g. superscript digits, which make int() raise ValueError.
    digits = "".join(filter(str.isdecimal, completion))
    if not digits:
        # int("") raises ValueError; report "no answer" instead.
        return None
    return int(digits)

def solve_few_shot(problem: str) -> Optional[int]:
    """Ask the model for a few-shot answer to a word problem.

    Generation stops at the first newline or after 100 tokens.

    Args:
        problem: The question text.

    Returns:
        Whatever ``extract_few_shot`` returns for the text of the first choice.
    """
    response = client.completions.create(
        prompt=prompt_few_shot(problem),
        model="meta-llama/Meta-Llama-3.1-8B",
        max_tokens=100,
        temperature=0.2,
        stop=["\n"],
    )
    first_choice = response.choices[0]
    return extract_few_shot(first_choice.text)

def accuracy_few_shot(problems: List[dict], num_trials: int = 5) -> List[float]:
    """Measure few-shot accuracy over several independent trials.

    Every trial sends each problem to the model again via ``solve_few_shot``,
    so results can differ between trials.

    Args:
        problems: Sized iterable of records with a "question" key (the prompt
            text) and an "answer" key (the expected integer).
        num_trials: How many times to repeat the whole evaluation. Defaults to 5.

    Returns:
        One accuracy value per trial: the fraction of problems answered correctly.

    Raises:
        ValueError: If ``problems`` is empty, since accuracy is then undefined.
    """
    num_problems = len(problems)
    if num_problems == 0:
        raise ValueError("problems must contain at least one item")

    accuracies = []
    for _ in tqdm(range(num_trials)):
        # A missing answer (None) never equals the expected integer, so it
        # simply counts as incorrect.
        total_correct = sum(
            solve_few_shot(item["question"]) == item["answer"] for item in problems
        )
        accuracies.append(total_correct / num_problems)

    return accuracies

In [111]:
# Per-trial few-shot accuracy on the TEST dataset.
accuracy_few_shot(TEST)

100%|██████████| 5/5 [00:27<00:00,  5.48s/it]


[0.16, 0.16, 0.16, 0.14, 0.14]