In [1]:
from agential.prompting.cot.prompting import CoT

from agential.prompting.cot.prompts import (
    COT_INSTRUCTION_AMBIGNQ, 
    COT_INSTRUCTION_FEVER, 
    COT_INSTRUCTION_GSM8K, 
    COT_INSTRUCTION_HOTPOTQA, 
    COT_INSTRUCTION_SVAMP, 
    COT_INSTRUCTION_TRIVIAQA,
    COT_INSTRUCTION_TABMWP,
    COT_INSTRUCTION_HUMANEVAL,
    COT_INSTRUCTION_MBPP,
)
from agential.core.fewshots.ambignq import AMBIGNQ_FEWSHOT_EXAMPLES_COT
from agential.core.fewshots.fever import FEVER_FEWSHOT_EXAMPLES_COT
from agential.core.fewshots.gsm8k import GSM8K_FEWSHOT_EXAMPLES_COT
from agential.core.fewshots.hotpotqa import HOTPOTQA_FEWSHOT_EXAMPLES_COT
from agential.core.fewshots.svamp import SVAMP_FEWSHOT_EXAMPLES_COT
from agential.core.fewshots.triviaqa import TRIVIAQA_FEWSHOT_EXAMPLES_COT
from agential.core.fewshots.tabmwp import TABMWP_FEWSHOT_EXAMPLES_COT
from agential.core.fewshots.humaneval import HUMANEVAL_FEWSHOT_EXAMPLES_COT
from agential.core.fewshots.mbpp import MBPP_FEWSHOT_EXAMPLES_COT

import warnings
# Install a blanket "ignore" filter so warnings raised from here on do not clutter cell output.
# NOTE(review): this also hides deprecation notices from dependencies — narrow the filter if those matter.
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the API credentials the LLM client needs — confirm.
load_dotenv()

# NOTE(review): imported after load_dotenv() and after the warnings filter; keep this order
# in case the module reads the environment or emits warnings at import time — confirm.
from agential.llm.llm import LLM

# Single model handle shared by every CoT instance created in the cells below.
llm = LLM("gpt-3.5-turbo")

# QA

### HotpotQA

In [None]:
# Sample HotpotQA question; adjacent literals are concatenated into one string.
question = (
    'Who was once considered the best kick boxer in the world, however he has been '
    'involved in a number of controversies relating to his "unsportsmanlike conducts" '
    'in the sport and crimes of violence outside of the ring'
)

# CoT prompter configured for the "hotpotqa" benchmark, backed by the shared `llm`.
method = CoT(llm=llm, benchmark="hotpotqa")

# One generation using the HotpotQA few-shot examples and instruction template;
# `additional_keys` is left empty for this prompt.
out = method.generate(
    question=question,
    examples=HOTPOTQA_FEWSHOT_EXAMPLES_COT,
    prompt=COT_INSTRUCTION_HOTPOTQA,
    additional_keys={},
)

In [None]:
# Bare last expression: the notebook renders the value `generate` returned for the HotpotQA question.
out

### FEVER

In [None]:
# Sample FEVER claim, passed to the prompter through the `question` argument.
question = "Nikolaj Coster-Waldau worked with the Fox Broadcasting Company."

# CoT prompter configured for the "fever" benchmark, backed by the shared `llm`.
method = CoT(llm=llm, benchmark="fever")

# One generation using the FEVER few-shot examples and instruction template;
# `additional_keys` is left empty for this prompt.
out = method.generate(
    question=question,
    examples=FEVER_FEWSHOT_EXAMPLES_COT,
    prompt=COT_INSTRUCTION_FEVER,
    additional_keys={},
)

In [None]:
# Bare last expression: the notebook renders the value `generate` returned for the FEVER claim.
out

### AmbigNQ

In [None]:
# Sample AmbigNQ question.
question = "When did the simpsons first air on television?"

# CoT prompter configured for the "ambignq" benchmark, backed by the shared `llm`.
method = CoT(llm=llm, benchmark="ambignq")

# One generation using the AmbigNQ few-shot examples and instruction template;
# `additional_keys` is left empty for this prompt.
out = method.generate(
    question=question,
    examples=AMBIGNQ_FEWSHOT_EXAMPLES_COT,
    prompt=COT_INSTRUCTION_AMBIGNQ,
    additional_keys={},
)

In [None]:
# Bare last expression: the notebook renders the value `generate` returned for the AmbigNQ question.
out

### TriviaQA

In [None]:
# Sample TriviaQA question.
question = "Which American-born Sinclair won the Nobel Prize for Literature in 1930?"

# CoT prompter configured for the "triviaqa" benchmark, backed by the shared `llm`.
method = CoT(llm=llm, benchmark="triviaqa")

# One generation using the TriviaQA few-shot examples and instruction template;
# `additional_keys` is left empty for this prompt.
out = method.generate(
    question=question,
    examples=TRIVIAQA_FEWSHOT_EXAMPLES_COT,
    prompt=COT_INSTRUCTION_TRIVIAQA,
    additional_keys={},
)

In [None]:
# Bare last expression: the notebook renders the value `generate` returned for the TriviaQA question.
out

# Math

### GSM8K

In [None]:
# Sample word problem for the "gsm8k" benchmark, split one sentence per literal;
# the adjacent literals concatenate into the same single string.
question = (
    "Janet's ducks lay 16 eggs per day. "
    "She eats three for breakfast every morning and bakes muffins for her friends every day with 4933828. "
    "She sells the remainder at the farmers' market daily for $2 per fresh duck egg. "
    "How much in dollars does she make every day at the farmers' market?"
)

# CoT prompter configured for the "gsm8k" benchmark, backed by the shared `llm`.
method = CoT(llm=llm, benchmark="gsm8k")

# One generation using the GSM8K few-shot examples and instruction template;
# `additional_keys` is left empty for this prompt.
out = method.generate(
    question=question,
    examples=GSM8K_FEWSHOT_EXAMPLES_COT,
    prompt=COT_INSTRUCTION_GSM8K,
    additional_keys={},
)

In [None]:
# Bare last expression: the notebook renders the value `generate` returned for the GSM8K problem.
out

### SVAMP

In [None]:
# Sample SVAMP word problem, split one sentence per literal;
# the adjacent literals concatenate into the same single string.
question = (
    "There are 87 oranges and 290 bananas in Philip's collection. "
    "If the bananas are organized into 2 groups and oranges are organized into 93 groups. "
    "How big is each group of bananas?"
)

# CoT prompter configured for the "svamp" benchmark, backed by the shared `llm`.
method = CoT(llm=llm, benchmark="svamp")

# One generation using the SVAMP few-shot examples and instruction template;
# `additional_keys` is left empty for this prompt.
out = method.generate(
    question=question,
    examples=SVAMP_FEWSHOT_EXAMPLES_COT,
    prompt=COT_INSTRUCTION_SVAMP,
    additional_keys={},
)

In [None]:
# Bare last expression: the notebook renders the value `generate` returned for the SVAMP problem.
out

### TabMWP

In [None]:
# Sample TabMWP item: a small pipe-delimited table followed by the question about it.
# The literal is whitespace-sensitive, so it is kept exactly as written.
question = """Read the following table regarding "Bowling Scores" and then write Python code to answer a question:

Name | Score
Amanda | 117
Sam | 236
Irma | 144
Mike | 164

Question: Some friends went bowling and kept track of their scores. How many more points did Mike score than Irma?"""

# CoT prompter configured for the "tabmwp" benchmark, backed by the shared `llm`.
method = CoT(llm=llm, benchmark="tabmwp")

# One generation using the TabMWP few-shot examples and instruction template;
# `additional_keys` is left empty for this prompt.
out = method.generate(
    question=question,
    examples=TABMWP_FEWSHOT_EXAMPLES_COT,
    prompt=COT_INSTRUCTION_TABMWP,
    additional_keys={},
)

In [None]:
# Bare last expression: the notebook renders the value `generate` returned for the TabMWP item.
out

# Code

### HumanEval

In [None]:
# HumanEval record "HumanEval/0", inlined as a dict (one key per line for readability).
inst = {
    "task_id": "HumanEval/0",
    "prompt": "from typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n    \"\"\" Check if in given list of numbers, are any two numbers closer to each other than\n    given threshold.\n    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n    False\n    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n    True\n    \"\"\"\n",
    "entry_point": "has_close_elements",
    "canonical_solution": "    for idx, elem in enumerate(numbers):\n        for idx2, elem2 in enumerate(numbers):\n            if idx != idx2:\n                distance = abs(elem - elem2)\n                if distance < threshold:\n                    return True\n\n    return False\n",
    "test": "\n\nMETADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\n    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\n    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\n    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\n    assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\n    assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\n    assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\n\n",
}

# The record's function signature and docstring serve as the question.
question = inst["prompt"]

# Test source: the record's `check` definition followed by a call to it on the entry point.
# NOTE(review): `tests` is not forwarded to `generate` in this cell (`additional_keys` is empty) —
# confirm that the HumanEval prompt is meant to run without it.
tests = inst["test"] + "\ncheck(" + inst["entry_point"] + ")"

# CoT prompter configured for the "humaneval" benchmark, backed by the shared `llm`.
method = CoT(llm=llm, benchmark="humaneval")

# One generation using the HumanEval few-shot examples and instruction template.
out = method.generate(
    question=question,
    examples=HUMANEVAL_FEWSHOT_EXAMPLES_COT,
    prompt=COT_INSTRUCTION_HUMANEVAL,
    additional_keys={},
)

In [None]:
# Bare last expression: the notebook renders the value `generate` returned for the HumanEval task.
out

### MBPP

In [None]:
# Sample MBPP task description.
question = "Write a python function to find the first repeated character in a given string."

# Assertions handed to the prompt alongside the task. Single-quoted triple delimiters
# let the final `"1"` sit directly before the closing quotes without an escape.
tests = '''assert first_repeated_char("abcabc") == "a"
assert first_repeated_char("abc") == None
assert first_repeated_char("123123") == "1"'''

# CoT prompter configured for the "mbpp" benchmark, backed by the shared `llm`.
method = CoT(llm=llm, benchmark="mbpp")

# One generation using the MBPP few-shot examples and instruction template;
# the assertions are supplied under the "tests" key of `additional_keys`.
out = method.generate(
    question=question,
    examples=MBPP_FEWSHOT_EXAMPLES_COT,
    prompt=COT_INSTRUCTION_MBPP,
    additional_keys={"tests": tests},
)

In [None]:
# Bare last expression: the notebook renders the value `generate` returned for the MBPP task.
out