In [1]:
!pip install llm4pcg

Collecting llm4pcg
  Using cached llm4pcg-1.0.1-py3-none-any.whl.metadata (4.6 kB)
Collecting openai (from llm4pcg)
  Downloading openai-1.35.13-py3-none-any.whl.metadata (21 kB)
Collecting distro<2,>=1.7.0 (from openai->llm4pcg)
  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai->llm4pcg)
  Using cached httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting pydantic<3,>=1.9.0 (from openai->llm4pcg)
  Downloading pydantic-2.8.2-py3-none-any.whl.metadata (125 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.2/125.2 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting tqdm>4 (from openai->llm4pcg)
  Using cached tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai->llm4pcg)
  Using cached httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai->llm4pcg)
  Using cac

# Importing the packages

In [9]:
from pathlib import Path

from llm4pcg.competition import chat_with_llm, run_evaluation
from llm4pcg.models.trial_context import TrialContext
from llm4pcg.models.trial_loop import TrialLoop

# Configuration

In [3]:
# Target characters; passed to every run_evaluation call as `characters`.
CHARACTERS = ["A", "B", "C"]
# Passed to every run_evaluation call as `num_trials`.
NUM_TRIALS = 3
# Placeholder value: replace with the name of the model to evaluate before running.
MODEL_NAME = "<PUT_YOUR_MODEL_NAME_HERE>"
# Passed to run_evaluation as `local_model_base_url`; points at a server on localhost:3000.
# NOTE(review): presumably an OpenAI-compatible endpoint — confirm against llm4pcg.
LOCAL_MODEL_BASE_URL = "http://localhost:3000/v1"

In [4]:
# System message sent as the first chat message by every prompting strategy below.
# Built from adjacent string literals so the text is one single line with no
# embedded newlines or indentation.
SYSTEM_PROMPT = (
    "Output in Markdown code block format (between ``` and ```). "
    "The last code block must contain all the necessary code required to produce a level. "
    "Output only the 'drop_block' function with proper arguments, without any other code. "
    "You do not need to define the 'drop_block' function or any other functions."
)

# Zero-Shot Prompting

In [6]:
class ZeroShotPrompting(TrialLoop):
    """Trial loop that queries the model once with the zero-shot task prompt."""

    @staticmethod
    def run(ctx: TrialContext, target_character: str) -> str:
        """
        Runs the zero-shot prompting.

        The prompt template is read from ``prompts/task-zero-shot.txt`` and
        formatted with ``object=target_character`` before being sent as the
        user message, preceded by ``SYSTEM_PROMPT`` as the system message.

        :param ctx: The trial context.
        :param target_character: The target character.
        :return: The generated text (the first response from ``chat_with_llm``).
        """
        # read_text() opens, reads and closes the file, so no handle is left open.
        prompt_template = Path("prompts/task-zero-shot.txt").read_text()

        responses = chat_with_llm(ctx, [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt_template.format(
                object=target_character
            )}])

        return responses[0]

In [8]:
# Evaluate ZeroShotPrompting with the settings from the configuration cell.
run_evaluation(
    "zero_shot",
    ZeroShotPrompting,
    characters=CHARACTERS,
    num_trials=NUM_TRIALS,
    model_name=MODEL_NAME,
    local_model_base_url=LOCAL_MODEL_BASE_URL,
)

# Zero-Shot Chain-of-Thought Prompting

In [11]:
class ZeroShotCoTPrompting(TrialLoop):
    """Trial loop that queries the model once with the zero-shot chain-of-thought prompt."""

    @staticmethod
    def run(ctx: TrialContext, target_character: str) -> str:
        """
        Runs the zero-shot chain-of-thought prompting.

        The prompt template is read from ``prompts/task-zero-shot-cot.txt`` and
        formatted with ``object=target_character`` before being sent as the
        user message, preceded by ``SYSTEM_PROMPT`` as the system message.

        :param ctx: The trial context.
        :param target_character: The target character.
        :return: The generated text (the first response from ``chat_with_llm``).
        """
        # read_text() opens, reads and closes the file, so no handle is left open.
        prompt_template = Path("prompts/task-zero-shot-cot.txt").read_text()

        responses = chat_with_llm(ctx, [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt_template.format(
                object=target_character
            )}])

        return responses[0]

In [12]:
# Evaluate ZeroShotCoTPrompting with the settings from the configuration cell.
run_evaluation(
    "zero_shot_cot",
    ZeroShotCoTPrompting,
    characters=CHARACTERS,
    num_trials=NUM_TRIALS,
    model_name=MODEL_NAME,
    local_model_base_url=LOCAL_MODEL_BASE_URL,
)

# Few-Shot Prompting

In [13]:
class FewShotPrompting(TrialLoop):
    """Trial loop that queries the model once with the few-shot task prompt."""

    @staticmethod
    def run(ctx: TrialContext, target_character: str) -> str:
        """
        Runs the few-shot prompting.

        The prompt template is read from ``prompts/task-few-shot.txt`` and
        formatted with ``object=target_character`` before being sent as the
        user message, preceded by ``SYSTEM_PROMPT`` as the system message.

        :param ctx: The trial context.
        :param target_character: The target character.
        :return: The generated text (the first response from ``chat_with_llm``).
        """
        # read_text() opens, reads and closes the file, so no handle is left open.
        prompt_template = Path("prompts/task-few-shot.txt").read_text()

        responses = chat_with_llm(ctx, [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt_template.format(
                object=target_character
            )}])

        return responses[0]

In [14]:
# Evaluate FewShotPrompting with the settings from the configuration cell.
run_evaluation(
    "few_shot",
    FewShotPrompting,
    characters=CHARACTERS,
    num_trials=NUM_TRIALS,
    model_name=MODEL_NAME,
    local_model_base_url=LOCAL_MODEL_BASE_URL,
)

# Tree-of-Thought Prompting

In [None]:
class TreeOfThoughtPrompting(TrialLoop):
    """Scaffold for tree-of-thought prompting; the search in ``tot`` is still a TODO."""

    @staticmethod
    def extract_scores(scores_str: str):
        """
        Pulls the stability and similarity scores out of an evaluation text.

        The text is lower-cased and searched for ``stability: <n>`` and
        ``similarity: <n>``, where ``<n>`` is ``10`` or a single digit.

        :param scores_str: The text containing the scores.
        :return: A ``(stability, similarity)`` tuple of ints; a score that
            cannot be found is reported as 0.
        """
        import re

        lowered = scores_str.lower()

        def find_score(pattern: str) -> int:
            # First capture group holds the numeric score; 0 when nothing matches.
            found = re.search(pattern, lowered)
            return int(found.group(1)) if found else 0

        return (
            find_score(r".*stability: (10|\d).*"),
            find_score(r".*similarity: (10|\d).*"),
        )

    @staticmethod
    def tot(ctx: TrialContext, target_character: str) -> str:
        """
        Tree-of-thought search over candidate responses (not implemented yet).

        :param ctx: The trial context.
        :param target_character: The target character.
        :return: Intended to be the final generated text; currently ``None``
            because the search loop below is still a TODO.
        """
        # Search parameters reserved for the implementation below.
        max_depth = 2
        branching_factor = 2

        current_content = ""

        # TODO: Implement this
        # Loop until reaching the maximum depth
        # | 1. Perform the task to generate {branching_factor} thoughts
        # | 2. Evaluate each thought and select the best one
        # | 3. Repeat the process with the selected thought
        # Format the final response in a correct format and return it

        return None

    @staticmethod
    def run(ctx: TrialContext, target_character: str) -> str:
        """
        Runs the tree-of-thought prompting.

        :param ctx: The trial context.
        :param target_character: The target character.
        :return: The generated text, exactly as returned by ``tot``.
        """
        return TreeOfThoughtPrompting.tot(ctx, target_character)

In [16]:
# Evaluate TreeOfThoughtPrompting with the settings from the configuration cell.
run_evaluation(
    "tot",
    TreeOfThoughtPrompting,
    characters=CHARACTERS,
    num_trials=NUM_TRIALS,
    model_name=MODEL_NAME,
    local_model_base_url=LOCAL_MODEL_BASE_URL,
)