1 change: 1 addition & 0 deletions .gitignore
@@ -11,3 +11,4 @@ results/
poetry.lock
CLAUDE.md
**/CLAUDE.local.md
.mypy_cache/
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
@@ -18,6 +18,16 @@ repos:
rev: 5.12.0
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
hooks:
- id: mypy
files: ^promptolution/
additional_dependencies:
- types-requests
- pandas-stubs
- numpy
args: [--explicit-package-bases, --config-file=pyproject.toml]
- repo: https://github.com/pycqa/pydocstyle
rev: 6.3.0
hooks:
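With this hook in place, the new check can be exercised locally outside of a commit with `pre-commit run mypy --all-files`; the hook id, the `^promptolution/` file scope, and the stub dependencies come from the configuration above, while the invocation itself is the standard pre-commit workflow rather than something this PR specifies.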
4 changes: 2 additions & 2 deletions promptolution/exemplar_selectors/base_exemplar_selector.py
@@ -3,7 +3,7 @@

from abc import ABC, abstractmethod

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING: # pragma: no cover
from promptolution.predictors.base_predictor import BasePredictor
@@ -18,7 +18,7 @@ class BaseExemplarSelector(ABC):
that all exemplar selectors should implement.
"""

def __init__(self, task: "BaseTask", predictor: "BasePredictor", config: "ExperimentConfig" = None):
def __init__(self, task: "BaseTask", predictor: "BasePredictor", config: Optional["ExperimentConfig"] = None):
"""Initialize the BaseExemplarSelector.

Args:
13 changes: 7 additions & 6 deletions promptolution/exemplar_selectors/random_search_selector.py
@@ -10,28 +10,29 @@ class RandomSearchSelector(BaseExemplarSelector):
evaluates their performance, and selects the best performing set.
"""

def select_exemplars(self, prompt, n_examples: int = 5, n_trials: int = 5):
def select_exemplars(self, prompt: str, n_trials: int = 5) -> str:
"""Select exemplars using a random search strategy.

This method generates multiple sets of random examples, evaluates their performance
when combined with the original prompt, and returns the best performing set.

Args:
prompt (str): The input prompt to base the exemplar selection on.
n_examples (int, optional): The number of exemplars to select in each trial. Defaults to 5.
n_trials (int, optional): The number of random trials to perform. Defaults to 5.

Returns:
str: The best performing prompt, which includes the original prompt and the selected exemplars.
"""
best_score = 0
best_score = 0.0
best_prompt = prompt

for _ in range(n_trials):
_, seq = self.task.evaluate(prompt, self.predictor, n_samples=n_examples, subsample=True, return_seq=True)
prompt_with_examples = "\n\n".join([prompt] + seq) + "\n\n"
_, seq = self.task.evaluate(
prompt, self.predictor, eval_strategy="subsample", return_seq=True, return_agg_scores=False
)
prompt_with_examples = "\n\n".join([prompt] + [seq[0][0]]) + "\n\n"
# evaluate prompts as few shot prompt
score = self.task.evaluate(prompt_with_examples, self.predictor, subsample=True)
score = self.task.evaluate(prompt_with_examples, self.predictor, eval_strategy="subsample")[0]
if score > best_score:
best_score = score
best_prompt = prompt_with_examples
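To make the new call pattern concrete, here is a minimal, self-contained sketch of the selection loop above. The `evaluate_seq`/`evaluate_agg` stubs and their return shapes are assumptions inferred from this diff (per-sample scores plus nested sequences when `return_agg_scores=False`, aggregated scores otherwise); they stand in for the real task API rather than reproduce it.

```python
import random
from typing import List, Tuple

def evaluate_seq(prompt: str) -> Tuple[List[float], List[List[str]]]:
    """Stand-in for task.evaluate(..., return_seq=True, return_agg_scores=False)."""
    return [random.random()], [["Input: great movie\nOutput: positive"]]

def evaluate_agg(prompt: str) -> List[float]:
    """Stand-in for task.evaluate(..., eval_strategy="subsample") with aggregated scores."""
    return [random.random()]

prompt = "Classify the sentiment of the text."
best_score, best_prompt = 0.0, prompt
for _ in range(5):  # n_trials
    _, seqs = evaluate_seq(prompt)
    candidate = "\n\n".join([prompt] + [seqs[0][0]]) + "\n\n"
    score = evaluate_agg(candidate)[0]  # mirrors the [0] indexing in the diff
    if score > best_score:
        best_score, best_prompt = score, candidate
print(best_prompt)
```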
26 changes: 17 additions & 9 deletions promptolution/exemplar_selectors/random_selector.py
@@ -1,6 +1,8 @@
"""Random exemplar selector."""

from typing import TYPE_CHECKING
import numpy as np

from typing import TYPE_CHECKING, List, Optional

from promptolution.exemplar_selectors.base_exemplar_selector import BaseExemplarSelector

@@ -18,8 +20,12 @@ class RandomSelector(BaseExemplarSelector):
"""

def __init__(
self, task: "BaseTask", predictor: "BasePredictor", desired_score: int = 1, config: "ExperimentConfig" = None
):
self,
task: "BaseTask",
predictor: "BasePredictor",
desired_score: int = 1,
config: Optional["ExperimentConfig"] = None,
) -> None:
"""Initialize the RandomSelector.

Args:
@@ -44,11 +50,13 @@ def select_exemplars(self, prompt: str, n_examples: int = 5) -> str:
Returns:
str: A new prompt that includes the original prompt and the selected exemplars.
"""
examples = []
examples: List[str] = []
while len(examples) < n_examples:
score, seq = self.task.evaluate(prompt, self.predictor, n_samples=1, return_seq=True)
scores, seqs = self.task.evaluate(
prompt, self.predictor, eval_strategy="subsample", return_seq=True, return_agg_scores=False
)
score = np.mean(scores)
seq = seqs[0][0]
if score == self.desired_score:
examples.append(seq[0])
prompt = "\n\n".join([prompt] + examples) + "\n\n"

return prompt
examples.append(seq)
return "\n\n".join([prompt] + examples) + "\n\n"
86 changes: 44 additions & 42 deletions promptolution/helpers.py
@@ -1,7 +1,7 @@
"""Helper functions for the usage of the libary."""


from typing import TYPE_CHECKING, Callable, List, Literal
from typing import TYPE_CHECKING, Callable, List, Literal, Optional

from promptolution.tasks.judge_tasks import JudgeTask
from promptolution.tasks.reward_tasks import RewardTask
@@ -45,7 +45,7 @@
logger = get_logger(__name__)


def run_experiment(df: pd.DataFrame, config: "ExperimentConfig"):
def run_experiment(df: pd.DataFrame, config: "ExperimentConfig") -> pd.DataFrame:
"""Run a full experiment based on the provided configuration.

Args:
@@ -79,7 +79,7 @@ def run_optimization(df: pd.DataFrame, config: "ExperimentConfig") -> List[str]:
llm = get_llm(config=config)
predictor = get_predictor(llm, config=config)

config.task_description = config.task_description + " " + predictor.extraction_description
config.task_description = (config.task_description or "") + " " + (predictor.extraction_description or "")
if config.optimizer == "capo" and (config.eval_strategy is None or "block" not in config.eval_strategy):
logger.warning("📌 CAPO requires block evaluation strategy. Setting it to 'sequential_block'.")
config.eval_strategy = "sequential_block"
@@ -126,7 +126,7 @@ def run_evaluation(df: pd.DataFrame, config: "ExperimentConfig", prompts: List[s
return df


def get_llm(model_id: str = None, config: "ExperimentConfig" = None) -> "BaseLLM":
def get_llm(model_id: Optional[str] = None, config: Optional["ExperimentConfig"] = None) -> "BaseLLM":
"""Factory function to create and return a language model instance based on the provided model_id.

This function supports three types of language models:
@@ -144,16 +144,18 @@ def get_llm(model_id: str = None, config: "ExperimentConfig" = None) -> "BaseLLM
Returns:
An instance of LocalLLM, VLLM, or APILLM, based on the model_id.
"""
if model_id is None:
model_id = config.model_id
if "local" in model_id:
model_id = "-".join(model_id.split("-")[1:])
return LocalLLM(model_id, config)
if "vllm" in model_id:
model_id = "-".join(model_id.split("-")[1:])
return VLLM(model_id, config=config)
final_model_id = model_id or (config.model_id if config else None)
if not final_model_id:
raise ValueError("model_id must be provided either directly or through config.")

return APILLM(model_id=model_id, config=config)
if "local" in final_model_id:
model_name = "-".join(final_model_id.split("-")[1:])
return LocalLLM(model_name, config=config)
if "vllm" in final_model_id:
model_name = "-".join(final_model_id.split("-")[1:])
return VLLM(model_name, config=config)

return APILLM(model_id=final_model_id, config=config)


def get_task(
@@ -174,16 +176,19 @@ def get_task(
Returns:
BaseTask: An instance of a task class based on the provided DataFrame and configuration.
"""
if task_type is None:
task_type = config.task_type
final_task_type = task_type or (config.task_type if config else None)

if task_type == "reward":
if final_task_type == "reward":
if reward_function is None:
reward_function = config.reward_function if config else None
assert reward_function is not None, "Reward function must be provided for reward tasks."
return RewardTask(
df=df,
reward_function=reward_function,
config=config,
)
elif task_type == "judge":
elif final_task_type == "judge":
assert judge_llm is not None, "Judge LLM must be provided for judge tasks."
return JudgeTask(df, judge_llm=judge_llm, config=config)

return ClassificationTask(df, config=config)
@@ -193,10 +198,9 @@ def get_optimizer(
predictor: "BasePredictor",
meta_llm: "BaseLLM",
task: "BaseTask",
optimizer: OptimizerType = None,
meta_prompt: str = None,
task_description: str = None,
config: "ExperimentConfig" = None,
optimizer: Optional[OptimizerType] = None,
task_description: Optional[str] = None,
config: Optional["ExperimentConfig"] = None,
) -> "BaseOptimizer":
"""Creates and returns an optimizer instance based on provided parameters.

@@ -215,22 +219,18 @@
Raises:
ValueError: If an unknown optimizer type is specified
"""
if optimizer is None:
optimizer = config.optimizer
if task_description is None:
task_description = config.task_description
if meta_prompt is None and hasattr(config, "meta_prompt"):
meta_prompt = config.meta_prompt

if config.optimizer == "capo":
final_optimizer = optimizer or (config.optimizer if config else None)
final_task_description = task_description or (config.task_description if config else None)

if final_optimizer == "capo":
crossover_template = (
CAPO_CROSSOVER_TEMPLATE.replace("<task_desc>", task_description)
if task_description
CAPO_CROSSOVER_TEMPLATE.replace("<task_desc>", final_task_description)
if final_task_description
else CAPO_CROSSOVER_TEMPLATE
)
mutation_template = (
CAPO_MUTATION_TEMPLATE.replace("<task_desc>", task_description)
if task_description
CAPO_MUTATION_TEMPLATE.replace("<task_desc>", final_task_description)
if final_task_description
else CAPO_MUTATION_TEMPLATE
)

@@ -243,27 +243,29 @@
config=config,
)

if config.optimizer == "evopromptde":
if final_optimizer == "evopromptde":
template = (
EVOPROMPT_DE_TEMPLATE_TD.replace("<task_desc>", task_description)
if task_description
EVOPROMPT_DE_TEMPLATE_TD.replace("<task_desc>", final_task_description)
if final_task_description
else EVOPROMPT_DE_TEMPLATE
)
return EvoPromptDE(predictor=predictor, meta_llm=meta_llm, task=task, prompt_template=template, config=config)

if config.optimizer == "evopromptga":
if final_optimizer == "evopromptga":
template = (
EVOPROMPT_GA_TEMPLATE_TD.replace("<task_desc>", task_description)
if task_description
EVOPROMPT_GA_TEMPLATE_TD.replace("<task_desc>", final_task_description)
if final_task_description
else EVOPROMPT_GA_TEMPLATE
)
return EvoPromptGA(predictor=predictor, meta_llm=meta_llm, task=task, prompt_template=template, config=config)

if config.optimizer == "opro":
template = OPRO_TEMPLATE_TD.replace("<task_desc>", task_description) if task_description else OPRO_TEMPLATE
if final_optimizer == "opro":
template = (
OPRO_TEMPLATE_TD.replace("<task_desc>", final_task_description) if final_task_description else OPRO_TEMPLATE
)
return OPRO(predictor=predictor, meta_llm=meta_llm, task=task, prompt_template=template, config=config)

raise ValueError(f"Unknown optimizer: {config.optimizer}")
raise ValueError(f"Unknown optimizer: {final_optimizer}")


def get_exemplar_selector(
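A standalone sketch of the model-id resolution that `get_llm` now performs (argument-over-config precedence, an explicit error when neither source provides an id, and prefix stripping for the `local`/`vllm` backends). The helper name and the example model strings are hypothetical; only the dispatch logic mirrors the diff.

```python
from typing import Optional, Tuple

def resolve_model_id(model_id: Optional[str], config_model_id: Optional[str]) -> Tuple[str, str]:
    # Mirrors the precedence and prefix handling in get_llm above.
    final_model_id = model_id or config_model_id
    if not final_model_id:
        raise ValueError("model_id must be provided either directly or through config.")
    if "local" in final_model_id:
        return "local", "-".join(final_model_id.split("-")[1:])
    if "vllm" in final_model_id:
        return "vllm", "-".join(final_model_id.split("-")[1:])
    return "api", final_model_id

print(resolve_model_id("vllm-meta-llama/Llama-3.1-8B", None))  # ('vllm', 'meta-llama/Llama-3.1-8B')
print(resolve_model_id(None, "gpt-4o-mini"))                   # config fallback -> ('api', 'gpt-4o-mini')
```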
55 changes: 40 additions & 15 deletions promptolution/llms/api_llm.py
@@ -1,17 +1,17 @@
"""Module to interface with various language models through their respective APIs."""


try:
import asyncio

from openai import AsyncOpenAI
from openai.types.chat import ChatCompletion, ChatCompletionMessageParam

import_successful = True
except ImportError:
import_successful = False


from typing import TYPE_CHECKING, List
from typing import TYPE_CHECKING, Dict, List, Optional

from promptolution.llms.base_llm import BaseLLM

@@ -23,9 +23,21 @@
logger = get_logger(__name__)


async def _invoke_model(prompt, system_prompt, max_tokens, model_id, client, semaphore, max_retries=20, retry_delay=5):
async def _invoke_model(
prompt: str,
system_prompt: str,
max_tokens: int,
model_id: str,
client: AsyncOpenAI,
semaphore: asyncio.Semaphore,
max_retries: int = 20,
retry_delay: float = 5,
) -> ChatCompletion:
async with semaphore:
messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
messages: List[ChatCompletionMessageParam] = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt},
]

for attempt in range(max_retries + 1): # +1 for the initial attempt
try:
@@ -46,7 +58,8 @@ async def _invoke_model(prompt, system_prompt, max_tokens, model_id, client, sem
else:
# Log the final failure and re-raise the exception
logger.error(f"❌ API call failed after {max_retries + 1} attempts: {str(e)}")
raise
raise # Re-raise the exception after all retries fail
raise RuntimeError("Failed to get response after multiple retries.")


class APILLM(BaseLLM):
@@ -65,13 +78,13 @@ class APILLM(BaseLLM):

def __init__(
self,
api_url: str = None,
model_id: str = None,
api_key: str = None,
max_concurrent_calls=50,
max_tokens=512,
config: "ExperimentConfig" = None,
):
api_url: Optional[str] = None,
model_id: Optional[str] = None,
api_key: Optional[str] = None,
max_concurrent_calls: int = 50,
max_tokens: int = 512,
config: Optional["ExperimentConfig"] = None,
) -> None:
"""Initialize the APILLM with a specific model and API configuration.

Args:
@@ -103,14 +116,26 @@ def __init__(

def _get_response(self, prompts: List[str], system_prompts: List[str]) -> List[str]:
# Setup for async execution in sync context
loop = asyncio.get_event_loop()
try:
loop = asyncio.get_running_loop()
except RuntimeError: # 'get_running_loop' raises a RuntimeError if there is no running loop
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

responses = loop.run_until_complete(self._get_response_async(prompts, system_prompts))
return responses

async def _get_response_async(self, prompts: List[str], system_prompts: List[str]) -> List[str]:
assert self.model_id is not None, "model_id must be set"
tasks = [
_invoke_model(prompt, system_prompt, self.max_tokens, self.model_id, self.client, self.semaphore)
for prompt, system_prompt in zip(prompts, system_prompts)
]
responses = await asyncio.gather(*tasks)
return [response.choices[0].message.content for response in responses]
messages = await asyncio.gather(*tasks)
responses = []
for message in messages:
response = message.choices[0].message.content
if response is None:
raise ValueError("Received None response from the API.")
responses.append(response)
return responses
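The event-loop handling introduced in `_get_response` is the usual reuse-or-create fallback; below is a minimal standalone reproduction of the pattern (the coroutine and function names are illustrative, not part of the library).

```python
import asyncio

async def _dummy_call() -> str:
    await asyncio.sleep(0)  # stand-in for an async API request
    return "ok"

def run_sync() -> str:
    # Same fallback as APILLM._get_response: asyncio.get_running_loop() raises
    # RuntimeError when no loop is running in this thread, in which case a new
    # loop is created and registered before running the coroutine to completion.
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(_dummy_call())

print(run_sync())  # "ok"
```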