diff --git a/promptolution/callbacks.py b/promptolution/callbacks.py
index 48a9b3e..452aeae 100644
--- a/promptolution/callbacks.py
+++ b/promptolution/callbacks.py
@@ -1,7 +1,7 @@
 """Callback classes for logging, saving, and tracking optimization progress."""
 
 import os
-import time
+from datetime import datetime
 from typing import Literal
 
 import numpy as np
@@ -64,7 +64,8 @@ def __init__(self, logger):
     def on_step_end(self, optimizer):
         """Log information about the current step."""
         self.step += 1
-        self.logger.critical(f"✨Step {self.step} ended✨")
+        time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
+        self.logger.critical(f"{time} - ✨Step {self.step} ended✨")
         for i, (prompt, score) in enumerate(zip(optimizer.prompts, optimizer.scores)):
             self.logger.critical(f"*** Prompt {i}: Score: {score}")
             self.logger.critical(f"{prompt}")
@@ -78,10 +79,11 @@ def on_train_end(self, optimizer, logs=None):
             optimizer: The optimizer object that called the callback.
             logs: Additional information to log.
         """
+        time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
         if logs is None:
-            self.logger.critical("Training ended")
+            self.logger.critical(f"{time} - Training ended")
         else:
-            self.logger.critical(f"Training ended - {logs}")
+            self.logger.critical(f"{time} - Training ended - {logs}")
 
         return True
 
@@ -109,8 +111,8 @@ def __init__(self, dir):
         self.step = 0
         self.input_tokens = 0
         self.output_tokens = 0
-        self.start_time = time.time()
-        self.step_time = time.time()
+        self.start_time = datetime.now()
+        self.step_time = datetime.now()
 
     def on_step_end(self, optimizer):
         """Save prompts and scores to csv.
@@ -124,12 +126,12 @@ def on_step_end(self, optimizer):
                 "step": [self.step] * len(optimizer.prompts),
                 "input_tokens": [optimizer.meta_llm.input_token_count - self.input_tokens] * len(optimizer.prompts),
                 "output_tokens": [optimizer.meta_llm.output_token_count - self.output_tokens] * len(optimizer.prompts),
-                "time_elapsed": [time.time() - self.step_time] * len(optimizer.prompts),
+                "time_elapsed": [(datetime.now() - self.step_time).total_seconds()] * len(optimizer.prompts),
                 "score": optimizer.scores,
                 "prompt": optimizer.prompts,
             }
         )
-        self.step_time = time.time()
+        self.step_time = datetime.now()
         self.input_tokens = optimizer.meta_llm.input_token_count
         self.output_tokens = optimizer.meta_llm.output_token_count
 
@@ -151,7 +153,8 @@ def on_train_end(self, optimizer):
             steps=self.step,
             input_tokens=optimizer.meta_llm.input_token_count,
             output_tokens=optimizer.meta_llm.output_token_count,
-            time_elapsed=time.time() - self.start_time,
+            time_elapsed=(datetime.now() - self.start_time).total_seconds(),
+            time=datetime.now(),
             score=np.array(optimizer.scores).mean(),
             best_prompts=str(optimizer.prompts),
         ),
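As a reference for the timing change above, a minimal standalone sketch (standard library only; variable names are illustrative, not taken from the package) of the pattern the callbacks now follow: datetime.now() replaces time.time(), elapsed time is recovered as float seconds via total_seconds(), and timestamps use the same strftime format as LoggerCallback.

    from datetime import datetime
    import time

    start = datetime.now()                                     # was: start_time = time.time()
    time.sleep(0.25)                                           # stand-in for one optimization step
    elapsed = (datetime.now() - start).total_seconds()         # float seconds, like time.time() - start_time
    stamp = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")    # timestamp format used by LoggerCallback
    print(f"{stamp} - step finished after {elapsed:.3f}s")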
""" task = get_task(config) - llm = get_llm(config.meta_llm, token=config.api_token, model_storage_path=config.model_storage_path) + if use_token: + llm = get_llm(config.meta_llm, token=config.api_token) + else: + llm = get_llm(config.meta_llm, model_storage_path=config.model_storage_path, seed=config.random_seed) if config.predictor == "MarkerBasedClassificator": predictor = MarkerBasedClassificator(llm, classes=task.classes) elif config.predictor == "FirstOccurenceClassificator": diff --git a/promptolution/llms/vllm.py b/promptolution/llms/vllm.py index f558458..a2c0754 100644 --- a/promptolution/llms/vllm.py +++ b/promptolution/llms/vllm.py @@ -44,12 +44,12 @@ def __init__( temperature: float = 0.1, top_p: float = 0.9, model_storage_path: str | None = None, - token: str | None = None, dtype: str = "auto", tensor_parallel_size: int = 1, gpu_memory_utilization: float = 0.95, max_model_len: int = 2048, trust_remote_code: bool = False, + seed: int = 42, **kwargs, ): """Initialize the VLLM with a specific model. @@ -61,12 +61,12 @@ def __init__( temperature (float, optional): Sampling temperature. Defaults to 0.1. top_p (float, optional): Top-p sampling parameter. Defaults to 0.9. model_storage_path (str, optional): Directory to store the model. Defaults to None. - token: (str, optional): Token for accessing the model - not used in implementation yet. dtype (str, optional): Data type for model weights. Defaults to "float16". tensor_parallel_size (int, optional): Number of GPUs for tensor parallelism. Defaults to 1. gpu_memory_utilization (float, optional): Fraction of GPU memory to use. Defaults to 0.95. max_model_len (int, optional): Maximum sequence length for the model. Defaults to 2048. trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False. + seed (int, optional): Random seed for the model. Defaults to 42. **kwargs: Additional keyword arguments to pass to the LLM class initialization. 
diff --git a/promptolution/optimizers/evoprompt_ga.py b/promptolution/optimizers/evoprompt_ga.py
index f6efcb8..b26ff53 100644
--- a/promptolution/optimizers/evoprompt_ga.py
+++ b/promptolution/optimizers/evoprompt_ga.py
@@ -81,6 +81,7 @@ def optimize(self, n_steps: int) -> List[str]:
             if not continue_optimization:
                 break
 
+        self._on_train_end()
         return self.prompts
 
     def _crossover(self, prompts, scores) -> str:
diff --git a/promptolution/predictors/classificator.py b/promptolution/predictors/classificator.py
index 89eb5d4..bb05930 100644
--- a/promptolution/predictors/classificator.py
+++ b/promptolution/predictors/classificator.py
@@ -75,12 +75,12 @@ class MarkerBasedClassificator(BasePredictor):
         BasePredictor: The base class for predictors in the promptolution library.
     """
 
-    def __init__(self, llm, classes, marker="", *args, **kwargs):
+    def __init__(self, llm, classes=None, marker="", *args, **kwargs):
         """Initialize the Classificator.
 
         Args:
             llm: The language model to use for predictions.
-            classes (List[str]): The list of valid class labels.
+            classes (List[str], optional): The list of valid class labels. If None, predictions are not restricted to a fixed class set.
             marker (str): The marker to use for extracting the class label.
             *args, **kwargs: Additional arguments for the BasePredictor.
         """
@@ -101,11 +101,11 @@ def _extract_preds(self, preds: List[str], shape: Tuple[int, int]) -> np.ndarray
         """
         response = []
         for pred in preds:
-            predicted_class = pred.split(self.marker)[-1].strip()
-            if predicted_class not in self.classes:
-                predicted_class = self.classes[0]
+            pred = pred.split(self.marker)[-1].strip()
+            if self.classes is not None and pred not in self.classes:
+                pred = self.classes[0]
 
-            response.append(predicted_class)
+            response.append(pred)
 
         response = np.array(response).reshape(*shape)
         return response
diff --git a/promptolution/utils/prompt_creation.py b/promptolution/utils/prompt_creation.py
index 08e88dd..85a613e 100644
--- a/promptolution/utils/prompt_creation.py
+++ b/promptolution/utils/prompt_creation.py
@@ -42,6 +42,7 @@ def create_prompts_from_samples(
     n_samples: int = 3,
     task_description: str = None,
     n_prompts: int = 1,
+    get_uniform_labels: bool = False,
 ) -> List[str]:
     """Generate a set of prompts from dataset examples sampled from a given task.
 
@@ -59,13 +60,23 @@ def create_prompts_from_samples(
         n_samples (int): The number of samples to use for generating prompts.
         task_description (str): The description of the task to include in the prompt.
         n_prompts (int): The number of prompts to generate.
+        get_uniform_labels (bool): If True, samples are selected such that all classes are represented.
 
     Returns:
         List[str]: A list of generated prompts.
     """
+    if meta_prompt is None and task_description is None:
+        meta_prompt_template = PROMPT_CREATION_TEMPLATE
+    elif meta_prompt is None and task_description is not None:
+        meta_prompt_template = PROMPT_CREATION_TEMPLATE_TD.replace("", task_description)
+    elif meta_prompt is not None and task_description is None:
+        meta_prompt_template = meta_prompt
+    elif meta_prompt is not None and task_description is not None:
+        meta_prompt_template = meta_prompt.replace("", task_description)
+
     meta_prompts = []
     for _ in range(n_prompts):
-        if isinstance(task, ClassificationTask):
+        if isinstance(task, ClassificationTask) and get_uniform_labels:
             # if classification task sample such that all classes are represented
             unique_labels, counts = np.unique(task.ys, return_counts=True)
             proportions = counts / len(task.ys)
@@ -87,13 +98,10 @@ def create_prompts_from_samples(
         xs = task.xs[indices].tolist()
         ys = task.ys[indices].tolist()
 
-        if meta_prompt is None:
-            meta_prompt = PROMPT_CREATION_TEMPLATE
-        if task_description is None:
-            meta_prompt = PROMPT_CREATION_TEMPLATE_TD.replace("", task_description)
         examples = "\n\n".join([f"Input: {x}\nOutput: {y}" for x, y in zip(xs, ys)])
-        meta_prompt = meta_prompt.replace("", examples)
+        meta_prompt = meta_prompt_template.replace("", examples)
         meta_prompts.append(meta_prompt)
+
     prompts = llm.get_response(meta_prompts)
     prompts = [prompt.split("")[0].split("")[-1].strip() for prompt in prompts]
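The class-balanced sampling that get_uniform_labels=True re-enables is only partially visible in the hunk above (the hunk cuts off after the proportions). A self-contained sketch of the idea with toy data and simplified selection logic, not the package's exact implementation:

    import numpy as np

    rng = np.random.default_rng(42)
    ys = np.array(["pos", "neg", "neg", "pos", "neu", "pos"])   # toy labels
    n_samples = 3

    unique_labels, counts = np.unique(ys, return_counts=True)
    proportions = counts / len(ys)
    per_class = np.maximum(1, np.round(proportions * n_samples).astype(int))  # every class at least once
    indices = np.concatenate(
        [rng.choice(np.where(ys == label)[0], size=k, replace=False) for label, k in zip(unique_labels, per_class)]
    )
    print(indices, ys[indices])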
diff --git a/scripts/optimizer_test_run.py b/scripts/optimizer_test_run.py
index d60efb9..802208e 100644
--- a/scripts/optimizer_test_run.py
+++ b/scripts/optimizer_test_run.py
@@ -16,6 +16,7 @@
 parser.add_argument("--optimizer", default="evopromptde")
 parser.add_argument("--n-steps", type=int, default=10)
 parser.add_argument("--token", default=None)
+parser.add_argument("--seed", type=int, default=187)
 args = parser.parse_args()
 
 config = Config(
@@ -29,8 +30,12 @@
     evaluation_llm=args.model,
     api_token=args.token,
     model_storage_path=args.model_storage_path,
+    random_seed=args.seed,
 )
 
-prompts = run_optimization(config, callbacks=[LoggerCallback(logger), CSVCallback(f"results/{args.model}/")])
+if args.token is None:
+    prompts = run_optimization(config, callbacks=[LoggerCallback(logger), CSVCallback(f"results/seedingtest/{args.model}/")])
+else:
+    prompts = run_optimization(config, callbacks=[LoggerCallback(logger), CSVCallback(f"results/seedingtest/{args.model}/")], use_token=True)
 
 logger.info(f"Optimized prompts: {prompts}")
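Taken together, the seed now flows from the CLI flag through Config.random_seed and get_llm(..., seed=...) into vLLM. A condensed usage sketch of that local path; the Config import path and the exact set of required fields are assumptions based on the call sites in this patch, and the snippet is not a complete runnable configuration.

    from promptolution.config import Config            # assumed module path
    from promptolution.helpers import run_optimization

    config = Config(
        meta_llm="my-local-model",                      # illustrative values; the task/optimizer
        evaluation_llm="my-local-model",                # fields used in the script above are omitted
        model_storage_path="models/",
        api_token=None,
        random_seed=187,
    )

    prompts = run_optimization(config)                  # local vLLM, seeded with config.random_seed
    # prompts = run_optimization(config, use_token=True)  # API path, authenticated with config.api_token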