Commit d2fecf7

Feature/deterministic (#35)
* make vllm class deterministic
* fixes in prompt creation
1 parent ec5b709 commit d2fecf7

7 files changed: +50 -32 lines changed


promptolution/callbacks.py

Lines changed: 12 additions & 9 deletions
@@ -1,7 +1,7 @@
 """Callback classes for logging, saving, and tracking optimization progress."""
 
 import os
-import time
+from datetime import datetime
 from typing import Literal
 
 import numpy as np
@@ -64,7 +64,8 @@ def __init__(self, logger):
     def on_step_end(self, optimizer):
         """Log information about the current step."""
         self.step += 1
-        self.logger.critical(f"✨Step {self.step} ended✨")
+        time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
+        self.logger.critical(f"{time} - ✨Step {self.step} ended✨")
         for i, (prompt, score) in enumerate(zip(optimizer.prompts, optimizer.scores)):
             self.logger.critical(f"*** Prompt {i}: Score: {score}")
             self.logger.critical(f"{prompt}")
@@ -78,10 +79,11 @@ def on_train_end(self, optimizer, logs=None):
             optimizer: The optimizer object that called the callback.
             logs: Additional information to log.
         """
+        time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
         if logs is None:
-            self.logger.critical("Training ended")
+            self.logger.critical(f"{time} - Training ended")
         else:
-            self.logger.critical(f"Training ended - {logs}")
+            self.logger.critical(f"{time} - Training ended - {logs}")
 
         return True
 
@@ -109,8 +111,8 @@ def __init__(self, dir):
         self.step = 0
         self.input_tokens = 0
         self.output_tokens = 0
-        self.start_time = time.time()
-        self.step_time = time.time()
+        self.start_time = datetime.now()
+        self.step_time = datetime.now()
 
     def on_step_end(self, optimizer):
         """Save prompts and scores to csv.
@@ -124,12 +126,12 @@ def on_step_end(self, optimizer):
                 "step": [self.step] * len(optimizer.prompts),
                 "input_tokens": [optimizer.meta_llm.input_token_count - self.input_tokens] * len(optimizer.prompts),
                 "output_tokens": [optimizer.meta_llm.output_token_count - self.output_tokens] * len(optimizer.prompts),
-                "time_elapsed": [time.time() - self.step_time] * len(optimizer.prompts),
+                "time_elapsed": [(datetime.now() - self.step_time).total_seconds()] * len(optimizer.prompts),
                 "score": optimizer.scores,
                 "prompt": optimizer.prompts,
             }
         )
-        self.step_time = time.time()
+        self.step_time = datetime.now()
        self.input_tokens = optimizer.meta_llm.input_token_count
        self.output_tokens = optimizer.meta_llm.output_token_count
 
@@ -151,7 +153,8 @@ def on_train_end(self, optimizer):
                 steps=self.step,
                 input_tokens=optimizer.meta_llm.input_token_count,
                 output_tokens=optimizer.meta_llm.output_token_count,
-                time_elapsed=time.time() - self.start_time,
+                time_elapsed=(datetime.now() - self.start_time).total_seconds(),
+                time=datetime.now(),
                 score=np.array(optimizer.scores).mean(),
                 best_prompts=str(optimizer.prompts),
             ),
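The change above replaces time.time() with datetime.now() so the callbacks can both log a formatted timestamp and compute elapsed seconds. A minimal standalone sketch of that pattern (variable names here are illustrative, not from the repo):

    from datetime import datetime

    step_start = datetime.now()
    # ... run one optimization step ...
    elapsed_seconds = (datetime.now() - step_start).total_seconds()  # float, like a time.time() delta
    timestamp = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")      # same format as the log messages
    print(f"{timestamp} - step took {elapsed_seconds:.3f}s")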

promptolution/helpers.py

Lines changed: 5 additions & 2 deletions
@@ -27,7 +27,7 @@ def run_experiment(config: Config):
     return df
 
 
-def run_optimization(config: Config, callbacks: List = None):
+def run_optimization(config: Config, callbacks: List = None, use_token: bool = False):
     """Run the optimization phase of the experiment.
 
     Args:
@@ -37,7 +37,10 @@ def run_optimization(config: Config, callbacks: List = None):
         List[str]: The optimized list of prompts.
     """
     task = get_task(config)
-    llm = get_llm(config.meta_llm, token=config.api_token, model_storage_path=config.model_storage_path)
+    if use_token:
+        llm = get_llm(config.meta_llm, token=config.api_token)
+    else:
+        llm = get_llm(config.meta_llm, model_storage_path=config.model_storage_path, seed=config.random_seed)
     if config.predictor == "MarkerBasedClassificator":
         predictor = MarkerBasedClassificator(llm, classes=task.classes)
     elif config.predictor == "FirstOccurenceClassificator":
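With the new use_token flag, run_optimization either builds an API-backed meta LLM from config.api_token or a locally stored model seeded with config.random_seed. A hedged usage sketch; the Config import path and any fields beyond those touched in this diff are assumptions:

    from promptolution.config import Config      # assumed import path
    from promptolution.helpers import run_optimization

    config = Config(
        meta_llm="some-model-id",                # placeholder
        api_token="...",                         # only used when use_token=True
        model_storage_path="/path/to/models",    # placeholder
        random_seed=42,
        # ... other task/optimizer fields required by Config ...
    )

    prompts = run_optimization(config)                  # local model, deterministic via the seed
    prompts = run_optimization(config, use_token=True)  # API model via the token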

promptolution/llms/vllm.py

Lines changed: 6 additions & 8 deletions
@@ -44,12 +44,12 @@ def __init__(
         temperature: float = 0.1,
         top_p: float = 0.9,
         model_storage_path: str | None = None,
-        token: str | None = None,
         dtype: str = "auto",
         tensor_parallel_size: int = 1,
         gpu_memory_utilization: float = 0.95,
         max_model_len: int = 2048,
         trust_remote_code: bool = False,
+        seed: int = 42,
         **kwargs,
     ):
         """Initialize the VLLM with a specific model.
@@ -61,12 +61,12 @@ def __init__(
             temperature (float, optional): Sampling temperature. Defaults to 0.1.
             top_p (float, optional): Top-p sampling parameter. Defaults to 0.9.
             model_storage_path (str, optional): Directory to store the model. Defaults to None.
-            token: (str, optional): Token for accessing the model - not used in implementation yet.
             dtype (str, optional): Data type for model weights. Defaults to "float16".
             tensor_parallel_size (int, optional): Number of GPUs for tensor parallelism. Defaults to 1.
             gpu_memory_utilization (float, optional): Fraction of GPU memory to use. Defaults to 0.95.
             max_model_len (int, optional): Maximum sequence length for the model. Defaults to 2048.
             trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
+            seed (int, optional): Random seed for the model. Defaults to 42.
             **kwargs: Additional keyword arguments to pass to the LLM class initialization.
 
         Note:
@@ -81,7 +81,9 @@ def __init__(
         self.trust_remote_code = trust_remote_code
 
         # Configure sampling parameters
-        self.sampling_params = SamplingParams(temperature=temperature, top_p=top_p, max_tokens=max_generated_tokens)
+        self.sampling_params = SamplingParams(
+            temperature=temperature, top_p=top_p, max_tokens=max_generated_tokens, seed=seed
+        )
 
         # Initialize the vLLM engine with both explicit parameters and any additional kwargs
         llm_params = {
@@ -93,6 +95,7 @@ def __init__(
             "max_model_len": self.max_model_len,
             "download_dir": model_storage_path,
             "trust_remote_code": self.trust_remote_code,
+            "seed": seed,
             **kwargs,
         }
 
@@ -136,11 +139,6 @@ def _get_response(self, inputs: list[str]):
             for input in inputs
         ]
 
-        # Count input tokens
-        for prompt in prompts:
-            input_tokens = self.tokenizer.encode(prompt)
-            self.input_token_count += len(input_tokens)
-
         # generate responses for self.batch_size prompts at the same time
         all_responses = []
         for i in range(0, len(prompts), self.batch_size):
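The seed is now threaded into both SamplingParams and the engine constructor, which is what makes repeated generations reproducible. A standalone sketch of the same idea using vLLM directly (the model id is a placeholder; requires vllm and a GPU):

    from vllm import LLM, SamplingParams

    seed = 42
    sampling_params = SamplingParams(temperature=0.1, top_p=0.9, max_tokens=256, seed=seed)
    llm = LLM(model="some-org/some-model", seed=seed, max_model_len=2048)  # placeholder model id

    # With the seed fixed in both places, repeated calls on the same prompt
    # should produce identical samples within the same engine instance.
    out1 = llm.generate(["Write an instruction for sentiment classification."], sampling_params)
    out2 = llm.generate(["Write an instruction for sentiment classification."], sampling_params)
    assert out1[0].outputs[0].text == out2[0].outputs[0].text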

promptolution/optimizers/evoprompt_ga.py

Lines changed: 1 addition & 0 deletions
@@ -81,6 +81,7 @@ def optimize(self, n_steps: int) -> List[str]:
             if not continue_optimization:
                 break
 
+        self._on_train_end()
         return self.prompts
 
     def _crossover(self, prompts, scores) -> str:
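The one-line addition makes the optimizer fire its train-end hook once the loop finishes (including after an early break), so callbacks such as CSVCallback.on_train_end actually run. A simplified sketch of the resulting control flow; only _on_train_end and prompts appear in this diff, the rest is illustrative:

    def optimize(self, n_steps: int):
        for _ in range(n_steps):
            continue_optimization = self._do_step()  # hypothetical per-step helper
            if not continue_optimization:
                break

        self._on_train_end()   # notify callbacks even if the loop stopped early
        return self.prompts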

promptolution/predictors/classificator.py

Lines changed: 6 additions & 6 deletions
@@ -75,12 +75,12 @@ class MarkerBasedClassificator(BasePredictor):
         BasePredictor: The base class for predictors in the promptolution library.
     """
 
-    def __init__(self, llm, classes, marker="<final_answer>", *args, **kwargs):
+    def __init__(self, llm, classes=None, marker="<final_answer>", *args, **kwargs):
         """Initialize the Classificator.
 
         Args:
             llm: The language model to use for predictions.
-            classes (List[str]): The list of valid class labels.
+            classes (List[str]): The list of valid class labels. If None, does not force any class.
             marker (str): The marker to use for extracting the class label.
             *args, **kwargs: Additional arguments for the BasePredictor.
         """
@@ -101,11 +101,11 @@ def _extract_preds(self, preds: List[str], shape: Tuple[int, int]) -> np.ndarray
         """
         response = []
         for pred in preds:
-            predicted_class = pred.split(self.marker)[-1].strip()
-            if predicted_class not in self.classes:
-                predicted_class = self.classes[0]
+            pred = pred.split(self.marker)[-1].strip()
+            if self.classes is not None and pred not in self.classes:
+                pred = self.classes[0]
 
-            response.append(predicted_class)
+            response.append(pred)
 
         response = np.array(response).reshape(*shape)
         return response
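After this change the extractor tolerates classes=None: the text after the last marker is kept as-is, and snapping to a known label (with a fallback to the first class) only happens when a class list is provided. A minimal sketch of just that extraction step, outside the predictor class (the real _extract_preds additionally reshapes the result):

    import numpy as np

    def extract_preds(preds, classes=None, marker="<final_answer>"):
        """Keep text after the marker; optionally force it into the class list."""
        response = []
        for pred in preds:
            pred = pred.split(marker)[-1].strip()
            if classes is not None and pred not in classes:
                pred = classes[0]  # unknown label falls back to the first class
            response.append(pred)
        return np.array(response)

    extract_preds(["... <final_answer> positive"], classes=["negative", "positive"])  # ["positive"]
    extract_preds(["... <final_answer> anything goes"])                               # ["anything goes"]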

promptolution/utils/prompt_creation.py

Lines changed: 14 additions & 6 deletions
@@ -42,6 +42,7 @@ def create_prompts_from_samples(
     n_samples: int = 3,
     task_description: str = None,
     n_prompts: int = 1,
+    get_uniform_labels: bool = False,
 ) -> List[str]:
     """Generate a set of prompts from dataset examples sampled from a given task.
 
@@ -59,13 +60,23 @@ def create_prompts_from_samples(
         n_samples (int): The number of samples to use for generating prompts.
         task_description (str): The description of the task to include in the prompt.
         n_prompts (int): The number of prompts to generate.
+        get_uniform_labels (bool): If True, samples are selected such that all classes are represented.
 
     Returns:
         List[str]: A list of generated prompts.
     """
+    if meta_prompt is None and task_description is None:
+        meta_prompt_template = PROMPT_CREATION_TEMPLATE
+    elif meta_prompt is None and task_description is not None:
+        meta_prompt_template = PROMPT_CREATION_TEMPLATE_TD.replace("<task_desc>", task_description)
+    elif meta_prompt is not None and task_description is None:
+        meta_prompt_template = meta_prompt
+    elif meta_prompt is not None and task_description is not None:
+        meta_prompt_template = meta_prompt.replace("<task_desc>", task_description)
+
     meta_prompts = []
     for _ in range(n_prompts):
-        if isinstance(task, ClassificationTask):
+        if isinstance(task, ClassificationTask) and get_uniform_labels:
             # if classification task sample such that all classes are represented
             unique_labels, counts = np.unique(task.ys, return_counts=True)
             proportions = counts / len(task.ys)
@@ -87,13 +98,10 @@ def create_prompts_from_samples(
         xs = task.xs[indices].tolist()
         ys = task.ys[indices].tolist()
 
-        if meta_prompt is None:
-            meta_prompt = PROMPT_CREATION_TEMPLATE
-        if task_description is None:
-            meta_prompt = PROMPT_CREATION_TEMPLATE_TD.replace("<task_desc>", task_description)
         examples = "\n\n".join([f"Input: {x}\nOutput: {y}" for x, y in zip(xs, ys)])
-        meta_prompt = meta_prompt.replace("<input_output_pairs>", examples)
+        meta_prompt = meta_prompt_template.replace("<input_output_pairs>", examples)
         meta_prompts.append(meta_prompt)
+
     prompts = llm.get_response(meta_prompts)
     prompts = [prompt.split("</prompt>")[0].split("<prompt>")[-1].strip() for prompt in prompts]
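The template choice that previously lived inside the sampling loop (and selected the task-description template precisely when task_description was None) is now resolved once before the loop. A compact sketch of the four-way selection with placeholder templates:

    PROMPT_CREATION_TEMPLATE = "Write a prompt for these examples:\n\n<input_output_pairs>"    # placeholder
    PROMPT_CREATION_TEMPLATE_TD = "Task: <task_desc>\n\nExamples:\n\n<input_output_pairs>"     # placeholder

    def resolve_template(meta_prompt=None, task_description=None):
        if meta_prompt is None and task_description is None:
            return PROMPT_CREATION_TEMPLATE
        if meta_prompt is None:
            return PROMPT_CREATION_TEMPLATE_TD.replace("<task_desc>", task_description)
        if task_description is None:
            return meta_prompt
        return meta_prompt.replace("<task_desc>", task_description)

    # inside the per-prompt loop only the examples are filled in:
    # meta_prompt = resolve_template(...).replace("<input_output_pairs>", examples)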

scripts/optimizer_test_run.py

Lines changed: 6 additions & 1 deletion
@@ -16,6 +16,7 @@
 parser.add_argument("--optimizer", default="evopromptde")
 parser.add_argument("--n-steps", type=int, default=10)
 parser.add_argument("--token", default=None)
+parser.add_argument("--seed", type=int, default=187)
 args = parser.parse_args()
 
 config = Config(
@@ -29,8 +30,12 @@
     evaluation_llm=args.model,
     api_token=args.token,
     model_storage_path=args.model_storage_path,
+    random_seed=args.seed,
 )
 
-prompts = run_optimization(config, callbacks=[LoggerCallback(logger), CSVCallback(f"results/{args.model}/")])
+if args.token is None:
+    prompts = run_optimization(config, callbacks=[LoggerCallback(logger), CSVCallback(f"results/seedingtest/{args.model}/")])
+else:
+    prompts = run_optimization(config, callbacks=[LoggerCallback(logger), CSVCallback(f"results/seedingtest/{args.model}/")], use_token=True)
 
 logger.info(f"Optimized prompts: {prompts}")
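The test script now seeds the run (default 187) via Config.random_seed, writes results under results/seedingtest/<model>/, and only passes use_token=True when a token is supplied. A local, deterministic invocation using just the arguments visible in this hunk would look like: python scripts/optimizer_test_run.py --n-steps 10 --seed 187 (any model or storage-path arguments defined elsewhere in the script are omitted here).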
