In [None]:
from aces.environement.p3.aces_p3 import ACES_p3
from dataclasses import dataclass, field
from typing import Optional
from transformers import HfArgumentParser



@dataclass
class AcesArguments:
    """
    Settings for an ACES run.

    Groups the environment name, archive/checkpoint paths, the size of the
    generation loop, and the goal/example sampling options. Every field has a
    default, so ``AcesArguments()`` yields a usable configuration; the
    ``help`` metadata is what an argument parser built from this dataclass
    displays for each option.
    """

    # --- environment and storage -------------------------------------------
    environement_name: str = field(default="p3", metadata={"help": "environment name"})
    path_archive: str = field(
        default="",
        metadata={"help": "path to the archive if empty load the default archive"},
    )
    path_save: str = field(
        default="/home/flowers/work/aces/save_data/",
        metadata={"help": "path to save the archive"},
    )
    name_experience: str = field(
        default="aces_P3_expe",
        metadata={"help": "name of the experience (use for saving)"},
    )

    # --- generation loop ------------------------------------------------------
    n_generation: int = field(default=100, metadata={"help": "number of generation to run"})
    num_solutions: int = field(
        default=50,
        metadata={"help": "number of solutions to generate to compute the difficulty score"},
    )
    batch_size: int = field(default=32, metadata={"help": "number of puzzles to create per generation"})
    n_fewshot_examples: int = field(default=3, metadata={"help": "number of example in context" })

    # --- goal sampling --------------------------------------------------------
    max_descriptor_targeted: int = field(
        default=5,
        metadata={"help": "number of max descriptor to target (at most `max_descriptor_targeted` semantic descriptor sample as goal)"},
    )
    mode_sampling_goal: str = field(
        default="uniform",
        metadata={"help": "['uniform','smart','none'], uniform sample goal uniformely, smart: sample unexplored goal close that are within 1 of distance of already explored goal in the semantic space"},
    )
    # NOTE(review): no help text in the original; presumably the random seed
    # of the run -- confirm against ACES_p3 before documenting it for users.
    seed: int = field(default=0)

    # --- example sampling inside a niche -------------------------------------
    sampling_strategy_examples_from_niche: str = field(
        default='soft_normalised',
        metadata={"help": "sampling strategy to sample examples from a niche, choice: 'uniform','prob_best_5','soft_normalised'; need to explain difference"},
    )
    temperature_sampling_strategy_examples_from_niche: float = field(
        default=0.2,
        metadata={"help": "temperature softmax to sample example given their fitness given a niche"},
    )

    # --- puzzle generation and difficulty window -----------------------------
    puzzle_generation_strategy: str = field(
        default="aces_elm",
        metadata={"help":"startegy to generate new puzzle, choice: ['aces','aces_elm'] todo 'wizard_coder'"},
    )
    difficulty_min_target: int = field(default=90, metadata={"help":"difficulty min to target /100"})
    # Help text previously read "difficulty min ..." (copy-paste from the
    # field above); this field is the upper bound of the targeted window.
    difficulty_max_target: int = field(default=100, metadata={"help":"difficulty max to target /100"})

    # --- checkpointing --------------------------------------------------------
    save_every_n_generations: int = field(default=3, metadata={"help":"save archive every n generations"})
    path_checkpoint_archive: str = field(
        default="",
        metadata={"help":"if != '' resume experiment from the given a archive checkpoint "},
    )
    

@dataclass
class LLMArguments:
    """
    Settings for the language-model backend.

    Covers which model to load, whether to use a vLLM server or offline vLLM,
    the server connection details, and the sampling and context-size limits.
    Every field has a default, so ``LLMArguments()`` is directly usable.
    """

    # --- model and backend ----------------------------------------------------
    # Alternative checkpoint kept from the original:
    # "/home/flowers/work/hf/Qwen2.5-Coder-3B-Instruct"
    model_name_or_path: str = field(
        default="/home/flowers/work/hf/Qwen2.5-0.5B-Instruct",
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"},
    )
    online: Optional[bool] = field(
        default=False, metadata={"help": "use vllm server if True else use offline vllm"}
    )
    base_url: Optional[str] = field(
        default="http://localhost:8000", metadata={"help": "base url for vllm server"}
    )
    api_key: Optional[str] = field(default="", metadata={"help": "api key "})
    gpu: Optional[int] = field(default=1, metadata={"help": "number of gpus to use (vllm)"})

    # --- sampling ---------------------------------------------------------------
    temperature: Optional[float] = field(default=1.0, metadata={"help": "temperature"})
    temperature_labeller: Optional[float] = field(
        default=0.0, metadata={"help": "temperature labeller (semantic descriptor)"}
    )
    min_p: Optional[float] = field(default=0.05, metadata={"help": "min_p"})

    # --- size limits ------------------------------------------------------------
    max_tokens: Optional[int] = field(default=4000, metadata={"help": "max tokens"})
    max_model_length: Optional[int] = field(default=25000, metadata={"help": "max context size"})
    swap_space: Optional[float] = field(
        default=5, metadata={"help": "swap space (RAM memory for cache)"}
    )
# Driver: build both argument groups, echo them, then launch the ACES run.
# The parser is constructed but not used while the parse call stays disabled.
# NOTE(review): presumably disabled because this runs as a notebook cell,
# where the process argv is not a usable CLI -- confirm before re-enabling.
parser = HfArgumentParser((AcesArguments, LLMArguments))
# aces_args, llm_args = parser.parse_args_into_dataclasses()
aces_args = AcesArguments()
llm_args = LLMArguments()

# One print call, newline-separated: same output as three separate prints.
print("args:", aces_args, llm_args, sep="\n")

aces = ACES_p3(aces_args, llm_args)
aces.run()

args:
AcesArguments(environement_name='p3', path_archive='', path_save='/home/flowers/work/aces/save_data/', name_experience='aces_P3_expe', n_generation=100, num_solutions=50, batch_size=32, n_fewshot_examples=3, max_descriptor_targeted=5, mode_sampling_goal='uniform', seed=0, sampling_strategy_examples_from_niche='soft_normalised', temperature_sampling_strategy_examples_from_niche=0.2, puzzle_generation_strategy='aces_elm', difficulty_min_target=90, difficulty_max_target=100, save_every_n_generations=3, path_checkpoint_archive='')
LLMArguments(model_name_or_path='/home/flowers/work/hf/Qwen2.5-0.5B-Instruct', online=False, base_url='http://localhost:8000', api_key='', gpu=1, temperature=1.0, temperature_labeller=0.0, min_p=0.05, max_tokens=4000, max_model_length=25000, swap_space=5)
init LLM client
INFO 12-06 13:33:40 config.py:350] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.
INFO 12-06 13:33:40 llm_engine.py:249] Initializing an LLM engine

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 12-06 13:33:41 model_runner.py:1077] Loading model weights took 0.9276 GB
INFO 12-06 13:33:42 worker.py:232] Memory profiling results: total_gpu_memory=15.70GiB initial_memory_usage=1.26GiB peak_torch_memory=2.36GiB memory_usage_post_profile=1.29GiB non_torch_memory=0.36GiB kv_cache_size=11.41GiB gpu_memory_utilization=0.90
INFO 12-06 13:33:42 gpu_executor.py:113] # GPU blocks: 62336, # CPU blocks: 27306
INFO 12-06 13:33:42 gpu_executor.py:117] Maximum concurrency for 25000 tokens per request: 39.90x
INFO 12-06 13:33:46 model_runner.py:1400] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 12-06 13:33:46 model_runner.py:1404] If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INF

Processed prompts:  61%|██████    | 84/138 [00:18<00:00, 54.49it/s, est. speed input: 11867.55 toks/s, output: 3090.70 toks/s]