In [1]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

import random
import json
from typing import Dict, Tuple, Optional, Any
from deap import base, creator, tools

# Call python-dotenv's loader before the key lookup below
# (presumably it fills the environment from a local .env file — confirm).
load_dotenv()
# os.getenv yields None when OPENAI_API_KEY is not set; nothing here checks
# for that, so a missing key is only noticed by ChatOpenAI or at call time.
openai_api_key = os.getenv("OPENAI_API_KEY")
# Chat model client; it is the object later passed to GeneticOperators.
llm = ChatOpenAI(api_key=openai_api_key)

In [2]:
class PromptStr(str):
    """A ``str`` that carries an extra ``diff`` attribute for traceability.

    The text itself behaves like any other string. ``diff`` is stored on the
    instance and defaults to the empty string when it is not supplied.
    """

    def __new__(cls, value, diff=""):
        # str is immutable, so the text has to be fixed in __new__; the extra
        # attribute is attached to the freshly built instance afterwards.
        instance = super().__new__(cls, value)
        instance.diff = diff
        return instance

# creator.create() stores each generated class as an attribute of the
# `creator` module and emits a RuntimeWarning when the name already exists.
# Dropping leftovers first keeps this cell quiet when it is re-run in the
# same kernel, while still rebuilding both classes from scratch.
for _stale_name in ("FitnessMax", "Individual"):
    if hasattr(creator, _stale_name):
        delattr(creator, _stale_name)

# Single-objective fitness; the positive weight means "maximise".
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# An individual is a PromptStr (prompt text + diff) with a FitnessMax attached.
creator.create("Individual", PromptStr, fitness=creator.FitnessMax)

In [3]:
from src.evolution.genetic_operators import GeneticOperators

# One GeneticOperators instance built around the shared `llm`; the crossover
# and mutation wrappers in this notebook call its crossover_operator and
# mutate_operator methods.
gen_operator = GeneticOperators(llm)


def load_prompts(file_path='../data/data2.json'):
    """Load the seed prompts from a JSON file.

    The file must hold a JSON array of objects; the ``"prompt"`` value of
    each object is collected in file order.

    Args:
        file_path: Path of the JSON file. A relative path is resolved against
            the current working directory.

    Returns:
        list: One entry per object in the file (empty for an empty array).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
        KeyError: If an object has no ``"prompt"`` key.
    """
    # Without an explicit encoding open() falls back to the platform locale,
    # which can mis-decode non-ASCII prompt text; JSON files are UTF-8.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # The file is fully parsed at this point, so the handle is no longer needed.
    return [item["prompt"] for item in data]

# Seed prompts, read once when this cell runs, from load_prompts' default path.
prompts = load_prompts()

def get_random_prompt():
    """Return one entry of the module-level ``prompts`` list, picked at random.

    The pick is made with ``random.choice``, so it follows the state of the
    stdlib random generator.
    """
    chosen = random.choice(prompts)
    return chosen

In [4]:
def crossover_prompts(parent1: PromptStr, parent2: PromptStr,
                      model: str = "gpt-4.1-mini") -> Tuple[PromptStr, PromptStr]:
    """
    Crossover wrapper registered as the toolbox "mate" operator.

    The parents are NOT modified: they are str subclasses and therefore
    immutable. Their text is passed to gen_operator.crossover_operator and
    the two results are wrapped in brand-new creator.Individual objects, so
    callers must use the returned tuple.

    Args:
        parent1: First parent prompt.
        parent2: Second parent prompt.
        model: Model name forwarded to the crossover operator.

    Returns:
        A (child1, child2) tuple of new creator.Individual instances. Their
        ``diff`` is left at PromptStr's default (empty string).
    """
    # Plain str copies are handed over so the operator never sees the
    # DEAP-specific attributes (fitness, diff).
    child1, child2 = gen_operator.crossover_operator(
        parent1=str(parent1),
        parent2=str(parent2),
        model=model
    )
    # Strings cannot be edited in place; each child becomes a fresh individual.
    offspring1 = creator.Individual(child1)
    offspring2 = creator.Individual(child2)
    return offspring1, offspring2


def mutate_prompt_individual(ind: PromptStr,
                             trigger_id: Optional[str] = "REWARD_MISSPECIFICATION",
                             dim_id: Optional[str] = None,
                             model: str = "gpt-4.1-mini") -> Tuple[PromptStr]:
    """Mutation wrapper registered as the toolbox "mutate" operator.

    The input individual is NOT modified: it is a str subclass and therefore
    immutable. Its text is passed to gen_operator.mutate_operator and the
    result is wrapped in a brand-new creator.Individual, so callers must use
    the returned tuple.

    Args:
        ind: Prompt to mutate.
        trigger_id: Forwarded unchanged to the mutate operator.
        dim_id: Forwarded unchanged to the mutate operator.
        model: Model name forwarded to the mutate operator.

    Returns:
        A 1-tuple holding the new individual; the second value returned by
        the operator is stored on it as ``diff``.
    """
    clean, diff = gen_operator.mutate_operator(
        prompt=str(ind),
        trigger_id=trigger_id,
        dim_id=dim_id,
        model=model
    )

    # Strings cannot be edited in place; the mutated text becomes a fresh
    # individual that carries the operator's diff for traceability.
    mutated = creator.Individual(clean, diff=diff)
    return (mutated,)

def dummy_fitness(individual):
    """Placeholder evaluator: ignores ``individual`` and scores it at random.

    Returns:
        tuple: A 1-tuple holding one float drawn from ``random.random()``.
    """
    score = random.random()
    return (score,)

In [5]:
# Register the notebook's building blocks on a DEAP toolbox under the aliases
# that the evolution run looks up (individual, population, mate, mutate,
# select, evaluate).
toolbox = base.Toolbox()
# Individuals: tools.initIterate is given creator.Individual as the container
# and get_random_prompt as the source of its content.
toolbox.register("individual", tools.initIterate, creator.Individual, get_random_prompt)
# Populations: a plain list filled by repeated toolbox.individual calls.
# This line depends on the "individual" alias registered just above.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", crossover_prompts)
# The keyword arguments bound here repeat mutate_prompt_individual's defaults.
toolbox.register("mutate", mutate_prompt_individual, trigger_id="REWARD_MISSPECIFICATION", dim_id=None)
toolbox.register("select", tools.selTournament, tournsize=3)
# Placeholder evaluator: dummy_fitness returns a random score.
toolbox.register("evaluate", dummy_fitness)

In [None]:
from deap import algorithms
import numpy as np

# Run configuration, named so the values are easy to find and tune.
SEED = 42        # seed for the stdlib random generator
POP_SIZE = 10    # individuals in the initial population
CXPB = 0.5       # crossover probability passed to eaSimple
MUTPB = 0.2      # mutation probability passed to eaSimple
NGEN = 10        # number of generations passed to eaSimple

# get_random_prompt and dummy_fitness both draw from the stdlib `random`
# module; seeding it makes those draws repeatable across runs.
# NOTE(review): the LLM-backed crossover/mutation may still vary between runs.
random.seed(SEED)

pop = toolbox.population(n=POP_SIZE)
hof = tools.HallOfFame(1)  # keeps only the single best individual seen

# Per-generation summary statistics over the individuals' fitness values.
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("Avg", np.mean)
stats.register("Std", np.std)
stats.register("Min", np.min)
stats.register("Max", np.max)

final_pop, logbook = algorithms.eaSimple(
    pop, toolbox, cxpb=CXPB, mutpb=MUTPB, ngen=NGEN,
    stats=stats, halloffame=hof,
)

# The fitness is dummy_fitness (random), so this ranking is not a quality signal.
print("=== ACTUAL BEST PERFORMERS ===")
for rank, best in enumerate(hof, start=1):
    print(f"#{rank} - Fitness: {best.fitness.values[0]:.4f}")
    print(f"Prompt: {best}")
    print()

gen	nevals	Avg     	Std     	Min       	Max     
0  	10    	0.421105	0.309206	0.00666179	0.954281
1  	10    	0.654944	0.210595	0.316332  	0.946266
2  	7     	0.568646	0.272181	0.101285  	0.946266
3  	9     	0.597351	0.240274	0.0826701 	0.904061
4  	8     	0.492073	0.251668	0.0504669 	0.806259
