In [None]:
# Uncomment this cell to run prompt optimization on a study. Most recent runs require running on a server.
# !uv run python -m syftr.prompt_optimization --study-config studies/rank1--rag-and-agents--drdocs_hf.yaml

In [2]:
# This cell takes two studies (without PO and with it) and compares the results. Please note the commented-out code:
# PO on studies/rank1--rag-and-agents--drdocs_hf.yaml wasn't run on the Pareto trials only, so we need all trials
# to perform the join, not only the Pareto trials.

import pandas as pd
import optuna
from syftr.optuna_helper import get_pareto_mask
from syftr.configuration import cfg

# Names of the two Optuna studies being compared: the baseline search and the
# prompt-optimization (PO) run derived from it.
RAW_STUDY = "rank1--rag-and-agents--drdocs_hf"
PO_STUDY = "rank1--rag-and-agents--drdocs_hf_prompt_optimization"

# --- Baseline study: every trial, tagged with the study it came from. ---
raw_study = optuna.load_study(
    storage=cfg.database.get_optuna_storage(), study_name=RAW_STUDY
)
df = raw_study.trials_dataframe()
# Optional narrowing of the baseline to well-performing Pareto trials. Left
# disabled on purpose: the join below needs all baseline trials.
# df = df[df["values_0"] > 0.5]
# pareto_mask = get_pareto_mask(df)
# df = df[pareto_mask]
df["study_name"] = RAW_STUDY

# --- PO study: one row per parent trial (the last one recorded wins). ---
# NOTE(review): no filter on trial state is applied before de-duplicating; if
# the dataframe can contain failed/running trials, keep="last" may pick one of
# those for a given parent — confirm.
po_study = optuna.load_study(
    storage=cfg.database.get_optuna_storage(), study_name=PO_STUDY
)
optimized_df = po_study.trials_dataframe().drop_duplicates(
    subset=["user_attrs_parent_number"], keep="last"
)
optimized_df["study_name"] = PO_STUDY

# --- Join each PO trial to the baseline trial it was derived from. ---
# Columns present in both frames get the pandas default suffixes:
# "_x" for the baseline side and "_y" for the PO side.
column_labels = {
    "values_0_x": "Accuracy pre-opt",
    "values_1_x": "Cost pre-opt",
    "values_0_y": "Accuracy post-opt",
    "values_1_y": "Cost post-opt",
}
merged = pd.merge(
    left=df,
    right=optimized_df,
    left_on="number",
    right_on="user_attrs_parent_number",
).rename(columns=column_labels)

# Derived metrics are computed on the unscaled values first; only afterwards
# are the accuracy columns themselves multiplied by 100.
cost_ratio = merged["Cost post-opt"] / merged["Cost pre-opt"]
accuracy_delta = (merged["Accuracy post-opt"] - merged["Accuracy pre-opt"]) * 100

report_columns = [
    "Accuracy pre-opt",
    "Cost pre-opt",
    "Accuracy post-opt",
    "Cost post-opt",
    "Acc. improvement",
    "Cost mult, x",
    "user_attrs_flow_name",
    "user_attrs_parent_number",
]
results = merged.assign(
    **{
        "Accuracy pre-opt": merged["Accuracy pre-opt"] * 100,
        "Accuracy post-opt": merged["Accuracy post-opt"] * 100,
        "Acc. improvement": accuracy_delta,
        "Cost mult, x": cost_ratio,
    }
)[report_columns]
results

Unnamed: 0,Accuracy pre-opt,Cost pre-opt,Accuracy post-opt,Cost post-opt,Acc. improvement,"Cost mult, x",user_attrs_flow_name,user_attrs_parent_number
0,41.37931,0.000976,31.034483,0.000107,-10.344828,0.109949,ReActAgentFlow,121
1,40.0,0.000873,48.717949,0.000125,8.717949,0.143424,ReActAgentFlow,230
2,55.172414,0.001171,50.0,0.000128,-5.172414,0.109069,ReActAgentFlow,269
3,33.333333,0.000731,0.0,0.00103,-33.333333,1.409807,CritiqueAgentFlow,270
4,82.5,0.005211,36.666667,0.001359,-45.833333,0.260742,SubQuestionRAGFlow,505
5,92.0,0.011132,93.877551,0.009168,1.877551,0.823559,SubQuestionRAGFlow,521
6,90.0,0.010543,70.0,0.003823,-20.0,0.362654,SubQuestionRAGFlow,596
7,92.307692,0.017127,21.428571,0.0021,-70.879121,0.122607,SubQuestionRAGFlow,623
8,66.666667,0.004364,53.571429,0.005106,-13.095238,1.170131,ReActAgentFlow,626


In [3]:
# Show the optimized dataset description stored on each row of the PO trials frame.
optimized_df["user_attrs_optimized_dataset_description"].to_list()

["The dataset describes specific aspects of DataRobot's services, including security features, explainability tools for AI, real-time scoring methods, integration guides, tutorials, setup documentation, multimodal modeling capabilities, hyperparameter tuning strategies, deployment methodologies, and troubleshooting practices. Utilize this context to provide comprehensive and detailed answers related to API endpoints, workflows, configurations, settings, and the nuances of DataRobot’s features. Ensure answers are complete, relevant, and specifically tailored to the queries posed.",
 'The dataset is structured to optimize question answering by emphasizing clarity and precision. Annotated templates and key domain-specific procedures prioritize relevance and correctness, ensuring responses are aligned directly with user needs, without extraneous or irrelevant details.',
 "Provide precise and user-scenario-specific guidance regarding DataRobot's features, integrations, and APIs, ensuring ex