<a href="https://colab.research.google.com/github/Yang-Heewon/CV_submission/blob/main/Tutorial_Prompt_Optimization_ipynb%EC%9D%98_%EC%B5%9C%EC%A2%85%EC%82%AC%EB%B3%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Tutorial: Optimizing a Prompt

![TextGrad](https://github.com/vinid/data/blob/master/logo_full.png?raw=true)

An autograd engine -- for textual gradients!

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/zou-group/TextGrad/blob/main/examples/notebooks/Prompt-Optimization.ipynb)
[![GitHub license](https://img.shields.io/badge/License-MIT-blue.svg)](https://lbesson.mit-license.org/)
[![Arxiv](https://img.shields.io/badge/arXiv-2406.07496-B31B1B.svg)](https://arxiv.org/abs/2406.07496)
[![Documentation Status](https://readthedocs.org/projects/textgrad/badge/?version=latest)](https://textgrad.readthedocs.io/en/latest/?badge=latest)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/textgrad)](https://pypi.org/project/textgrad/)
[![PyPI](https://img.shields.io/pypi/v/textgrad)](https://pypi.org/project/textgrad/)

**Objectives:**

* In this tutorial, we will run prompt optimization.

**Requirements:**

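* You need an OpenAI API key to run this tutorial. It must be available in the `OPENAI_API_KEY` environment variable; in this notebook it is read from the Colab secret of the same name and exported to the environment before the engines are created.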


In [None]:
!pip install textgrad # you might need to restart the notebook after installing textgrad

import argparse
import concurrent
from dotenv import load_dotenv
from tqdm import tqdm
import textgrad as tg
from textgrad.tasks import load_task
import numpy as np
import random
load_dotenv(override=True)




False

Let's first define some support functions

In [None]:
def set_seed(seed):
    """Seed both Python's `random` module and NumPy's global RNG with `seed`."""
    random.seed(seed)
    np.random.seed(seed)

In [None]:
def eval_sample(item, eval_fn, model):
    """
    Score the model's answer to a single (query, reference answer) pair.

    Args:
        item: Pair ``(x, y)``: the query text and its reference answer.
        eval_fn: Scoring callable. It is first called as
            ``eval_fn(inputs=dict(prediction=..., ground_truth_answer=...))``
            and the ``.value`` of the result is converted with ``int``.
            If that raises, ``eval_fn([x, y, response])`` is called instead
            and its result is passed through ``eval_fn.parse_output``.
        model: Callable applied to the query variable to produce the response.

    Returns:
        The score as an ``int``.
    """
    x, y = item
    x = tg.Variable(x, requires_grad=False, role_description="query to the language model")
    y = tg.Variable(y, requires_grad=False, role_description="correct answer for the query")
    response = model(x)
    try:
        eval_output_variable = eval_fn(inputs=dict(prediction=response, ground_truth_answer=y))
        return int(eval_output_variable.value)
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop a long evaluation run.
        eval_output_variable = eval_fn([x, y, response])
        eval_output_parsed = eval_fn.parse_output(eval_output_variable)
        return int(eval_output_parsed)

In [None]:
def eval_dataset(test_set, eval_fn, model, max_samples: int = None, max_workers: int = 2):
    """
    Score `model` on up to `max_samples` items of `test_set` using a thread pool.

    Args:
        test_set: Iterable of samples; must support ``len()`` when
            `max_samples` is None.
        eval_fn: Scoring callable forwarded to ``eval_sample``.
        model: Model forwarded to ``eval_sample``.
        max_samples: Maximum number of samples to evaluate. ``None`` means
            ``len(test_set)``; ``0`` evaluates nothing and returns ``[]``.
        max_workers: Number of worker threads (defaults to 2).

    Returns:
        List of per-sample scores, in completion order (not dataset order).
    """
    # `import concurrent` alone does not guarantee that the `futures`
    # submodule is loaded, so import it explicitly here.
    import concurrent.futures

    if max_samples is None:
        max_samples = len(test_set)
    accuracy_list = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for sample in test_set:
            # Check the cap before submitting so max_samples=0 submits nothing.
            if len(futures) >= max_samples:
                break
            futures.append(executor.submit(eval_sample, sample, eval_fn, model))
        tqdm_loader = tqdm(concurrent.futures.as_completed(futures), total=len(futures), position=0)
        for future in tqdm_loader:
            accuracy_list.append(future.result())
            # Running mean of the scores collected so far.
            tqdm_loader.set_description(f"Accuracy: {np.mean(accuracy_list)}")
    return accuracy_list

In [None]:
def run_validation_revert(system_prompt: tg.Variable, results, model, eval_fn, val_set):
    """
    Validate the current prompt and roll it back if validation got worse.

    The mean score on `val_set` is compared with the mean of the last entry
    in ``results["validation_acc"]``. When the new score is strictly lower,
    `system_prompt` is reset to the last entry of ``results["prompt"]`` and
    the previous score is recorded again; otherwise the new score is
    recorded. Exactly one value is appended to ``results["validation_acc"]``.
    """
    current_score = np.mean(eval_dataset(val_set, eval_fn, model))
    baseline_score = np.mean(results["validation_acc"][-1])
    print("val_performance: ", current_score)
    print("previous_performance: ", baseline_score)
    baseline_prompt = results["prompt"][-1]

    score_to_record = current_score
    if current_score < baseline_score:
        # The update hurt validation: report it and restore the earlier prompt.
        print(f"rejected prompt: {system_prompt.value}")
        system_prompt.set_value(baseline_prompt)
        score_to_record = baseline_score

    results["validation_acc"].append(score_to_record)

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset file is read from there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the dataset (example: using pandas)
import pandas as pd
from sklearn.model_selection import train_test_split
import textgrad as tg # import TextGrad

# Change the file path to the actual path of your dataset file.
# The initial load is treated as the full dataset.
full_df = pd.read_excel("/content/drive/MyDrive/sample_test.xlsx")

# Split the dataset into train, validation and test sets.
# First hold out 20% of the full data as the test set; the remaining 80% is train+validation.
train_val_df, test_df = train_test_split(full_df, test_size=0.2, random_state=42)

# Split train+validation: test_size=0.125 takes 1/8 of that 80% (about 10% of the full data)
# for validation and leaves 7/8 (about 70% of the full data) for training.
# NOTE(review): the original comments aimed for an 8:1:1 split, but with test_size=0.2 above
# the result is roughly 70% / 10% / 20% -- confirm which ratio is intended.
train_df, val_df = train_test_split(train_val_df, test_size=0.125, random_state=42)


# The pandas DataFrames may need to be converted into the TextGrad Dataset format.
# Refer to the TextGrad documentation or examples to find a suitable conversion.
# train_set = YourTextGradDatasetWrapper(train_df)
# val_set = YourTextGradDatasetWrapper(val_df)
# test_set = YourTextGradDatasetWrapper(test_df)

# Evaluation function for this dataset.
# It compares the model's prediction with the ground-truth label.
def _labels_match(prediction, ground_truth_answer):
    """Return 1 if the stripped prediction text equals the stripped label text, else 0."""
    # Accept either objects exposing `.value` (e.g. tg.Variable) or plain values.
    predicted = str(getattr(prediction, "value", prediction)).strip()
    expected = str(getattr(ground_truth_answer, "value", ground_truth_answer)).strip()
    return int(predicted == expected)


def your_evaluation_function(inputs):
    """
    Score one prediction against its ground-truth label.

    Args:
        inputs: Mapping with the keys ``"prediction"`` and
            ``"ground_truth_answer"``. The values are normally
            ``tg.Variable`` objects; plain values are also accepted.

    Returns:
        A ``tg.Variable`` whose value is ``1`` when the stripped prediction
        equals the stripped label and ``0`` otherwise.

        The score is an accuracy (1 = correct), not a loss: ``eval_sample``,
        ``eval_dataset`` and ``run_validation_revert`` average this value and
        report it as accuracy, so a 0-for-correct convention inverts every
        reported number and makes the revert logic keep the worse prompt.

        When both inputs are ``tg.Variable`` objects the score is produced
        through ``StringBasedFunction``, so the result is linked to the
        prediction in the computation graph and ``backward()`` can propagate
        feedback to the system prompt. A bare ``tg.Variable`` has no
        predecessors and cannot do that.

    Raises:
        KeyError: If a required key is missing from `inputs`.
    """
    from textgrad.autograd.string_based_ops import StringBasedFunction

    try:
        prediction = inputs["prediction"]
        ground_truth_answer = inputs["ground_truth_answer"]
    except TypeError:
        # `inputs` is not a mapping (e.g. a positional list from a fallback call).
        # Keep the best-effort behaviour and score the sample as incorrect.
        return tg.Variable(0, requires_grad=True, role_description="Evaluation score")

    if isinstance(prediction, tg.Variable) and isinstance(ground_truth_answer, tg.Variable):
        scorer = StringBasedFunction(
            _labels_match,
            function_purpose=(
                "Checks whether the predicted inclusion label (0 or 1) exactly matches "
                "the ground-truth label; returns 1 for a match and 0 otherwise."
            ),
        )
        return scorer(
            inputs=dict(prediction=prediction, ground_truth_answer=ground_truth_answer),
            response_role_description="Evaluation score",
        )

    # Plain (non-Variable) inputs cannot take part in the graph; return a standalone score.
    return tg.Variable(
        _labels_match(prediction, ground_truth_answer),
        requires_grad=True,
        role_description="Evaluation score",
    )


eval_fn = your_evaluation_function # use the evaluation function defined above

# STARTING_SYSTEM_PROMPT must also be set to suit the dataset.
# STARTING_SYSTEM_PROMPT = "an initial prompt suited to your dataset"

print("Train/Val/Test Set Lengths: ", len(train_df), len(val_df), len(test_df))

Train/Val/Test Set Lengths:  13 2 4


In [None]:
import os
from google.colab import userdata

# Export the key returned by Colab's userdata store as the OPENAI_API_KEY environment variable.
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")

set_seed(12)
# gpt-4o engine: registered below as the backward engine.
llm_api_eval = tg.get_engine(engine_name="gpt-4o")
# gpt-3.5-turbo-0125 engine.
llm_api_test = tg.get_engine(engine_name="gpt-3.5-turbo-0125")
tg.set_backward_engine(llm_api_eval, override=True)

# Wrap the DataFrames loaded and split in cell rZy9paHxRdjo as TextGrad datasets.
class CustomDataset(tg.tasks.Dataset):
    """List-backed dataset of ``(query_text, label_text)`` pairs built from a DataFrame."""

    def __init__(self, dataframe):
        self.dataframe = dataframe
        # The query combines review_abstract, review_criteria and PICO;
        # the label, converted to a string, is the expected answer.
        examples = []
        for _, row in dataframe.iterrows():
            query = f"Review Abstract: {row['review_abstract']}\nReview Criteria: {row['review_criteria']}\nPICO: {row['PICO']}"
            examples.append((query, str(row['label'])))
        self.data = examples

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def get_task_description(self):
        """Return the initial system prompt text for the inclusion/exclusion task."""
        # NOTE(review): the original comment says the <TEXT_TO_UPDATE> tags let the
        # TextGrad optimizer find the text to replace -- confirm that TextGrad uses them.
        prompt_parts = [
            "You are an AI assistant specialized in Systematic Reviews. ",
            "Given a review abstract, review criteria, and PICO elements, ",
            "determine if the paper should be included (label 1) or excluded (label 0) ",
            "based on the criteria and PICO.\n",
            "Provide only the label (0 or 1) as your output.\n",
            "<TEXT_TO_UPDATE>\n",
            "This is the initial prompt for optimization.\n",
            "</TEXT_TO_UPDATE>",
        ]
        return "".join(prompt_parts)
train_set = CustomDataset(train_df)
val_set = CustomDataset(val_df)
test_set = CustomDataset(test_df)


# Use the evaluation function defined in cell rZy9paHxRdjo.
# eval_fn = your_evaluation_function # already defined in cell rZy9paHxRdjo, so no need to define it again


print("Train/Val/Test Set Lengths: ", len(train_set), len(val_set), len(test_set))
STARTING_SYSTEM_PROMPT = train_set.get_task_description()

Train/Val/Test Set Lengths:  13 2 4


This is the system prompt we are going to start from:

In [None]:
# Show the initial system prompt before any optimization step.
print(STARTING_SYSTEM_PROMPT)


You are an AI assistant specialized in Systematic Reviews. Given a review abstract, review criteria, and PICO elements, determine if the paper should be included (label 1) or excluded (label 0) based on the criteria and PICO.
Provide only the label (0 or 1) as your output.
<TEXT_TO_UPDATE>
This is the initial prompt for optimization.
</TEXT_TO_UPDATE>


In [None]:
# Training batches of 3 examples, with shuffling enabled.
train_loader = tg.tasks.DataLoader(train_set, batch_size=3, shuffle=True)

# Zero-shot baseline model on the gpt-4o engine; this prompt is not passed to the optimizer.
system_prompt_eval = tg.Variable(
    STARTING_SYSTEM_PROMPT, requires_grad=True,
    role_description="system prompt to the language model"
)
model_evaluation = tg.BlackboxLLM(llm_api_eval, system_prompt_eval)

# Model on the gpt-3.5 engine; its system prompt is the variable being optimized.
system_prompt_train = tg.Variable(
    STARTING_SYSTEM_PROMPT, requires_grad=True,
    role_description=("structured system prompt to a somewhat capable language model "
                      "that specifies the behavior and strategies for the QA task")
)
model = tg.BlackboxLLM(llm_api_test, system_prompt_train)

NEW_TAGS = ["<UPDATED_PROMPT>", "</UPDATED_PROMPT>"]

# Only {new_variable_start_tag} / {new_variable_end_tag} are format fields in this string.
# The expected output is described in words on purpose: an earlier version showed the
# literal placeholder "<<NEW_PROMPT>>" between the tags, and the optimizer copied that
# placeholder verbatim into every updated system prompt.
CUSTOM_OPTIMIZER_PROMPT = (
    "You are optimizing a SYSTEM PROMPT for a Systematic Review classifier.\n"
    "Return ONLY the improved prompt, placed between the tags "
    "{new_variable_start_tag} and {new_variable_end_tag}, like this:\n"
    "{new_variable_start_tag}full text of the improved prompt{new_variable_end_tag}\n"
    "Do not output placeholder text, explanations, or anything outside the tags."
)

optimizer = tg.TextualGradientDescent(
    engine=llm_api_eval,
    parameters=[system_prompt_train],
    new_variable_tags=NEW_TAGS,                 # a list or a tuple both work
    optimizer_system_prompt=CUSTOM_OPTIMIZER_PROMPT,
)

# Per-step history; "prompt" holds the prompt text recorded after each step.
results = {"test_acc": [], "prompt": [], "validation_acc": [], "zeroshot_eval_acc":[]}

# 0-shot baseline with strong engine (optional)
results["zeroshot_eval_acc"].append(eval_dataset(test_set, eval_fn, model_evaluation))

# main evals with lightweight engine
results["test_acc"].append(eval_dataset(test_set, eval_fn, model))
results["validation_acc"].append(eval_dataset(val_set, eval_fn, model))
results["prompt"].append(system_prompt_train.get_value())

  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:  25%|██▌       | 1/4 [00:01<00:03,  1.19s/it]INFO:textgrad:LLMCall function forward
Accuracy: 0.5:  50%|█████     | 2/4 [00:01<00:01,  1.50it/s]INFO:textgrad:LLMCall function forward
Accuracy: 0.3333333333333333:  75%|███████▌  | 3/4 [00:01<00:00,  1.68it/s]INFO:textgrad:LLMCall function forward
Accuracy: 0.25: 100%|██████████| 4/4 [00:02<00:00,  1.71it/s]
  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 0.0:  25%|██▌       | 1/4 [00:00<00:02,  1.30it/s]INFO:textgrad:LLMCall function forward
Accuracy: 0.5:  50%|█████     | 2/4 [00:00<00:00,  2.47it/s]INFO:textgrad:LLMCall function forward
Accuracy: 0.6666666666666666:  75%|███████▌  | 3/4 [00:01<00:00,  1.61it/s]INFO:textgrad:LLMCall function forward
Accuracy: 0.75: 100%|██████████| 4/4 [00:02<00:00,  1.79it/s]
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 0.0:  50%|████

In [None]:
# Optimize the system prompt: 3 epochs, at most 4 training steps (batches) per epoch.
for epoch in range(3):
    for steps, (batch_x, batch_y) in enumerate((pbar := tqdm(train_loader, position=0))):
        pbar.set_description(f"Training step {steps}. Epoch {epoch}")

        # Call zero_grad defensively, since it may not exist
        if hasattr(optimizer, "zero_grad"):
            optimizer.zero_grad()

        losses = []
        for (x, y) in zip(batch_x, batch_y):
            x_var = tg.Variable(x, requires_grad=False, role_description="query to the language model")
            y_var = tg.Variable(y, requires_grad=False, role_description="correct answer for the query")

            pred = model(x_var)  # model holds a reference to system_prompt_train

            # Try the keyword `inputs` call form first; on any exception fall back
            # to calling eval_fn with a positional list.
            try:
                loss_var = eval_fn(inputs=dict(prediction=pred, ground_truth_answer=y_var))
            except Exception:
                loss_var = eval_fn([x_var, y_var, pred])

            losses.append(loss_var)

        # Combine the per-example evaluation outputs and run the backward pass.
        total_loss = tg.sum(losses)
        total_loss.backward()

        # TextualGradientDescent updates system_prompt_train
        optimizer.step()

        # Validate and, if needed, revert; pass the prompt variable being trained
        run_validation_revert(system_prompt_train, results, model, eval_fn, val_set)

        # Print the current system prompt string
        print("sys prompt: ", system_prompt_train.get_value())

        # Record test performance
        test_acc = eval_dataset(test_set, eval_fn, model)
        results["test_acc"].append(test_acc)
        results["prompt"].append(system_prompt_train.get_value())

        # Stop the epoch after the fourth step (steps is zero-based).
        if steps == 3:
            break


Training step 0. Epoch 0: : 0it [00:00, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:  50%|█████     | 1/2 [00:02<00:02,  2.92s/it]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 [00:03<00:00,  1.74s/it]


val_performance:  1.0
previous_performance:  0.5
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your task is to analyze and synthesize information from various sources to provide comprehensive and accurate answers. Focus on clarity, relevance, and conciseness in your responses. Prioritize evidence-based information and ensure that your answers are well-structured and easy to understand. Use the following strategies: 1) Identify key concepts and terms, 2) Summarize findings from multiple sources, 3) Highlight any consensus or discrepancies, 4) Provide citations where applicable, and 5) Offer a clear conclusion or recommendation based on the evidence. Always aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:  25%|██▌       | 1/4 [00:02<00:07,  2.63s/it]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:  50%|█████     | 2/4 [00:05<00:05,  2.62s/it]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:  75%|███████▌  | 3/4 [00:07<00:02,  2.60s/it]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:08<00:00,  2.04s/it]
Training step 1. Epoch 0: : 1it [00:16, 16.71s/it]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:  50%|█████     | 1/2 [00:04<00:04,  4.24s/it]

val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:  25%|██▌       | 1/4 [00:02<00:08,  2.84s/it]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:  50%|█████     | 2/4 [00:04<00:03,  1.99s/it]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:  75%|███████▌  | 3/4 [00:06<00:01,  1.91s/it]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:10<00:00,  2.56s/it]
Training step 2. Epoch 0: : 2it [00:42, 22.10s/it]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accura

val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 227.95it/s]
Training step 3. Epoch 0: : 3it [00:52, 16.33s/it]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 [00:00<00:00, 547.81it/s]
INFO:textgrad:LLMCall function forward


val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 410.52it/s]
Training step 3. Epoch 0: : 3it [01:03, 21.18s/it]
Training step 0. Epoch 1: : 0it [00:00, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 

val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 455.10it/s]
Training step 1. Epoch 1: : 1it [00:02,  2.33s/it]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 [00:00<00:00, 249.91it/s]


val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 253.03it/s]
Training step 2. Epoch 1: : 2it [00:07,  4.23s/it]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 [00:00<0

val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 857.99it/s]
Training step 3. Epoch 1: : 3it [00:16,  6.18s/it]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 [00:00<00:00, 470.29it/s]


val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 319.70it/s]
Training step 3. Epoch 1: : 3it [00:19,  6.50s/it]
Training step 0. Epoch 2: : 0it [00:00, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 [00:00<00:00, 177.98it/s]


val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


INFO:textgrad:LLMCall function forward
  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 388.18it/s]
Training step 1. Epoch 2: : 1it [00:03,  3.57s/it]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 [00:00<0

val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 145.57it/s]
Training step 2. Epoch 2: : 1it [00:03,  3.57s/it]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 [00:00<00:00, 370.31it/s]


val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 377.21it/s]
Training step 3. Epoch 2: : 3it [00:03,  1.03it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:Idempotent backward
INFO:textgrad:TextualGradientDescent prompt for update
INFO:textgrad:TextualGradientDescent optimizer response
INFO:textgrad:TextualGradientDescent updated text
INFO:textgrad:LLMCall function forward
  0%|          | 0/2 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 2/2 [00:00<00:00, 318.41it/s]


val_performance:  1.0
previous_performance:  1.0
sys prompt:  <<NEW_PROMPT>>You are an AI assistant specialized in Systematic Reviews. Your primary role is to assist users by providing clear, concise, and accurate information related to systematic reviews. You should focus on understanding the user's query, retrieving relevant data, and presenting it in a way that is easy to comprehend. Your responses should be evidence-based, and you should aim to enhance the user's understanding and support informed decision-making.<<NEW_PROMPT>>


INFO:textgrad:LLMCall function forward
  0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
Accuracy: 1.0:   0%|          | 0/4 [00:00<?, ?it/s]INFO:textgrad:LLMCall function forward
INFO:textgrad:LLMCall function forward
Accuracy: 1.0: 100%|██████████| 4/4 [00:00<00:00, 237.24it/s]
Training step 3. Epoch 2: : 3it [00:03,  1.25s/it]
