# Tiny TextGrad: A Minimal Text Gradient Descent Implementation


## LLM Helpers


In [1]:
from textwrap import dedent

from litellm import completion


def _call_llm(
  prompt: str,
  input: str,
  model: str = "gpt-4o-mini",
  temperature: float = 0.9,
  max_tokens: int = 4096,
  top_p: float = 0.95,
  frequency_penalty: float = 0,
  format_as_json: bool = False,
) -> str:
  """Run one system+user chat exchange through litellm and return the reply text.

  `prompt` is sent as the system message and `input` as the user message.
  When `format_as_json` is true the request asks for the "json_object"
  response format, otherwise for "text". All other arguments are forwarded
  unchanged to `completion`.

  Returns the `content` of the first choice of the response.
  """
  if format_as_json:
    response_type = "json_object"
  else:
    response_type = "text"

  reply = completion(
    model=model,
    messages=[
      {"role": "system", "content": prompt},
      {"role": "user", "content": input},
    ],
    temperature=temperature,
    max_tokens=max_tokens,
    top_p=top_p,
    frequency_penalty=frequency_penalty,
    response_format={"type": response_type},
  )
  first_choice = reply.choices[0]
  return first_choice.message.content


def call_llm(
  prompt: str,
  input: str,
  model: str = "gpt-4o-mini",
  temperature: float = 0.9,
  max_tokens: int = 4096,
  top_p: float = 0.95,
  frequency_penalty: float = 0,
) -> str:
  """Ask the model for a plain-text reply.

  Delegates to `_call_llm` with `format_as_json=False`; every other
  argument is passed through as given.
  """
  sampling = {
    "model": model,
    "temperature": temperature,
    "max_tokens": max_tokens,
    "top_p": top_p,
    "frequency_penalty": frequency_penalty,
  }
  return _call_llm(
    prompt=prompt,
    input=input,
    format_as_json=False,
    **sampling,
  )


def enforce_json(
  json_prompt: str,
  input: str,
  model: str = "gpt-4o-mini",
  temperature: float = 0.0,
  max_tokens: int = 4096,
  top_p: float = 0.95,
  frequency_penalty: float = 0,
) -> str:
  """Ask the model for a JSON-formatted reply.

  Delegates to `_call_llm` with `format_as_json=True`, using `json_prompt`
  as the system message. Returns the raw reply string; parsing is left to
  the caller.
  """
  sampling = {
    "model": model,
    "temperature": temperature,
    "max_tokens": max_tokens,
    "top_p": top_p,
    "frequency_penalty": frequency_penalty,
  }
  return _call_llm(
    prompt=json_prompt,
    input=input,
    format_as_json=True,
    **sampling,
  )


def call_llm_with_json_output(
  prompt: str,
  json_prompt: str,
  input: str,
  model: str = "gpt-4o-mini",
  temperature: float = 0.9,
  max_tokens: int = 4096,
  json_max_tokens: int = 4096,
  top_p: float = 0.95,
  frequency_penalty: float = 0,
) -> str:
  """Generate free text, then convert it to JSON with a second call.

  Step 1 calls `call_llm` with `prompt`/`input` and the given sampling
  settings, and prints the draft. Step 2 feeds that draft to
  `enforce_json` under `json_prompt`, always at temperature 0.0 and with
  `json_max_tokens` as the token limit.

  Returns the JSON reply as a string.
  """
  # Settings that both calls share.
  shared = {
    "model": model,
    "top_p": top_p,
    "frequency_penalty": frequency_penalty,
  }

  draft = call_llm(
    prompt=prompt,
    input=input,
    temperature=temperature,
    max_tokens=max_tokens,
    **shared,
  )
  print(f"Initial result: {draft}")

  return enforce_json(
    json_prompt=json_prompt,
    input=draft,
    temperature=0.0,
    max_tokens=json_max_tokens,
    **shared,
  )


def get_json_list(json_data: dict) -> list:
  """Return the list stored under the first key of `json_data`.

  Only the first key (in insertion order) is inspected; later keys are
  ignored.

  Raises:
    ValueError: if `json_data` is empty, or if the value under its first
      key is not a list.
  """
  if not json_data:
    # next(iter({})) would leak a bare StopIteration, which is confusing
    # for callers and is silently swallowed inside generators.
    raise ValueError("The JSON data is empty.")

  first_key = next(iter(json_data))
  first_value = json_data[first_key]
  if not isinstance(first_value, list):
    raise ValueError(
      "The JSON data does not contain a list."
    )
  return first_value

## Tiny TextGrad


In [31]:
from dataclasses import dataclass
from typing import Tuple


@dataclass
class OptimizationResult:
  """An optimized prompt together with the model settings it is meant to be used with."""
  # Prompt text (TGD.optimize_text stores the variable's final value here).
  optimized_prompt: str
  # Model name (TGD.optimize_text copies it from the model engine).
  model: str
  # Sampling settings, likewise copied from the model engine by TGD.optimize_text.
  temperature: float
  top_p: float
  frequency_penalty: float


class Variable:
  """A piece of text that can be revised from textual feedback.

  `value` holds the current text and `grad` the most recent feedback (the
  "gradient"). Feedback is only recorded, and only applied, when
  `requires_grad` is true.
  """

  def __init__(
    self,
    value,
    requires_grad=True,
    role_description="",
  ) -> None:
    self.value = value
    self.grad = None
    self.requires_grad = requires_grad
    self.role_description = role_description

  def set_gradient(self, grad: str):
    """Remember `grad` as the pending feedback; ignored when gradients are off."""
    if not self.requires_grad:
      return
    self.grad = grad

  def backward(
    self, application_prompt: str, engine
  ) -> None:
    """Rewrite `value` by applying the stored feedback through `engine`.

    Does nothing unless `requires_grad` is set and a non-empty gradient
    is stored. Otherwise the engine is asked to revise the current value
    under `application_prompt`, the revision goes through a second
    clean-up call, and the stripped result replaces `value`.
    """
    if not (self.requires_grad and self.grad):
      return

    revision_request = (
      f"Original Prompt: {self.value}\n\nFeedback: {self.grad}"
    )
    revised = engine.generate(
      prompt=application_prompt,
      input=revision_request,
    )
    self.value = self._clean_prompt(revised, engine).strip()
    print(f"Updated value: {self.value}")

  def _clean_prompt(self, prompt, engine):
    """Ask `engine` to remove meta-commentary from `prompt`; return the stripped reply."""
    cleanup_instructions = dedent("""
            Clean up the following prompt. Remove any meta-commentary or
            explanations about the prompt itself. The result should be a clear,
            concise prompt ready for direct use.
            """)
    reply = engine.generate(
      prompt=cleanup_instructions,
      input=f"Original Prompt: {prompt}",
    )
    cleaned_prompt = reply.strip()
    print(f"Cleaned prompt: {cleaned_prompt}")
    return cleaned_prompt


class Engine:
  """A model name bundled with the sampling settings used for every call."""

  def __init__(
    self,
    model_name="gpt-4o-mini",
    temperature=0.7,
    max_tokens=2048,
    top_p=0.95,
    frequency_penalty=0,
  ):
    self.model_name = model_name
    self.temperature = temperature
    self.max_tokens = max_tokens
    self.top_p = top_p
    self.frequency_penalty = frequency_penalty

  def generate(self, prompt: str, input: str):
    """Call `call_llm` with this engine's settings and return its reply.

    `prompt` is the system prompt and `input` the user message.
    """
    settings = {
      "model": self.model_name,
      "temperature": self.temperature,
      "max_tokens": self.max_tokens,
      "top_p": self.top_p,
      "frequency_penalty": self.frequency_penalty,
    }
    return call_llm(prompt=prompt, input=input, **settings)


class TextLoss:
  """Textual "loss": asks an engine to critique a prompt given its results."""

  def __init__(
    self, feedback_prompt: str, engine: Engine
  ):
    self.engine = engine
    self.feedback_prompt = feedback_prompt

  def forward(
    self,
    prompt: str,
    results: list[Tuple[str, str]],
  ):
    """Return the engine's feedback on `prompt` and its `results`.

    `results` is a sequence of (input, output) pairs. Each pair is
    rendered as an "Input: ... / Output: ..." block, the blocks are
    joined with newlines and placed under the prompt text, and the whole
    message is sent to the engine with `feedback_prompt` as the system
    prompt.
    """
    rendered_pairs = []
    for given, produced in results:
      rendered_pairs.append(
        f"Input: {given}\nOutput: {produced}"
      )
    formatted_results = "\n".join(rendered_pairs)

    evaluation_input = f"Prompt:\n{prompt}\n\nResults:\n{formatted_results}"
    return self.engine.generate(
      self.feedback_prompt, evaluation_input
    )


class TGD:
  """Textual gradient descent loop over a single prompt `Variable`.

  Each step runs the current prompt on every input with `model_engine`,
  obtains feedback from `loss_fn`, stores it as the variable's gradient,
  and rewrites the variable using `eval_engine`.
  """

  def __init__(
    self,
    variable: Variable,
    model_engine: Engine,
    eval_engine: Engine,
    loss_fn: TextLoss,
    inputs: list[str],
  ):
    self.variable = variable
    self.inputs = inputs
    self.model_engine = model_engine
    self.eval_engine = eval_engine
    self.loss_function = loss_fn

  def generate_results(self):
    """Run the current prompt on every input; return (input, output) pairs in input order."""
    current_prompt = self.variable.value
    return [
      (
        _input,
        self.model_engine.generate(current_prompt, _input),
      )
      for _input in self.inputs
    ]

  def step(self):
    """Perform one iteration: generate results, get feedback, apply it."""
    outputs = self.generate_results()
    feedback = self.loss_function.forward(
      self.variable.value, outputs
    )
    print(f"Feedback: {feedback}")
    self.variable.set_gradient(feedback)
    self.apply_gradient()

  def apply_gradient(self):
    """Have the eval engine rewrite the variable according to its stored gradient."""
    revision_instructions = dedent("""
            Revise the given prompt based on the feedback. Focus only on the content of the prompt itself,
            not on explanations about the revision process. Do not include examples unless they were part
            of the original prompt. The revised prompt should be ready to use as-is, without any additional
            explanations or meta-commentary.
            """)
    self.variable.backward(
      revision_instructions.strip(), self.eval_engine
    )

  def optimize_text(
    self, num_iterations: int = 5
  ) -> OptimizationResult:
    """Run `num_iterations` steps, printing progress for each one.

    Returns an `OptimizationResult` holding the final prompt and the
    model engine's name and sampling settings.
    """
    for iteration in range(1, num_iterations + 1):
      print(f"Iteration {iteration}:")
      print(f"Current prompt: {self.variable.value}")
      self.step()
      print(f"Feedback: {self.variable.grad}\n")

    engine = self.model_engine
    return OptimizationResult(
      optimized_prompt=self.variable.value,
      model=engine.model_name,
      temperature=engine.temperature,
      top_p=engine.top_p,
      frequency_penalty=engine.frequency_penalty,
    )

## Prompt Optimization with Tiny TextGrad


In [32]:
# Common loss function instructions: optimize_prompt passes this text to
# TextLoss as the feedback (system) prompt for the evaluation engine.
PROMPT_LOSS_FN_INSTRUCTIONS = """
Evaluate the prompt and its results. Provide feedback on the following aspects:
1. Clarity of the prompt
2. Accuracy of the generated answers
3. Handling of cases where the answer is not in the text
4. Suggestions for improvement

Be specific and constructive in your feedback.
"""


def optimize_prompt(
  initial_prompt: str,
  model_name: str,
  eval_model_name: str,
  inputs: list[str],
  num_iterations: int = 5,
) -> OptimizationResult:
  """Optimize `initial_prompt` with textual gradient descent.

  Args:
    initial_prompt: starting prompt text.
    model_name: model that runs the prompt on each input.
    eval_model_name: model that critiques the results and rewrites the
      prompt.
    inputs: user messages the prompt is evaluated on at every step.
    num_iterations: number of optimization steps. The default of 5
      matches `TGD.optimize_text`, so existing callers are unaffected.

  Returns:
    The `OptimizationResult` produced by `TGD.optimize_text`.
  """
  # Both engines use Engine's default sampling settings; only the model
  # name differs.
  model_engine = Engine(model_name)
  eval_engine = Engine(eval_model_name)

  variable = Variable(
    value=initial_prompt,
    role_description="Prompt to optimize",
  )

  loss_fn = TextLoss(
    PROMPT_LOSS_FN_INSTRUCTIONS,
    eval_engine,
  )

  optimizer = TGD(
    variable=variable,
    model_engine=model_engine,
    eval_engine=eval_engine,
    loss_fn=loss_fn,
    inputs=inputs,
  )

  return optimizer.optimize_text(
    num_iterations=num_iterations
  )

## Prompt: Given text and a question, does the text answer the question?


In [24]:
# Seed prompt for the "does the text answer the question?" task.
initial_prompt = dedent("""
Given some text and a question, determine if the text
contains the answer to the question
""").strip()

# Evaluation inputs: each string packs a "Text: ..." passage and a
# "Question: ..." about it.
inputs = [
  "Text: The cat is on the mat. Question: Where is the cat?",
  "Text: The sky is blue. Question: What color is the grass?",
  "Text: Paris is the capital of France. Question: What is the capital of Germany?",
  "Text: The Earth orbits the Sun. Question: What does the Earth orbit?",
  "Text: Water freezes at 0 degrees Celsius. Question: At what temperature does water boil?",
  "Text: Beethoven composed many symphonies. Question: Who composed the Fifth Symphony?",
  "Text: Elephants are the largest land animals. Question: What is the largest land animal?",
  "Text: Shakespeare wrote 'Romeo and Juliet.' Question: Who wrote 'Hamlet'?",
  "Text: Humans have 206 bones. Question: How many bones do humans have?",
  "Text: Coffee is typically grown in tropical regions. Question: Where is coffee typically grown?",
]

# Positional arguments are (initial_prompt, model_name, eval_model_name, inputs).
result = optimize_prompt(
  initial_prompt,
  "gpt-4o-mini",
  "gpt-4o",
  inputs,
)

# `result` is an OptimizationResult, so the f-string shows its dataclass repr.
print(
  f"\n\nFinal optimized TEXT_CONTAINS_ANSWER_PROMPT:\n{result}"
)
TEXT_CONTAINS_ANSWER_PROMPT = result

Iteration 1:
Current prompt: Given some text and a question, determine if the text
contains the answer to the question
Feedback: 1. **Clarity of the prompt:**
   - The prompt is mostly clear in its intent. It asks to determine if the text contains the answer to the question provided. However, the instruction could be slightly refined for better clarity. For example, it could specify whether the output should only be "Yes" or "No" or if it should include additional information when the answer is found.

2. **Accuracy of the generated answers:**
   - The generated answers are mostly accurate. However, there are some inconsistencies:
     - "Text: The Earth orbits the Sun. Question: What does the Earth orbit? Output: Yes" is correct but could be more informative by specifying "the Sun."
     - "Text: Beethoven composed many symphonies. Question: Who composed the Fifth Symphony? Output: Yes" should ideally include "Beethoven" in the response for consistency.
     - "Text: Paris is the capi

### Test


In [11]:
# Run every example through the optimized prompt. Model and sampling
# settings are read from the OptimizationResult (as generate_qa_pairs does)
# instead of being repeated as literals that can drift from it.
for _input in inputs:
  response = call_llm(
    prompt=TEXT_CONTAINS_ANSWER_PROMPT.optimized_prompt,
    input=_input,
    model=TEXT_CONTAINS_ANSWER_PROMPT.model,
    temperature=TEXT_CONTAINS_ANSWER_PROMPT.temperature,
    # Short cap: only a Yes/No verdict with a brief explanation is expected.
    max_tokens=100,
    top_p=TEXT_CONTAINS_ANSWER_PROMPT.top_p,
    frequency_penalty=TEXT_CONTAINS_ANSWER_PROMPT.frequency_penalty,
  )
  print(response)

Yes. The answer to the question "Where is the cat?" is explicitly stated in the text as "The cat is on the mat."
No. The text does not contain information about the color of the grass.
No. The text explicitly mentions Paris as the capital of France, but it does not provide information about the capital of Germany.
Yes. The text explicitly states that "The Earth orbits the Sun," which directly answers the question "What does the Earth orbit?"
No. The text does not provide information about the temperature at which water boils.
No. The text mentions Beethoven composed many symphonies, but it does not specify that he composed the Fifth Symphony.
Yes. The text explicitly states that elephants are the largest land animals, providing the answer to the question.
No. The text does not provide the answer to the question about who wrote "Hamlet." The text only mentions that Shakespeare wrote "Romeo and Juliet."
Yes. The text explicitly states that humans have 206 bones.
Yes. The text explicitly 

## Prompt: Given text, generate question-answer pairs for the text.


In [33]:
# Seed prompt for the question-answer generation task. Unlike the first
# task's prompt it is not dedented/stripped, so it keeps its leading and
# trailing newline.
initial_prompt = """
Generate a set of relevant questions and their
corresponding answers about the given text. Ensure the
questions cover a mix of factual, analytical, and
application-based types to provide both surface-level
and in-depth knowledge of the subject.
"""

# Sample passages to generate question-answer pairs from.
data = [
  "The process of cellular respiration converts biochemical energy from nutrients into adenosine triphosphate (ATP), and releases waste products. Cellular respiration is a set of metabolic reactions and processes that take place in the cells of organisms to convert chemical energy from oxygen molecules or nutrients into ATP, and then release waste products.",
  "The first human heart transplant was performed by Dr. Christiaan Barnard on December 3, 1967, in Cape Town, South Africa. The patient, Louis Washkansky, lived for 18 days after the surgery.",
  "Rosalind Franklin was a British biophysicist and X-ray crystallographer whose work was critical in the understanding of the molecular structures of DNA, RNA, and viruses. She is best known for her photograph of DNA, known as Photo 51, which contributed significantly to the discovery of the DNA double helix by Watson and Crick.",
  "The Great Wall of China is a series of fortifications made of various materials, including stone, brick, tamped earth, wood, and other materials. It was built along the northern borders of China to protect against invasions and raids from various nomadic groups. The most well-known sections were built by the Ming Dynasty (1368-1644).",
  "The Theory of General Relativity, formulated by Albert Einstein, describes the gravitational force as a curvature of spacetime caused by mass and energy. One of its most famous predictions is the bending of light around massive objects, which has been confirmed through various experiments, including the observation of a solar eclipse in 1919.",
  "Marie Curie was a Polish-born physicist and chemist who conducted pioneering research on radioactivity. She was the first woman to win a Nobel Prize and the only person to win Nobel Prizes in two different scientific fields—Physics (1903) and Chemistry (1911). Her discoveries included the elements polonium and radium.",
  "The internet is a global network of interconnected computers that communicate via standardized protocols. It enables a wide range of services, including the World Wide Web, email, and file sharing. The internet has revolutionized communication, commerce, and access to information.",
]
# Only the first passage is used as optimization input.
inputs = data[:1]

# Positional arguments are (initial_prompt, model_name, eval_model_name, inputs).
result = optimize_prompt(
  initial_prompt,
  "gpt-4o-mini",
  "gpt-4o",
  inputs,
)

# `result` is an OptimizationResult, so the f-string shows its dataclass repr.
print(
  f"\n\nFinal optimized GENERATE_QA_PAIRS_PROMPT:\n{result}"
)
GENERATE_QA_PAIRS_PROMPT = result

Iteration 1:
Current prompt: 
Generate a set of relevant questions and their
corresponding answers about the given text. Ensure the
questions cover a mix of factual, analytical, and
application-based types to provide both surface-level
and in-depth knowledge of the subject.

Feedback: **Feedback:**

1. **Clarity of the Prompt:**
   - The prompt is clear and specific. It effectively asks for a diverse set of questions, including factual, analytical, and application-based ones, ensuring a comprehensive understanding of the subject.

2. **Accuracy of the Generated Answers:**
   - The answers provided are accurate based on the given text. Each answer correctly aligns with the information presented in the input passage.

3. **Handling of Cases Where the Answer is Not in the Text:**
   - There are no instances in the provided results where the answer is not in the text. All the questions are directly answerable based on the given information.

4. **Suggestions for Improvement:**
   - To enha

## Prompt: Given a question, generate rewordings of the question.


In [None]:
# Seed prompt for the question-rewording task (kept with its leading and
# trailing newline, as written).
initial_prompt = """
Rephrase the given question in at least three distinct ways while maintaining
the original meaning and context. Aim for creativity, avoid repetition, and
steer clear of overly complex vocabulary.
"""

# Sample questions to reword.
data = [
  "What is ATP and why is it important?",
  "What are the main themes explored in George Orwell's novel '1984'?",
  "How does the process of photosynthesis benefit plant life and ecosystems?",
  "What were the primary causes of the fall of the Roman Empire?",
  "What role does the Federal Reserve play in the United States economy?",
  "How does Quantum Computing differ from Classical Computing?",
  "What are the ethical implications of genetic engineering in humans?",
  "How do vaccines work to prevent diseases at the molecular level?",
  "What architectural features are characteristic of Gothic cathedrals?",
  "What is the significance of the Theory of Relativity in modern physics?",
]
# Only the first question is used as optimization input.
inputs = data[:1]

# Positional arguments are (initial_prompt, model_name, eval_model_name, inputs).
result = optimize_prompt(
  initial_prompt,
  "gpt-4o-mini",
  "gpt-4o",
  inputs,
)

# Bind the result to REWORD_QUESTION_PROMPT, the name reword_question
# reads; the previous name (REWORDING_PROMPT) was not used anywhere else.
print(
  f"\n\nFinal optimized REWORD_QUESTION_PROMPT:\n{result}"
)
REWORD_QUESTION_PROMPT = result

In [26]:
# Hard-coded OptimizationResult values for the three tasks. Running this
# cell rebinds the names regardless of what earlier cells assigned to them.
# NOTE(review): presumably snapshots of earlier optimization runs — confirm.

# Task: does the text contain the answer to the question?
TEXT_CONTAINS_ANSWER_PROMPT = OptimizationResult(
  optimized_prompt=dedent("""
Does the provided text explicitly contain the answer
to the given question? Respond with "Yes" or "No" and
provide a brief explanation.
""").strip(),
  model="gpt-4o-mini",
  temperature=0.7,
  top_p=0.95,
  frequency_penalty=0,
)


# Task: generate question-answer pairs from a text (read by generate_qa_pairs).
GENERATE_QA_PAIRS_PROMPT = OptimizationResult(
  optimized_prompt=dedent("""
Generate relevant questions and their corresponding
answers about the given text, covering factual,
analytical, and application-based types. Ensure all
questions and answers are derived strictly from the
text without introducing external information.
""").strip(),
  model="gpt-4o-mini",
  temperature=0.7,
  top_p=0.95,
  frequency_penalty=0,
)


# Task: reword a question (read by reword_question).
REWORD_QUESTION_PROMPT = OptimizationResult(
  optimized_prompt=dedent("""
Rephrase the following question in three distinct
ways, ensuring the original meaning and context remain
intact. Each version should be structurally unique and
use clear language.
""").strip(),
  model="gpt-4o-mini",
  temperature=0.7,
  top_p=0.95,
  frequency_penalty=0,
)

## Using the optimized prompts


In [27]:
import json

#


def generate_qa_pairs(
  text: str,
  model: str = "gpt-4o-mini",
) -> dict:
  """Generate question-answer pairs for `text` and return them as parsed JSON.

  The text is first run through GENERATE_QA_PAIRS_PROMPT, then a second
  JSON-mode call extracts the pairs, each with a question under "Q" and
  its answer under "A".

  Args:
    text: passage to generate questions and answers about.
    model: model used for both calls. It was previously accepted but
      ignored; the default equals the model stored on
      GENERATE_QA_PAIRS_PROMPT in this notebook, so default calls behave
      as before.

  Returns:
    The object produced by `json.loads` on the JSON-mode reply.

  Raises:
    json.JSONDecodeError: if the reply is not valid JSON.
  """
  json_prompt = dedent("""
    Extract a JSON list of question-answer pairs from the given text. Each pair should
    include a question (Q) and its corresponding answer (A).
  """).strip()

  result = call_llm_with_json_output(
    prompt=GENERATE_QA_PAIRS_PROMPT.optimized_prompt,
    json_prompt=json_prompt,
    input=text,
    model=model,
    temperature=GENERATE_QA_PAIRS_PROMPT.temperature,
    max_tokens=2048,
    top_p=GENERATE_QA_PAIRS_PROMPT.top_p,
    frequency_penalty=GENERATE_QA_PAIRS_PROMPT.frequency_penalty,
  )
  print(f"Result for text:\n{text}\n{result}")
  return json.loads(result)


def reword_question(
  question: str,
  model: str = "gpt-4o-mini",
) -> dict:
  """Produce rewordings of `question` and return them as parsed JSON.

  The question is first run through REWORD_QUESTION_PROMPT, then a second
  JSON-mode call extracts the reworded questions as an array of strings.

  Args:
    question: question to rephrase.
    model: model used for both calls. It was previously accepted but
      ignored; the default equals the model stored on
      REWORD_QUESTION_PROMPT in this notebook, so default calls behave as
      before.

  Returns:
    The object produced by `json.loads` on the JSON-mode reply.

  Raises:
    json.JSONDecodeError: if the reply is not valid JSON.
  """
  json_prompt = dedent("""
    Extract a JSON list of questions from the given text (i.e., an array of strings).
  """).strip()

  result = call_llm_with_json_output(
    prompt=REWORD_QUESTION_PROMPT.optimized_prompt,
    json_prompt=json_prompt,
    input=question,
    model=model,
    temperature=REWORD_QUESTION_PROMPT.temperature,
    max_tokens=2048,
    top_p=REWORD_QUESTION_PROMPT.top_p,
    frequency_penalty=REWORD_QUESTION_PROMPT.frequency_penalty,
  )
  print(f"Result for question:\n{question}\n{result}")
  return json.loads(result)


def expand_questions_for_qa_pairs(
  qa_pairs: dict,
) -> list:
  """Attach reworded variants to every question in `qa_pairs`.

  `qa_pairs` is unwrapped with `get_json_list`; each entry must provide
  "Q" and "A". For every entry the question is reworded via
  `reword_question`, and the output gains a dict whose "Q" is the
  original question followed by its rewordings and whose "A" is the
  unchanged answer. Progress is printed along the way.
  """
  expanded = []
  for entry in get_json_list(qa_pairs):
    original = entry["Q"]
    print(f"Original Question: {original}")

    variants = get_json_list(reword_question(original))
    print("Reworded Questions:")
    for variant in variants:
      print("\t", variant)

    expanded.append(
      {
        "Q": [original, *variants],
        "A": entry["A"],
      }
    )
    print("\n")
  return expanded


def generate_text_expanded_qa(
  text: str,
) -> list[dict]:
  """Generate question-answer pairs for `text`, then expand each question with rewordings."""
  return expand_questions_for_qa_pairs(
    generate_qa_pairs(text)
  )

## Example


In [19]:
# Sample passage for the end-to-end demo.
text = """
Open-endedness is essentially studying systems that can generate their own data
in an infinite capacity. These systems, if run for longer periods, become more
complex and generate increasingly interesting data. If we can crack the
challenge of creating a self-improving system that keeps generating interesting
data, we can use that data to further train our models. However, this leads to
the idea of a perpetual data machine, raising the question of how to generate
net new information from a model trained on previous data.
"""
# Despite its name, `qa_pairs` holds the expanded list returned by
# generate_text_expanded_qa: one dict per pair, with a list of questions
# under "Q" and the answer under "A".
qa_pairs = generate_text_expanded_qa(text)
print(json.dumps(qa_pairs, indent=2))

Initial result: 1. What is the concept of open-endedness in the context of generating data from systems?
- The concept of open-endedness involves studying systems that can generate their own data in an infinite capacity, becoming more complex and producing increasingly interesting data when run for longer periods.

2. How can self-improving systems that generate interesting data be beneficial in training models?
- Self-improving systems that generate interesting data can be used to further train models, enhancing their learning capabilities and performance.

3. What challenge is highlighted in the text regarding creating a perpetual data machine?
- The challenge highlighted is how to generate net new information from a model trained on previous data when aiming to create a perpetual data machine.

4. How does the idea of a perpetual data machine raise questions about generating new information?
- The idea of a perpetual data machine raises questions about how to continuously generate n