In [23]:
%pip install --upgrade python-dotenv nest_asyncio pydantic google-genai requests pandas

from IPython.display import clear_output ; clear_output()

In [24]:
import os
import json
import asyncio

from dotenv import load_dotenv
import nest_asyncio

from textwrap import dedent
from IPython.display import display, Markdown

from pydantic import BaseModel, Field
from enum import Enum

import pandas as pd

from google import genai
from google.genai import types

# Model identifiers selectable through the `pro` flag of `gemini()`.
G25FLASH = 'gemini-2.5-flash-preview-04-17'
G25PRO = 'gemini-2.5-pro-preview-03-25'

# Applied before any `await` in later cells; presumably needed so coroutines can
# run inside the notebook's already-running event loop.
nest_asyncio.apply()

# load_dotenv() must run before the key lookup so a key kept in a .env file is visible.
load_dotenv()
_gemini_client_aio = genai.Client(api_key=os.environ.get('GEMINI_API_KEY')).aio

async def gemini(
        prompt,
        pro = False, max_tokens = None, temperature = None,
        budget = None, schema = None):
    """Send `prompt` to a Gemini model and return its answer.

    Args:
        prompt: Content passed as `contents` to `generate_content`.
        pro: When truthy the `G25PRO` model is used, otherwise `G25FLASH`.
        max_tokens: Optional value for the `max_output_tokens` config entry.
        temperature: Optional value for the `temperature` config entry.
        budget: Optional value for `thinking_config.thinking_budget`.
        schema: Optional response schema; when given, a JSON response is
            requested and the parsed object is returned.

    Returns:
        `response.parsed` when `schema` is given, otherwise `response.text`.
    """
    # Candidate settings in the order they should appear in the config;
    # entries the caller left as None are omitted entirely.
    candidates = {
        'max_output_tokens': max_tokens,
        'temperature': temperature,
        'thinking_config': None if budget is None else {'thinking_budget': budget},
    }
    config = {key: value for key, value in candidates.items() if value is not None}

    wants_structured = schema is not None
    if wants_structured:
        config.update(response_mime_type='application/json', response_schema=schema)

    model_name = G25PRO if pro else G25FLASH
    response = await _gemini_client_aio.models.generate_content(
        model=model_name,
        contents=prompt,
        config=config,
    )

    return response.parsed if wants_structured else response.text

def md(str):
    """Display the given text as rendered Markdown in the cell output."""
    # NOTE: the parameter name shadows the builtin `str` inside this function;
    # it is kept so existing keyword callers continue to work.
    rendered = Markdown(str)
    display(rendered)

def display_df(df):
    """Display a DataFrame with left/top-aligned, wrapping cells of equal width."""
    # 'pre-wrap' keeps the line breaks contained in long text cells.
    cell_css = {
        'text-align': 'left',
        'vertical-align': 'top',
        'white-space': 'pre-wrap',
        'width': '50%',
    }
    styled = df.style.set_properties(**cell_css)
    display(styled)

In [25]:
# Load the input/output example pairs used throughout the notebook.
dataset = pd.read_csv('dataset.csv')

# Show the first few rows as a sanity check.
preview_rows = dataset.head(3)
display_df(preview_rows)

item_count = len(dataset)
print(f'{item_count} items in dataset.')

Unnamed: 0,input,output
0,"The afternoon sun filtered lazily through the stained-glass windows of The Crown Inn, casting colourful patterns onto the worn carpet. It was a quiet, unhurried time of day. The pub held a palpable sense of calm. At a small, round table near the back, Arthur, a male aged 68, slowly turned a coaster between his fingers, contemplating his half-empty pint. Beside him sat Eleanor, a female aged 65, her spectacles perched on her nose as she read a book, occasionally taking a quiet sip of her water. Their presence was peaceful. Closer to the polished oak bar, Liam, a male aged 32, was engaged in a low-voiced conversation with Chloe, a female aged 30. He leaned forward slightly, listening intently, while she gestured softly with one hand. The air hummed gently with the distant clinking of a glass and the low murmur of background music, creating a perfectly calm atmosphere on this afternoon.","PEOPLE:  - Arthur, 68, male  - Chloe, 30, female  - Eleanor, 65, female  - Liam, 32, male LOCATION: pub TIME_OF_DAY: afternoon MOOD: calm"
1,"The late afternoon sun cast long, distorted shadows through the living room windows. Clara, age 8, female, clutched a worn teddy bear, her small body trembling. Beside her, Leo, age 10, male, stood frozen, his face pale despite his attempt at a brave stance. Their mother, Sarah, age 35, female, was on the phone in the kitchen moments before, but a sudden, sharp crack from the front of the house had silenced her. Now, she moved slowly into the hallway, her eyes wide with a primal fear that mirrored her children's. The house, usually a place of comfort and noise, was unnaturally quiet. The only sound was the frantic thumping of their own hearts. Then, a figure emerged from the deeper shadows near the front door. It was Elias, a man in his 50s, male, whom they had never seen before. He didn't move quickly, didn't make a sound, just stood there, a silent, imposing shape in the dim light. His presence felt wrong, like a cold draft in a warm room. Clara whimpered softly, burying her face in the teddy bear. Leo edged closer to Sarah, his earlier bravery completely gone. The air grew heavy, thick with unspoken dread. The afternoon had turned terrifying.","PEOPLE:  - Clara, 8, female  - Elias, 55, male  - Leo, 10, male  - Sarah, 35, female LOCATION: home TIME_OF_DAY: afternoon MOOD: scary"
2,"The only light in the office came from the humming fluorescent tubes above. Sarah, a 32 year old female, huddled closer to her screen, the blue glow reflecting on her tense face. Across the aisle, David, a 45 year old male, slowly turned his head towards the dark hallway, his glasses glinting. Neither spoke. The air was thick with a silence that felt wrong, broken only by the faint, rhythmic dripping sound coming from somewhere beyond the closed conference room door. Alex, a 25 year old person of other gender, sitting at the next desk, jumped violently when a floorboard creaked overhead on the empty floor above. They clutched their chest, eyes wide. It was well past midnight, the city lights distant pinpricks through the large window, confirming the late hour. A sudden cold draft swept through the room despite the still air. The dripping stopped. A slow, dragging sound replaced it, closer now. David swallowed hard. Sarah stifled a whimper. Alex slowly reached for the heavy stapler on their desk. The sound grew louder, closer. Something was moving in the dark hallway. The mood was undeniably one of creeping terror.","PEOPLE:  - Alex, 25, other  - David, 45, male  - Sarah, 32, female LOCATION: office TIME_OF_DAY: night MOOD: scary"


100 items in dataset.


In [26]:
# Fixed positional split: rows 0-24 train, 25-49 validation, 50-99 test.
# Each split gets a fresh 0-based index.
training_dataset = dataset.iloc[:25].reset_index(drop=True)
validation_dataset = dataset.iloc[25:50].reset_index(drop=True)
testing_dataset = dataset.iloc[50:100].reset_index(drop=True)

# Report the shape of every split and show its last row.
for split_name, split_frame in (
        ('training', training_dataset),
        ('validation', validation_dataset),
        ('testing', testing_dataset)):
    print(f'{split_name}: {split_frame.shape}')
    display_df(split_frame.tail(1))

training: (25, 2)


Unnamed: 0,input,output
24,"Liam, a 28-year-old male, clapped Ben, a 29-year-old male, on the back. ""Can you believe it?"" Liam grinned widely. Aisha, a 27-year-old female, linked arms with Ben, her face alight with laughter. They were walking down Elm Street, the only light coming from the warm glow of the infrequent lampposts overhead and the distant shimmer of city lights. The sky above was a deep, inky black, dotted with faint stars. The usual daytime bustle of traffic and pedestrians was gone, replaced by a quiet hum. Aisha squeezed Ben's arm. ""Seriously, I thought we'd messed it up for sure."" Her voice was light and airy. Ben chuckled, the sound carrying slightly in the still air. ""Pure luck, maybe, but we did it! That presentation was intense."" He looked from Aisha to Liam, both friends beaming back at him. They had just left the office building a few blocks away, having successfully landed a major project they'd worked on for months. The relief and excitement bubbled up, turning their walk into a celebratory stroll. They weren't rushing; they were savouring the moment, their steps light, their conversation easy, punctuated by bursts of genuine, joyful laughter that echoed softly between the buildings. The cool night air felt fresh and invigorating.","PEOPLE:  - Aisha, 27, female  - Ben, 29, male  - Liam, 28, male LOCATION: street TIME_OF_DAY: night MOOD: happy"


validation: (25, 2)


Unnamed: 0,input,output
24,"Sarah, an eight year old female, sat on the worn rug in the living room. Her father, David, a forty year old male, was reading a newspaper in his favorite armchair. Her mother, Emily, a thirty eight year old female, hummed softly from the kitchen nearby. Afternoon sun slanted through the windows, casting warm, long rectangles across the familiar wooden floor of their house. This was their home, a place usually filled with comfort and safety. But today, a strange silence hung in the air, broken only by Emily's soft tune. Suddenly, a noise came from upstairs. It was a slow, deliberate creak, like heavy footsteps where no one should be. David lowered his paper, his eyes fixed on the ceiling. Emily's humming stopped abruptly. Another creak, louder this time, followed by a soft dragging sound that seemed to move across the floorboards directly overhead. Sarah whimpered, pressing herself closer to the rug. The light outside was still bright afternoon, but the air inside the house felt cold and heavy. The dragging stopped. Then came a single, sharp rap against the ceiling, directly above them. A shared look of fear passed between David and Emily. The house felt like it was holding its breath.","PEOPLE:  - David, 40, male  - Emily, 38, female  - Sarah, 8, female LOCATION: home TIME_OF_DAY: afternoon MOOD: scary"


testing: (50, 2)


Unnamed: 0,input,output
49,"The weak morning sun struggled to pierce the thick kitchen curtains. Dust motes danced slowly in the few rays that made it through the quiet room. A half-eaten bowl of cereal sat cold on the worn wooden table, untouched. Sarah, a woman in her mid-forties, gently stirred a mug of tea, her gaze fixed on the dark liquid as if seeking answers there. Opposite her sat Mark, a man of similar age, his shoulders slumped, tracing aimless patterns on the table surface with his finger. Thirteen-year-old Emily, a girl with tear-streaked cheeks, picked listlessly at a piece of toast beside her younger brother. Tom, a boy of seven, sat unusually still in his chair, his lower lip trembling slightly as he looked between his parents, sensing the weight in the air. The usual morning sounds of hurried footsteps and clattering dishes were absent, replaced by a heavy silence that pressed down on the small family like a physical weight. Each person seemed lost in their own private cloud of sorrow, the atmosphere thick with unspoken grief and shared sadness.","PEOPLE:  - Emily, 13, female  - Mark, 45, male  - Sarah, 45, female  - Tom, 7, male LOCATION: home TIME_OF_DAY: morning MOOD: sad"


In [27]:
training_sample_prompt = '<training-samples>\n'
for i, row in training_dataset.iterrows():
    training_sample_prompt += (
        "<sample>\n"
        "<input>\n"
        + str(row['input']) + "\n"
        "</input>\n"
        "<output>\n"
        + str(row['output']) + "\n"
        "</output>\n"
        "</sample>\n"
    )
training_sample_prompt += '</training-samples>'
training_sample_prompt = dedent(training_sample_prompt)

initial_training_prompt = f"""
You are an expert AI engineer.
Your goal is to create the most accurate and effective prompt for an LLM.
Below you are provided with a set of training samples.
Each samples consists of an input and an output.
You should create a prompt that will generate the output given the input.

Instructions: thinking carefully about the training samples to understand the exact transformation required.
Output: output only the generated prompt, without any additional text or structure (no quoting, no JSON, no XML, etc...)

{training_sample_prompt}
"""

transform_prompt = await gemini(initial_training_prompt, budget=12345)

print(transform_prompt)


Analyze the following narrative text to extract key information about the scene. Identify the individuals present (Name, Age, Gender), the location, the time of day, and the overall mood. Format the extracted information precisely as shown in the provided training examples. List people alphabetically by name. Provide only the structured output.


In [28]:

validation_prompts = []
expected = []
for _, row in validation_dataset.iterrows():
    expected.append(str(row['output']))
    validation_prompts.append(f"""{transform_prompt}

<input>
{str(row['input'])}
</input>
""")

results = await asyncio.gather(*(gemini(p) for p in validation_prompts))

validation_results = [
    {'expected': exp, 'result': res, 'match': exp == res}
    for exp, res in zip(expected, results)
]

validation_accuracy = sum([1 for r in validation_results if r['match']]) / len(validation_results)
print(f'Validation accuracy: {validation_accuracy:.2%}')

Validation accuracy: 0.00%


In [29]:
current_prompt = transform_prompt
current_validation_accuracy = validation_accuracy
current_validation_results = validation_results
continue_training = True

epoch = 1

while continue_training:
    print(f'Epoch {epoch}\n\n')

    further_training_prompt = dedent(f"""
        You are an expert AI engineer.
        Your goal is to create the most accurate and effective prompt for an LLM.
        Below you are provided with a set of training samples.
        Each samples consists of an input and an output.
        You should create a prompt that will generate the output given the input.

        Instructions: thinking carefully about the training samples to understand the exact transformation required.
        Output: output only the generated prompt, without any additional text or structure (no quoting, no JSON, no XML, etc...).
        
        In a previous run, you created the following prompt:
        <previous-prompt>
        {current_prompt}
        </previous-prompt>

        This prompt was evaluated on a set of validation samples.
        The validation accuracy was {current_validation_accuracy:.2%}.
        The validation results were:
        <validation-results>
        {json.dumps(current_validation_results, indent=2)}
        </validation-results>

        You need to improve the prompt.
        Remember that you can rewrite the prompt completely if needed -
        the previous prompt is provided here for your review.
        Below you are provided with a set of training samples
        (these are different from the validation samples).
        Try to create a better and more accurate prompt that creates the output given the input.

        {training_sample_prompt}
    """)

    # print(f'Further training prompt:\n---\n{further_training_prompt}\n---\n')

    new_prompt = await gemini(further_training_prompt, budget=12345)

    validation_prompts = []
    expected = []
    for _, row in validation_dataset.iterrows():
        expected.append(str(row['output']).strip())
        validation_prompts.append(f"""{transform_prompt}

        <input>
        {str(row['input'])}
        </input>
        """)

    results = await asyncio.gather(*(gemini(p) for p in validation_prompts))

    validation_results = [
        {'expected': exp, 'result': res, 'match': exp == res}
        for exp, res in zip(expected, [r.strip() for r in results])
    ]

    new_validation_accuracy = sum([1 for r in validation_results if r['match']]) / len(validation_results)

    print(f'New validation accuracy: {new_validation_accuracy:.2%}')
    print(f'New prompt:\n---\n{new_prompt}\n---\n')
    # print(f'Validation results:\n---\n{json.dumps(validation_results, indent=2)}\n---\n')

    if new_validation_accuracy > current_validation_accuracy:
        current_prompt = new_prompt
        current_validation_accuracy = new_validation_accuracy
        current_validation_results = validation_results
    
    epoch += 1

    if epoch >= 23 or new_validation_accuracy >= 0.95:
        continue_training = False

Epoch 1


New validation accuracy: 0.00%
New prompt:
---
Extract scene information from the narrative text following these precise instructions:
1.  **People**: Identify all individuals mentioned. For each person, extract their exact Name, Age (as a number), and Gender (categorized as 'male', 'female', or 'other'). List them alphabetically by Name under the heading "PEOPLE:". Each person must be on a new line, indented by exactly two spaces, starting with a hyphen, followed by the Name, a comma, a single space, the Age number, a comma, a single space, and the gender in lowercase, like `- Name, Age, gender`. Ensure there is a blank line immediately after the last person listed.
2.  **Location**: Determine the primary setting of the scene. Categorize the location strictly as one of the following terms: office, home, nature, pub, street. List this under the heading "LOCATION:".
3.  **Time of Day**: Determine the time of day. Categorize the time strictly as one of the following terms: morn

CancelledError: 