In [4]:
from enum import Enum

import pandas as pd
from cuery import Prompt, ResponseModel, Task, pprint
from pydantic import Field

# Create a prompt from a simple string
The `Prompt` class expects a list of (Jinja) messages with their roles, but it can also be instantiated from a simple template string.

In [6]:
# Jinja template: `{{name}}` is a scalar placeholder and the `{% for %}` block
# iterates over a list-valued `ingredients` variable.
t = "Hello {{name}}! {% for item in ingredients %} {{ item }} {% endfor %}"
# Build a Prompt directly from the template string instead of a list of messages.
p = Prompt.from_string(t)
# Pretty-print the resulting prompt object for inspection.
pprint(p)

# Choices (enum)
Require the LLM to respond with exactly one of N predefined _options_ (fixed categories).

In [7]:
# Closed set of user categories; each member's value is its own name as a string.
# Used as the type of `UserDetail.role`, which restricts that field to these members.
class Role(Enum):
    PRINCIPAL = "PRINCIPAL"
    TEACHER = "TEACHER"
    STUDENT = "STUDENT"
    OTHER = "OTHER"


# Response schema for a single synthetic user profile.
# `age` and `name` are unconstrained; `role` must be a member of `Role`.
class UserDetail(ResponseModel):
    age: int
    name: str
    # The description is attached to the field through `Field`; presumably cuery
    # passes it on to the LLM as guidance for this field — TODO confirm in cuery docs.
    role: Role = Field(description="Correctly assign one of the predefined roles to the user.")


prompt = Prompt.from_string("Please a create a synthetic user profile with age, name and role.")
task = Task("user_profile", prompt=prompt, response=UserDetail)

response = await task()
print(response)
response.to_pandas()

[UserDetail(age=25, name='Alice', role=<Role.STUDENT: 'STUDENT'>)]


Unnamed: 0,age,name,role
0,25,Alice,Role.STUDENT


# Simple multivalued fields
Require the LLM to respond with a _list_ of values (no constraints on the items or their number).

In [9]:
# Response schema holding a list of ingredient names.
# No length bounds are declared on the list or on its strings.
class Ingredients(ResponseModel):
    items: list[str] = Field(description="List of ingredients for the recipe.")


prompt = Prompt.from_string("List the ingredients for the following dish: {{dish}}.")
context = [{"dish": "pasta bolognese"}, {"dish": "chocolate cake"}]

task = Task("dishes", prompt=prompt, response=Ingredients)
responses = await task(context=context)
print(responses)

Iterating context:   0%|          | 0/2 [00:00<?, ?it/s]

[Ingredients(items=['pasta', 'bolognese sauce', 'ground beef', 'onion', 'garlic', 'carrot', 'celery', 'tomato paste', 'red wine', 'beef broth', 'salt', 'pepper', 'olive oil', 'parmesan cheese']), Ingredients(items=['flour', 'sugar', 'cocoa powder', 'baking powder', 'baking soda', 'salt', 'eggs', 'milk', 'vegetable oil', 'vanilla extract', 'hot water'])]


In [10]:
# Maintain the original structure of the responses: one row per input context,
# with the whole `items` list kept in a single cell
responses.to_pandas(explode=False)

Unnamed: 0,dish,items
0,pasta bolognese,"[pasta, bolognese sauce, ground beef, onion, g..."
1,chocolate cake,"[flour, sugar, cocoa powder, baking powder, ba..."


In [None]:
# Explode the list of ingredients into separate rows: one row per ingredient,
# with the input `dish` value repeated on each of its rows
responses.to_pandas(explode=True)

Unnamed: 0,dish,items
0,pasta bolognese,pasta
1,pasta bolognese,bolognese sauce
2,pasta bolognese,ground beef
3,pasta bolognese,onion
4,pasta bolognese,garlic
5,pasta bolognese,carrot
6,pasta bolognese,celery
7,pasta bolognese,red wine
8,pasta bolognese,tomato paste
9,pasta bolognese,beef broth


In [11]:
# Convert to simple python records: a list of dicts, one per input context,
# with `items` kept as a nested list because explode=False
responses.to_records(explode=False)

[{'dish': 'pasta bolognese',
  'items': ['pasta',
   'bolognese sauce',
   'ground beef',
   'onion',
   'garlic',
   'carrot',
   'celery',
   'tomato paste',
   'red wine',
   'beef broth',
   'salt',
   'pepper',
   'olive oil',
   'parmesan cheese']},
 {'dish': 'chocolate cake',
  'items': ['flour',
   'sugar',
   'cocoa powder',
   'baking powder',
   'baking soda',
   'salt',
   'eggs',
   'milk',
   'vegetable oil',
   'vanilla extract',
   'hot water']}]

# Nested models
Define a more complex output structure by referencing another response model.

In this case, the output is a list of bounded length (1 to 5 items) whose elements are instances of a predefined response model.

In [12]:
# One industrial sector/subsector pair together with an automation score.
class Sector(ResponseModel):
    # Sector title; validated to be 10-150 characters long.
    sector: str = Field(
        description="Human-readable title(!) of the industrial sector (in NAICS taxonomy)",
        min_length=10,
        max_length=150,
    )
    # Subsector title; validated to be 5-150 characters long.
    subsector: str = Field(
        description="Human-readable title(!) of the industrial SUBsector (in NAICS taxonomy)",
        min_length=5,
        max_length=150,
    )
    # Lower bound is 1 (not 0) so the validated range agrees with the
    # "1 to 10" scale stated in the description.
    sector_automation_potential: int = Field(
        description="A score from 1 to 10 indicating the sector's potential for automation",
        ge=1,
        le=10,
    )


# Container model: a list of 1 to 5 `Sector` entries (bounds enforced by
# min_length/max_length on the list).
class Sectors(ResponseModel):
    # "NAICS" spelled consistently with the field descriptions in `Sector`.
    sectors: list[Sector] = Field(
        description="A list of 1 to 5 NAICS industrial sectors with their AI automation potential",
        min_length=1,
        max_length=5,
    )


sectors_prompt = Prompt.from_string(
    "List some industrial sector in the country of {{country}} that have great AI automation potential."
)

context = [{"country": "Germany"}, {"country": "United States"}, {"country": "Japan"}]
sectors_task = Task("sectors", prompt=sectors_prompt, response=Sectors)
responses = await sectors_task(context=context)

Iterating context:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Flatten the nested `sectors` lists: one row per sector, with the input
# `country` shown alongside the sector's fields
responses.to_pandas(explode=True)

Unnamed: 0,country,sector,subsector,sector_automation_potential
0,Germany,Manufacturing,Automobile Manufacturing,8
1,Germany,Healthcare,Medical Devices,7
2,Germany,Information Technology,Software Development,9
3,United States,Manufacturing,Aerospace Product and Parts Manufacturing,8
4,United States,Health Care,Medical Laboratories,9
5,United States,Finance and Insurance,"Securities, Commodity Contracts, and Other Fin...",7
6,Japan,Manufacturing,Automotive Manufacturing,8
7,Japan,Technology,Electronics Manufacturing,9
8,Japan,Healthcare,Medical Devices,7
9,Japan,Information Technology,Software Development,8


# Chain tasks together
Run multiple tasks one after the other, collecting the results in a single DataFrame.

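Keep in mind that each template variable used in a task's prompt must have the same name as a column of the input context or an output field of the previous task. For example, `{{sector}}` in the jobs prompt below is filled from the `sector` field produced by the sectors task.

Here we first extract some industrial sectors for each input country, and then some job roles within each sector.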

In [13]:
# Re-uses "sectors" task from previous code cell (!)

from cuery import Chain


# A single job role with a short description and an automation score.
class Job(ResponseModel):
    # The character limits below are only stated in the description text;
    # no length validation is declared on these two fields.
    job_role: str = Field(description="Name of the job role (job title, less than 50 characters)")
    job_description: str = Field(
        description="A short description of the job role (less than 200 characters)"
    )
    # Lower bound is 1 (not 0) so the validated range agrees with the
    # "1 to 10" scale stated in the description.
    job_automation_potential: int = Field(
        description="A score from 1 to 10 indicating the job's potential for automation",
        ge=1,
        le=10,
    )


# Container model: a list of `Job` entries with no declared length bounds.
class Jobs(ResponseModel):
    # The description no longer promises "reasons for that potential":
    # `Job` has no field in which such reasons could be returned.
    jobs: list[Job] = Field(
        description="A list of jobs with their AI automation potential",
    )


# Template with two placeholders: {{country}} and {{sector}}.
# The literal is split across two lines for readability; the adjacent string
# literals are concatenated into the same single string.
jobs_prompt = Prompt.from_string(
    "List some job roles with great AI automation potential "
    "in the country of {{country}} and the sector '{{sector}}'"
)

# Initial context for the chain, written one record per country.
# "PIB" is an extra column that no prompt template in this notebook references
# (presumably a GDP figure — TODO confirm meaning and units).
context = pd.DataFrame(
    [
        {"country": "Germany", "PIB": 4.0},
        {"country": "United States", "PIB": 5.0},
        {"country": "Japan", "PIB": 3.5},
    ]
)

# Second stage: its prompt needs {{country}} and {{sector}} to be available.
jobs_task = Task("jobs", prompt=jobs_prompt, response=Jobs)
# Combine both tasks; they are passed in execution order (sectors first, then jobs).
chain = Chain(sectors_task, jobs_task)
# Await the chain once, with the country DataFrame as the initial context.
responses = await chain(context=context)

Iterating context:   0%|          | 0/3 [00:00<?, ?it/s]

Iterating context:   0%|          | 0/9 [00:00<?, ?it/s]

In [14]:
# Display the combined result of the chain (one row per generated job role)
responses

Unnamed: 0,country,sector,job_role,job_description,job_automation_potential
0,Germany,Manufacturing,Automation Engineer,"Responsible for designing, programming, and ma...",9
1,Germany,Manufacturing,Robotics Technician,"Specialist in installing, maintaining, and tro...",8
2,Germany,Manufacturing,Quality Control Inspector,Ensures products meet quality standards throug...,7
3,Germany,Manufacturing,Data Analyst,Analyzes manufacturing data to optimize proces...,8
4,Germany,Healthcare,Clinical Data Analyst,Analyzing and interpreting complex healthcare ...,7
5,Germany,Healthcare,Healthcare Robotics Engineer,Designing and developing robotic systems for h...,8
6,Germany,Healthcare,Medical Research Scientist,Conducting research to improve medical treatme...,6
7,Germany,Healthcare,Health Informatics Specialist,Managing and analyzing healthcare data to impr...,8
8,Germany,Healthcare,Nurse Practitioner,Providing advanced nursing care and treatment ...,4
9,Germany,Finance and Insurance,Data Analyst,Analyzing and interpreting complex data sets t...,8
