# Cuery usage example

In [7]:
import importlib
from pathlib import Path

import instructor
import openai
import pandas as pd

from cuery import Prompt, pprint, response, task, utils
from cuery.work import CountrySectors, DirceJobs, DirceTasks, SpanishSectors

# Root of the shared "Solutions" drive, located under the current user's home directory.
GDRIVE = Path.home() / "Google Drive" / "Shared drives" / "Solutions"

# State

In [2]:
# System message: tells the model to extract fruit names (as singular nouns) from the
# text between the <text> tags, skipping anything already listed between the <basket>
# tags, and includes two worked examples of the expected response.
system = """
You're a helpful assistant extracting new fruit names from messages if they are not already in the specified basket.
ALWAYS extract fruit names as singular nouns (e.g. 'apple' instead of 'apples').
ONLY return fruits NOT already mentioned between the <basket>...</basket> tags.
If NO fruits are mentioned, or ALL mentioned fruits are already in the basket, return an empty list.
Only extract fruits from the text between the <text>...</text> tags.

# Examples (DO NOT use these in response):

## Example 1 (don't include already mentioned fruits):
<basket>apple, strawberry</basket>
<text>The cereal contains coconuts, strawberries and raspberries.</text>
YOUR RESPONSE: [coconut, raspberry]

## Example 2 (return empty list if no new fruits):
<basket>mango, coconut</basket>
<text>The cereal contains coconuts, strawberries and mangoes.</text>
YOUR RESPONSE: []
"""

# User message template. The basket starts out empty; `{{text}}` is a template
# placeholder (presumably filled from each context item's "text" value — confirm
# against the Prompt templating rules).
user = """
Extract new fruit names from the text below if they are not already in the basket.
<basket></basket>
<text>{{text}}</text>
"""

# Two-message chat prompt: the system instructions followed by the user template.
# "text" is declared as the only required variable.
prompt = Prompt(
    required=["text"],
    messages=[
        dict(role="system", content=system),
        dict(role="user", content=user),
    ],
)

pprint(prompt)

In [None]:
from cuery import Field, Prompt, ResponseModel, Task


class Fruits(ResponseModel):
    # Structured response for one input text: the extracted fruit names.
    # The field is required, but an empty list is a valid value (min_length=0).
    names: list[str] = Field(
        ...,
        min_length=0,
        description="(Possibly empty) list of fruit names extracted from the text.",
    )


# Example inputs: one dict per message, each supplying the "text" variable.
context = [
    {"text": sentence}
    for sentence in (
        "I've bought an apple and a bunch of bananas.",
        "Can you pass me the oranges?",
        "I can't find my glasses.",
        "Does the store have any mangoes or apples?",
        "The monkey stole my banana!!",
    )
]


def update_state(response: ResponseModel, prompt: Prompt, context: dict) -> None:
    """Merge newly extracted fruits into the ``<basket>`` section of the user message.

    Args:
        response: Model response; its ``names`` attribute holds the extracted fruits.
        prompt: Prompt whose second message (``messages[1]``) contains the
            ``<basket>...</basket>`` section. Its ``content`` is rewritten in place.
        context: Context item for the current call (not used by this callback).

    Raises:
        ValueError: If the message lacks the opening or closing basket tag.
    """
    fruits = response.names
    if not fruits:
        # Nothing new was extracted: leave the prompt untouched.
        return

    content = prompt.messages[1].content
    # Split on the last opening tag and on the first closing tag that follows it,
    # so any later text (even one containing another closing tag) stays in `post`.
    pre, rest = content.rsplit("<basket>", maxsplit=1)
    items, post = rest.split("</basket>", maxsplit=1)

    # Strip before filtering so empty or whitespace-only entries (e.g. from a
    # trailing ", ") never end up as an empty basket item.
    basket = {item.strip() for item in items.split(",") if item.strip()}
    basket.update(fruits)

    # Sorted output keeps the basket deterministic regardless of extraction order.
    prompt.messages[1].content = f"{pre}<basket>{', '.join(sorted(basket))}</basket>{post}"


# Build the task from the prompt and the Fruits response model, then iterate over
# the context items with `update_state` registered as the callback.
# NOTE(review): presumably the callback runs after each response, so later items
# see the updated basket — confirm against Task.iter.
fruit_task = Task(prompt=prompt, response=Fruits, log_prompt=False)
result = await fruit_task.iter(context, callback=update_state, model="openai/gpt-4.1-mini")
# Show the results as a DataFrame, keeping each list of names in a single cell.
result.to_pandas(explode=False)

Iterating context:   0%|          | 0/5 [00:00<?, ?it/s]

Unnamed: 0,text,names
0,I've bought an apple and a bunch of bananas.,"[apple, banana]"
1,Can you pass me the oranges?,[orange]
2,I can't find my glasses.,[]
3,Does the store have any mangoes or apples?,[mango]
4,The monkey stole my banana!!,[banana]


In [4]:
# Inspect the logged query at index 3 (the fourth context item).
# `task` is the cuery.task module imported at the top of the notebook; the Task
# instance created for the fruit example is `fruit_task`.
# NOTE(review): assumes the Task instance exposes `query_log` — confirm.
pprint(fruit_task.query_log.queries[3])

# Starting without input context
"Pure prompt"

In [5]:
# One context item per country to query.
# NOTE(review): "Unites States" looks like a typo for "United States"; it is left
# unchanged here so the cell still matches its recorded output.
context = [
    {"country": "Unites States"},
    {"country": "Spain"},
]
# Call the predefined CountrySectors task directly on the context.
# NOTE(review): n_concurrent presumably caps the number of parallel requests — confirm.
sectors = await CountrySectors(context, n_concurrent=20)
sectors.to_pandas()

Gathering responses: 100%|██████████| 2/2 [00:08<00:00,  4.36s/it]


Unnamed: 0,country,sector,subsector,sector_automation_potential
0,Unites States,Health Care and Social Assistance,Health Care Services,8
1,Unites States,Manufacturing,Machinery Manufacturing,9
2,Unites States,Retail Trade,Electronics and Appliance Stores,7
3,Unites States,"Professional, Scientific, and Technical Services",Computer Systems Design and Related Services,10
4,Unites States,Finance and Insurance,Insurance Carriers,8
5,Spain,Health Care and Social Assistance,Ambulatory Health Care Services,7
6,Spain,Retail Trade,Motor Vehicle and Parts Dealers,8
7,Spain,Manufacturing,Machinery Manufacturing,9
8,Spain,Finance and Insurance,Credit Intermediation and Related Activities,8
9,Spain,Information,"Data Processing, Hosting, and Related Services",9


In [9]:
# Chain three tasks: SpanishSectors -> DirceJobs -> DirceTasks.
# The chain is called without any input context here.
# NOTE(review): presumably each task's output feeds the next task as context — confirm.
chain = task.Chain(SpanishSectors, DirceJobs, DirceTasks)
result = await chain(model="openai/gpt-4.1-mini", n_concurrent=20)
result

Gathering responses: 100%|██████████| 5/5 [00:04<00:00,  1.21it/s]


Gathering responses: 100%|██████████| 17/17 [00:14<00:00,  1.15it/s]


Unnamed: 0,sector,subsector,job_role,task,task_description,task_automation_potential,intelligence,sexyness,scalability,data_needs,products
0,Manufacturing,Computer and Electronic Product Manufacturing,Data Entry Clerk,Data Entry,Inputting data from various sources into compu...,9,3,4,8,"Structured or semi-structured data from forms,...","[UiPath, Automation Anywhere, Blue Prism, ABBY..."
1,Manufacturing,Computer and Electronic Product Manufacturing,Data Entry Clerk,Data Validation and Verification,Checking the accuracy and consistency of data ...,8,5,3,7,Access to entered data and validation rules or...,"[DataRobot, Trifacta, Talend, Informatica]"
2,Manufacturing,Computer and Electronic Product Manufacturing,Data Entry Clerk,Generating Reports,Creating routine reports based on entered data...,7,4,5,7,Data from databases or data entry systems and ...,"[Tableau, Power BI, Qlik Sense, Google Data St..."
3,Manufacturing,Computer and Electronic Product Manufacturing,Data Entry Clerk,Document Management,"Organizing, storing, and retrieving electronic...",7,4,4,6,Electronic document files with metadata and in...,"[M-Files, DocuWare, SharePoint, Laserfiche]"
4,Manufacturing,Computer and Electronic Product Manufacturing,Data Entry Clerk,Email Correspondence Handling,Sorting and responding to routine emails relat...,6,6,4,5,Access to email accounts and predefined respon...,"[Microsoft Outlook Rules, Google Workspace, Ze..."
...,...,...,...,...,...,...,...,...,...,...,...
83,"Professional, Scientific, and Technical Services",Computer Systems Design and Related Services,Technical Writer,Document Formatting and Styling,Automatically apply consistent formatting and ...,9,4,5,9,"Formatting rules, style guides, document templ...","[Grammarly, Adobe InDesign, Microsoft Word AI]"
84,"Professional, Scientific, and Technical Services",Computer Systems Design and Related Services,Technical Writer,Terminology Consistency Checking,Ensure consistent use of terminology throughou...,8,6,4,8,"Terminology databases, document text.","[Acrolinx, SDL Trados, Terminus]"
85,"Professional, Scientific, and Technical Services",Computer Systems Design and Related Services,Technical Writer,Grammar and Spell Checking,Automatically check and correct grammar and sp...,10,5,7,10,"Document text, linguistic rules databases.","[Grammarly, ProWritingAid, LanguageTool]"
86,"Professional, Scientific, and Technical Services",Computer Systems Design and Related Services,Technical Writer,Content Summarization,Generate summaries of longer technical documen...,7,7,6,7,"Full document text, topic relevance data.","[ChatGPT, SummarizeBot, SMMRY]"


In [10]:
# Show the usage table recorded by the chain, then the column totals for
# prompt tokens, completion tokens and cost.
display(chain._usage)
chain._usage.loc[:, ["prompt", "completion", "cost"]].sum()

Unnamed: 0,prompt,completion,cost,task_index,task
0,313,130,0.01719,0,Sectors
0,428,152,0.02196,1,Jobs
1,431,159,0.02247,1,Jobs
2,430,160,0.0225,1,Jobs
3,434,150,0.02202,1,Jobs
4,435,260,0.02865,1,Jobs
0,546,393,0.03996,2,JobTasks
1,546,397,0.0402,2,JobTasks
2,545,382,0.03927,2,JobTasks
3,549,364,0.03831,2,JobTasks


prompt        12827.00000
completion     8103.00000
cost              0.87099
dtype: float64

In [12]:
# Same chain as before, but starting from CountrySectors, which is given a single
# context item (a dict with the "country" key) as input.
chain = task.Chain(CountrySectors, DirceJobs, DirceTasks)
result = await chain({"country": "Spain"}, model="openai/gpt-4.1-mini", n_concurrent=20)
result

Gathering responses: 100%|██████████| 5/5 [00:05<00:00,  1.02s/it]


Gathering responses: 100%|██████████| 17/17 [00:13<00:00,  1.25it/s]


Unnamed: 0,sector,subsector,job_role,task,task_description,task_automation_potential,intelligence,sexyness,scalability,data_needs,products
0,Information,Software Publishers,Software Tester,Test Case Generation,Automatically generate test cases based on sof...,8,6,6,9,"Detailed software requirements, user stories, ...","[Testim, Functionize, Test.ai]"
1,Information,Software Publishers,Software Tester,Automated Test Execution,Run automated tests to verify software functio...,10,4,5,10,"Test scripts, software builds, and execution e...","[Selenium, Appium, TestComplete]"
2,Information,Software Publishers,Software Tester,Bug/Ticket Logging,Automatically log bugs or issues detected duri...,9,7,4,8,"Detailed test results, error logs, and context...","[Jira, Bugzilla, GitHub Issues]"
3,Information,Software Publishers,Software Tester,Regression Test Automation,Perform automatic regression testing every sof...,10,5,5,10,"Previous test cases, current software builds, ...","[Tricentis Tosca, Ranorex, Katalon]"
4,Information,Software Publishers,Software Tester,Test Result Analysis,Automatically analyze test results to identify...,7,7,6,7,"Comprehensive test logs, historical results, a...","[Test.ai, Applitools, QASymphony]"
...,...,...,...,...,...,...,...,...,...,...,...
80,Wholesale Trade,"Merchant Wholesalers, Durable Goods",Sales Support Specialist,Data entry and order processing,Automate entering sales orders and customer in...,8,4,3,9,"Customer order details, product specifications...","[Salesforce, Zoho CRM, SAP ERP]"
81,Wholesale Trade,"Merchant Wholesalers, Durable Goods",Sales Support Specialist,Customer query response automation,Automate responses to common customer inquirie...,7,6,5,8,"Frequently asked questions, customer interacti...","[Zendesk Answer Bot, LivePerson, Intercom]"
82,Wholesale Trade,"Merchant Wholesalers, Durable Goods",Sales Support Specialist,Sales reporting and analytics,Generate sales reports and analyze sales data ...,8,7,6,8,"Sales transactions, customer demographics, sal...","[Tableau, Power BI, Looker]"
83,Wholesale Trade,"Merchant Wholesalers, Durable Goods",Sales Support Specialist,Inventory tracking and updates,Monitor and update inventory levels automatica...,7,5,4,9,"Inventory data, sales data, supplier restock s...","[NetSuite, TradeGecko, Fishbowl Inventory]"


# Jobs

In [None]:
# Path of the input CSV on the shared drive (despite its name, DATA_DIR points to a
# file, not a directory; the name is kept because other cells may refer to it).
DATA_DIR = GDRIVE / "Research/future_of_work/inputs/ine_dirce_aggregated_by_activity.csv"

# Load the table and normalize the column names in one chain, so re-running the cell
# always starts from the raw file: spaces become underscores, then the two activity
# columns are renamed to the "sector" / "subsector" names used by the tasks.
data = (
    pd.read_csv(DATA_DIR)
    .rename(columns=lambda c: c.replace(" ", "_"))
    .rename(columns={"Division": "sector", "Actividad_principal": "subsector"})
)
display(data)

# Small sample for quick experiments; the slice now honours `n_examples` instead of
# repeating a hard-coded 5.
n_examples = 5
df = data.iloc[:n_examples]

In [7]:
# Pretty-print the predefined DirceJobs task to inspect it before running the chain.
pprint(DirceJobs)

## Task chain

In [11]:
chain = task.Chain(DirceJobs, DirceTasks)
result = await chain(data, model="gpt-4.1-mini", n_concurrent=20)
result

Gathering responses: 100%|██████████| 23/23 [00:06<00:00,  3.32it/s]


Gathering responses: 100%|██████████| 69/69 [00:27<00:00,  2.53it/s]


Unnamed: 0,sector,subsector,job_role,task,task_description,task_automation_potential,intelligence,sexyness,scalability,data_needs,products
0,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,Quality Control Analyst,Data Entry and Report Generation,Automate entering quality control data and gen...,8,5,4,9,"Quality inspection data, test results, report ...","[AutoEntry, Microsoft Power Automate, UiPath]"
1,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,Quality Control Analyst,Statistical Analysis,Perform automated statistical analysis of qual...,7,7,5,8,"Historical quality data, control charts, proce...","[Minitab, JMP, SPSS]"
2,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,Quality Control Analyst,Document Control and Compliance Tracking,Automate monitoring and updating compliance re...,8,6,3,8,"Compliance checklists, certificates, document ...","[DocuWare, ConvergePoint, PaperTracer]"
3,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,Quality Control Analyst,Sampling Schedule Management,Automate scheduling and reminders for routine ...,7,4,6,7,"Sampling schedules, testing frequency data, ca...","[Schedly, When I Work, Deputy]"
4,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,Quality Control Analyst,Quality Trend Reporting,Create automated reports highlighting trend ch...,7,6,5,8,"Time series quality data, production logs, rep...","[Tableau, Power BI, Qlik Sense]"
...,...,...,...,...,...,...,...,...,...,...,...
342,85 Educación,855 Otra educación,Curriculum Research Assistant,Literature Review Summarization,Automatically summarizing research articles an...,8,7,6,9,Access to digital research articles and educat...,"[Zamzar AI summarizer, Scholarcy]"
343,85 Educación,855 Otra educación,Curriculum Research Assistant,Data Collection and Organization,Gathering and structuring data from various ed...,9,6,5,8,Databases of educational surveys and research ...,"[Microsoft Power Automate, Zapier]"
344,85 Educación,855 Otra educación,Curriculum Research Assistant,Trend Analysis in Education,Analyzing trends from research data and educat...,7,7,6,7,Historical and current educational trend data.,"[Tableau, Google Data Studio]"
345,85 Educación,855 Otra educación,Curriculum Research Assistant,Report Generation,Automatically generating structured reports an...,8,6,7,8,Formatted research data and analysis results.,"[Grammarly Business, Jasper AI]"


In [13]:
# Column totals of prompt tokens, completion tokens and cost for the chain run above.
chain._usage.loc[:, ["prompt", "completion", "cost"]].sum()

prompt        50067.00000
completion    29686.00000
cost              3.28317
dtype: float64

In [14]:
# Write the chain result to the outputs folder on the shared drive, without the index.
result.to_csv(GDRIVE / "Research/future_of_work/outputs/ine_dirce_ai_tasks.csv", index=False)

## Test LiteLLM

In [7]:
# TODO: placeholder cell — no LiteLLM test is implemented here yet.
...

Ellipsis

## Test Perplexity

In [None]:
# Run DirceJobs on the small sample `df` (defined in the "Jobs" section) using the
# Perplexity "sonar" model, with n_concurrent=2.
jobs_perp = await DirceJobs(df, model="perplexity/sonar", n_concurrent=2)