# Cuery usage example

In [2]:
import importlib
from pathlib import Path
from pprint import pprint

import instructor
import openai
import pandas as pd
from rich.pretty import pprint

from cuery import utils
from cuery.work import DirceJobs, DirceTasks

GDRIVE = Path("~/Google Drive/Shared drives/Solutions").expanduser()

In [24]:
md = {
    "name": {
        "type": "string",
        "description": "Name of the job role (job title, less than 50 characters)",
        "min_length": 5,
        "max_length": 50,
    },
    "description": {
        "type": "string",
        "description": "A short description of the job role (less than 200 characters)",
        "min_length": 20,
        "max_length": 200,
    },
    "potential": {
        "type": "integer",
        "description": "A score from 1 to 10 indicating the job's potential for automation",
        "ge": 0,
        "le": 10,
    },
}

In [None]:
import pydantic
from pydantic import BaseModel, Field


def make_model(d: dict, name: str):
    """Create a pydantic model from a dictionary."""

    fields = {}
    for field_name, field_params in d.items():
        field_type = field_params.pop("type")
        if field_type == "string":
            field_type = str
        elif field_type == "integer":
            field_type = int
        elif field_type == "float":
            field_type = float
        elif field_type == "boolean":
            field_type = bool
        else:
            raise ValueError(f"Unknown type: {field_type}")

        fields[field_name] = (field_type, Field(..., **field_params))

    return pydantic.create_model(name, **fields)


pm = make_model(md, "Job")
pm.model_json_schema()

{'properties': {'name': {'description': 'Name of the job role (job title, less than 50 characters)',
   'maxLength': 50,
   'minLength': 5,
   'title': 'Name',
   'type': 'string'},
  'description': {'description': 'A short description of the job role (less than 200 characters)',
   'maxLength': 200,
   'minLength': 20,
   'title': 'Description',
   'type': 'string'},
  'potential': {'description': "A score from 1 to 10 indicating the job's potential for automation",
   'maximum': 10,
   'minimum': 0,
   'title': 'Potential',
   'type': 'integer'}},
 'required': ['name', 'description', 'potential'],
 'title': 'Job',
 'type': 'object'}

In [None]:
import importlib
from cuery import response

importlib.reload(response)

response.ResponseModel.from_dict("Job", md)

cuery.response.Job

## Utils

In [None]:
from cuery import prompt, response, utils

importlib.reload(utils)
importlib.reload(prompt)
importlib.reload(response)


p1 = prompt.Prompt.from_config("work/prompts", "dirce_jobs")
print(p1)

r1 = response.ResponseModel.from_config("work/models", "Job")
print(r1)

messages=[Message(role='system', content="You're an analyst at the Spanish 'Instituo Nacional de Estadística' (INE) analyzing\ndata from its 'Directorio Central de Empresas' (DIRCE). Your objective is to analyze\ngroups of companies, identified by a sector ('Division') and a corresponding main activity\n('Actividad principal') in order to identify jobs within those companies that are likely to\nbe automatable by AI. Both 'Division' and 'Actividad_principal' are provided in Spanish and may\ninclude numeric IDs that you can ignore if you don't understand them. Always respond in English.\nOnly consider jobs that are computer- or paper-based and can be automated by AI using software\n(don't include jobs automatable by robots or other physical means)."), Message(role='user', content='Please analyze the following jobs sector and identify jobs that are automatable by AI software.\nDivision: {{Division}}\nActividad principal: {{Actividad_principal}}')] required=['Division', 'Actividad_principa

# Jobs

In [3]:
DATA_DIR = GDRIVE / "Research/future_of_work/inputs/ine_dirce_aggregated_by_activity.csv"
data = pd.read_csv(DATA_DIR)
data.columns = [c.replace(" ", "_") for c in data.columns]
display(data)

n_examples = 5
context_df = data.iloc[:5]
context_dict = {col: context_df[col].tolist() for col in context_df.columns}
context_records = context_df.to_dict(orient="records")

Unnamed: 0,Division,Actividad_principal,Estimated_Employees_2024,Median_YoY_Growth_pct
0,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4
1,"29 Fabricación de vehículos de motor, remolque...","293 Fabricación de componentes, piezas y acces...",97484,-0.2
2,43 Actividades de construcción especializada,"432 Instalaciones eléctricas, de fontanería y ...",303430,0.5
3,45 Venta y reparación de vehículos de motor y ...,451 Venta de vehículos de motor,102172,-0.5
4,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9
5,46 Comercio al por mayor e intermediarios del ...,464 Comercio al por mayor de artículos de uso ...,228324,-2.0
6,"47 Comercio al por menor, excepto de vehículos...",471 Comercio al por menor en establecimientos ...,233708,1.3
7,"47 Comercio al por menor, excepto de vehículos...",477 Comercio al por menor de otros artículos e...,242341,-1.3
8,49 Transporte terrestre y por tubería,493 Otro transporte terrestre de pasajeros,142413,0.5
9,49 Transporte terrestre y por tubería,494 Transporte de mercancías por carretera y s...,273002,-1.5


In [4]:
jobs = await DirceJobs(context_df, model="gpt-4.1-mini", n_concurrent=2)
jobs

Gathering responses: 100%|██████████| 5/5 [00:10<00:00,  2.07s/it]


[Jobs(jobs=[Job(name='Quality Control Analyst', description='Monitors and inspects meat products to ensure quality standards are met.', automation_potential=7, reason='Standardizable inspection criteria, use of image recognition AI.'), Job(name='Inventory Data Clerk', description='Manages and records inventory levels of raw and processed meat products.', automation_potential=8, reason='Routine data entry tasks can be automated with AI software.'), Job(name='Production Planner', description='Schedules and plans production runs to meet demand and minimize waste.', automation_potential=6, reason='AI can optimize scheduling using demand and production data.')]),
 Jobs(jobs=[Job(name='Inventory Clerk', description='Manages parts inventory for vehicle components, tracking stock and orders.', automation_potential=8, reason='Standardized data entry and inventory management can be automated.'), Job(name='Quality Assurance Analyst', description='Analyzes data from vehicle component inspections t

In [5]:
jobs_df = DirceJobs.explode_responses(jobs, context_df)
jobs_df = jobs_df.rename(
    columns={
        "name": "job",
        "description": "job_description",
        "automation_potential": "job_automation_potential",
        "reason": "job_automation_reason",
    }
)
jobs_df

Unnamed: 0,Division,Actividad_principal,Estimated_Employees_2024,Median_YoY_Growth_pct,job,job_description,job_automation_potential,job_automation_reason
0,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and inspects meat products to ensure ...,7,"Standardizable inspection criteria, use of ima..."
1,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Inventory Data Clerk,Manages and records inventory levels of raw an...,8,Routine data entry tasks can be automated with...
2,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Production Planner,Schedules and plans production runs to meet de...,6,AI can optimize scheduling using demand and pr...
3,"29 Fabricación de vehículos de motor, remolque...","293 Fabricación de componentes, piezas y acces...",97484,-0.2,Inventory Clerk,Manages parts inventory for vehicle components...,8,Standardized data entry and inventory manageme...
4,"29 Fabricación de vehículos de motor, remolque...","293 Fabricación de componentes, piezas y acces...",97484,-0.2,Quality Assurance Analyst,Analyzes data from vehicle component inspectio...,7,Data analysis and reporting can be enhanced an...
5,"29 Fabricación de vehículos de motor, remolque...","293 Fabricación de componentes, piezas y acces...",97484,-0.2,Production Planner,Schedules production runs for vehicle parts ba...,7,Routine scheduling and forecasting tasks can b...
6,43 Actividades de construcción especializada,"432 Instalaciones eléctricas, de fontanería y ...",303430,0.5,Administrative Assistant,"Handles documentation, scheduling, and communi...",7,Routine paperwork and scheduling can be automa...
7,43 Actividades de construcción especializada,"432 Instalaciones eléctricas, de fontanería y ...",303430,0.5,Cost Estimator,Prepares cost estimates for electrical and plu...,6,AI can analyze data and generate estimates bas...
8,43 Actividades de construcción especializada,"432 Instalaciones eléctricas, de fontanería y ...",303430,0.5,Project Coordinator,Coordinates activities and resources for const...,5,Task coordination and communication can be par...
9,45 Venta y reparación de vehículos de motor y ...,451 Venta de vehículos de motor,102172,-0.5,Sales Assistant,Assists customers in the purchase of motor veh...,7,High potential for automating customer interac...


## Tasks

In [7]:
tasks = await DirceTasks(jobs_df, model="gpt-4.1-mini", n_concurrent=2)
tasks

Gathering responses: 100%|██████████| 15/15 [01:06<00:00,  4.45s/it]


[JobTasks(tasks=[JobTask(name='Data Entry and Report Generation', description='Automate the entry of quality control data and generation of compliance reports.', automation_potential=8, intelligence=4, sexyness=5, scalability=7, data_needs='Structured quality metrics, test results, and inspection data.', products=['Microsoft Power Automate', 'UiPath', 'Tableau']), JobTask(name='Pattern Recognition in Quality Data', description='Use AI to detect anomalies or patterns in quality control data to predict defects or non-compliance.', automation_potential=7, intelligence=7, sexyness=6, scalability=8, data_needs='Historical quality control data, defect logs, and inspection records.', products=['TensorFlow', 'IBM Watson', 'Google AI Platform']), JobTask(name='Standard Compliance Verification', description='Automate verification against industry regulations and quality standards using AI software.', automation_potential=7, intelligence=6, sexyness=4, scalability=6, data_needs='Regulations and s

In [9]:
tasks_df = DirceTasks.explode_responses(tasks, jobs_df)
tasks_df = tasks_df.rename(columns={"name": "task", "description": "task_description"})
tasks_df

Unnamed: 0,Division,Actividad_principal,Estimated_Employees_2024,Median_YoY_Growth_pct,job,job_description,job_automation_potential,job_automation_reason,task,task_description,automation_potential,intelligence,sexyness,scalability,data_needs,products
0,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and inspects meat products to ensure ...,7,"Standardizable inspection criteria, use of ima...",Data Entry and Report Generation,Automate the entry of quality control data and...,8,4,5,7,"Structured quality metrics, test results, and ...","[Microsoft Power Automate, UiPath, Tableau]"
1,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and inspects meat products to ensure ...,7,"Standardizable inspection criteria, use of ima...",Pattern Recognition in Quality Data,Use AI to detect anomalies or patterns in qual...,7,7,6,8,"Historical quality control data, defect logs, ...","[TensorFlow, IBM Watson, Google AI Platform]"
2,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and inspects meat products to ensure ...,7,"Standardizable inspection criteria, use of ima...",Standard Compliance Verification,Automate verification against industry regulat...,7,6,4,6,"Regulations and standards documents, quality c...","[Compliance.ai, Intelex, MasterControl]"
3,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and inspects meat products to ensure ...,7,"Standardizable inspection criteria, use of ima...",Document Management and Audit Trail,Automate organization and maintenance of quali...,8,4,4,7,"Quality documents, audit records, and revision...","[DocuWare, M-Files, SharePoint]"
4,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and inspects meat products to ensure ...,7,"Standardizable inspection criteria, use of ima...",Automated Sampling Scheduling,Use AI to plan and schedule product sampling f...,6,5,5,6,"Production schedules, batch data, and previous...","[SAP QM, Oracle Quality Management, Siemens Op..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9,Customer Service Representative,Provides product information and resolves serv...,6,"AI chatbots can handle common queries, though ...",Responding to Common Customer Inquiries,Automatically answer frequently asked question...,8,5,6,9,"Access to FAQs, product details, order database","[Zendesk Answer Bot, Freshdesk AI, Intercom]"
70,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9,Customer Service Representative,Provides product information and resolves serv...,6,"AI chatbots can handle common queries, though ...",Order Processing and Tracking Updates,"Automate order entry, status updates, and ship...",9,6,5,8,"Order management system, shipment tracking data","[Shopify Flow, Zoho CRM, Salesforce Service Cl..."
71,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9,Customer Service Representative,Provides product information and resolves serv...,6,"AI chatbots can handle common queries, though ...",Customer Feedback Collection and Analysis,"Automatically gather, categorize, and analyze ...",7,7,6,7,"Survey responses, customer reviews, sentiment ...","[SurveyMonkey, Qualtrics, Medallia]"
72,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9,Customer Service Representative,Provides product information and resolves serv...,6,"AI chatbots can handle common queries, though ...",Generating Customer Reports,Create automated reports summarizing customer ...,8,6,4,8,"Customer interaction logs, service data","[Tableau, Power BI, Google Data Studio]"


In [21]:
pprint(tasks.DirceJobs.prompt)
pprint(tasks.DirceJobs.response.model_json_schema())