# Cuery usage example

In [1]:
import importlib
from pathlib import Path
from pprint import pprint

import instructor
import openai
import pandas as pd
from rich.pretty import pprint

from cuery import utils
from cuery.work import DirceJobs, DirceTasks

# Root of the shared "Solutions" drive, with "~" expanded to the current user's home
# directory. NOTE(review): assumes Google Drive is mounted at this location on the
# machine running the notebook — adjust if your mount point differs.
GDRIVE = Path("~/Google Drive/Shared drives/Solutions").expanduser()

## Utils

In [None]:
import textwrap
from inspect import cleandoc


def dedent(text):
    """Strip common indentation and unwrap hard-wrapped paragraphs.

    The text is first normalised with ``inspect.cleandoc`` (common leading
    indentation removed, leading/trailing blank lines dropped). Consecutive
    non-blank lines are then joined with a single space so each paragraph
    becomes one line, and paragraphs are separated by exactly one blank line.

    Any run of blank or whitespace-only lines counts as a single paragraph
    break, so extra blank lines never leak stray spaces or empty paragraphs
    into the result.

    Args:
        text: The (typically triple-quoted, indented) string to clean up.

    Returns:
        The cleaned string with one line per paragraph and paragraphs
        separated by ``"\\n\\n"``; an empty string if ``text`` has no content.
    """
    paragraphs = []
    current = []
    for line in cleandoc(text).split("\n"):
        if line.strip():
            current.append(line)
        elif current:
            # Blank (or whitespace-only) line: close the paragraph in progress.
            paragraphs.append(" ".join(current))
            current = []
    if current:
        paragraphs.append(" ".join(current))
    return "\n\n".join(paragraphs).strip()


# Sample whose text starts on the line after the opening quotes, so every
# line carries the same indentation.
raw = """
    This is a test
    that spans multiple lines
    and needs cleanup.

    This is a second paragraph.
"""

# Same text, but the first line begins directly after the opening quotes.
raw2 = """This is a test
    that spans multiple lines
    and needs cleanup.

    This is a second paragraph.
"""

# Print both cleaned samples, separated by a "---" marker line.
print(dedent(raw), "---", dedent(raw2), sep="\n")

This is a test that spans multiple lines and needs cleanup.

This is a second paragraph.
---
This is a test that spans multiple lines and needs cleanup.

This is a second paragraph.


This is a test that spans multiple lines and needs cleanup.

This is a second paragraph.


## Jobs

In [2]:
# NOTE: despite its name, DATA_DIR points at a single CSV file, not a directory.
DATA_DIR = GDRIVE / "Research/future_of_work/inputs/ine_dirce_aggregated_by_activity.csv"
data = pd.read_csv(DATA_DIR)
# Replace spaces in column names with underscores.
data.columns = [c.replace(" ", "_") for c in data.columns]
display(data)

# Number of leading rows of `data` used as context for the examples below.
n_examples = 5
# Slice with n_examples (it was previously defined but ignored in favour of a literal 5).
context_df = data.iloc[:n_examples]
# The same sample in two alternative layouts: column -> list of values, and one dict per row.
context_dict = {col: context_df[col].tolist() for col in context_df.columns}
context_records = context_df.to_dict(orient="records")

Unnamed: 0,Division,Actividad_principal,Estimated_Employees_2024,Median_YoY_Growth_pct
0,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4
1,"29 Fabricación de vehículos de motor, remolque...","293 Fabricación de componentes, piezas y acces...",97484,-0.2
2,43 Actividades de construcción especializada,"432 Instalaciones eléctricas, de fontanería y ...",303430,0.5
3,45 Venta y reparación de vehículos de motor y ...,451 Venta de vehículos de motor,102172,-0.5
4,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9
5,46 Comercio al por mayor e intermediarios del ...,464 Comercio al por mayor de artículos de uso ...,228324,-2.0
6,"47 Comercio al por menor, excepto de vehículos...",471 Comercio al por menor en establecimientos ...,233708,1.3
7,"47 Comercio al por menor, excepto de vehículos...",477 Comercio al por menor de otros artículos e...,242341,-1.3
8,49 Transporte terrestre y por tubería,493 Otro transporte terrestre de pasajeros,142413,0.5
9,49 Transporte terrestre y por tubería,494 Transporte de mercancías por carretera y s...,273002,-1.5


In [3]:
# Await DirceJobs on the sample rows, selecting the "gpt-4.1-mini" model.
# NOTE(review): presumably this issues LLM requests (network access and API
# credentials required) — confirm against cuery.work.
jobs = await DirceJobs(context_df, model="gpt-4.1-mini")
# Bare last expression so the notebook renders the awaited result.
jobs

Iterating context: 5item [00:08,  1.65s/item]


[Jobs(jobs=[Job(name='Quality Control Analyst', description='Inspects meat products for quality and compliance with health standards.', automation_potential=8, reason='Routine data analysis and image recognition for defect detection.'), Job(name='Production Planner', description='Schedules production processes and manages workflow in meat processing plants.', automation_potential=7, reason='Can be optimized using AI-driven scheduling and resource allocation software.'), Job(name='Inventory Manager', description='Manages stock levels of raw materials and finished meat products.', automation_potential=8, reason='Inventory tracking and ordering can be automated with AI systems.')]),
 Jobs(jobs=[Job(name='Data Entry Clerk', description='Handles input and management of vehicle component production data.', automation_potential=8, reason='Repetitive data input tasks with well-defined rules.'), Job(name='Inventory Control Specialist', description='Manages inventory records and tracks stock lev

In [6]:
# Combine the job responses with their context rows via explode_responses, then
# give the generic response fields job-specific column names in the same chain.
jobs_df = (
    DirceJobs.explode_responses(jobs, context_df)
    .rename(
        columns={
            "name": "job",
            "description": "job_description",
            "automation_potential": "job_automation_potential",
            "reason": "job_automation_reason",
        }
    )
)
jobs_df

Unnamed: 0,Division,Actividad_principal,Estimated_Employees_2024,Median_YoY_Growth_pct,job,job_description,job_automation_potential,job_automation_reason
0,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and evaluates product quality by anal...,8,Standardized testing and data analysis can be ...
1,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Inventory Manager,Manages stock levels and orders supplies based...,7,AI can predict demand and optimize inventory m...
2,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Production Data Entry Clerk,Inputs production data into systems to track p...,9,Data entry tasks are repetitive and can be ful...
3,"29 Fabricación de vehículos de motor, remolque...","293 Fabricación de componentes, piezas y acces...",97484,-0.2,Inventory Clerk,Manages the tracking and recording of inventor...,8,Repetitive data entry and monitoring can be au...
4,"29 Fabricación de vehículos de motor, remolque...","293 Fabricación de componentes, piezas y acces...",97484,-0.2,Quality Control Analyst,Inspects component data for defects and compli...,7,Data analysis and anomaly detection can be don...
5,"29 Fabricación de vehículos de motor, remolque...","293 Fabricación de componentes, piezas y acces...",97484,-0.2,Production Planner,Schedules production runs and resource allocat...,6,Planning and scheduling can be optimized and a...
6,43 Actividades de construcción especializada,"432 Instalaciones eléctricas, de fontanería y ...",303430,0.5,Project Coordinator,"Coordinates schedules, resources, and informat...",7,Can be largely automated by AI with scheduling...
7,43 Actividades de construcción especializada,"432 Instalaciones eléctricas, de fontanería y ...",303430,0.5,Electrical Design Drafter,Creates technical drawings and plans for elect...,8,AI software can generate and modify technical ...
8,43 Actividades de construcción especializada,"432 Instalaciones eléctricas, de fontanería y ...",303430,0.5,Plumbing System Estimator,Estimates costs and materials required for plu...,7,AI can automate estimation using databases and...
9,45 Venta y reparación de vehículos de motor y ...,451 Venta de vehículos de motor,102172,-0.5,Sales Representative,"Handles customer inquiries, processes sales co...",7,"Automatable tasks include data entry, contract..."


## Tasks

In [7]:
# Await DirceTasks on the exploded jobs table, selecting the "gpt-4.1-mini" model.
# NOTE(review): presumably this issues LLM requests for the rows of jobs_df —
# confirm against cuery.work.
tasks = await DirceTasks(jobs_df, model="gpt-4.1-mini")
# Bare last expression so the notebook renders the awaited result.
tasks

[JobTasks(tasks=[JobTask(name='Data Entry and Reporting', description='Automating the input and compilation of quality control data into standardized reports.', automation_potential=9, intelligence=4, sexyness=5, scalability=9, data_needs='Historical and real-time quality control data records.', products=['Microsoft Power Automate', 'UiPath', 'Automation Anywhere']), JobTask(name='Defect Pattern Recognition', description='Using AI to analyze product quality data to identify common defects and their patterns.', automation_potential=8, intelligence=7, sexyness=6, scalability=8, data_needs='Images and defect logs from past quality inspections.', products=['IBM Watson Visual Recognition', 'Google Cloud Vision API']), JobTask(name='Compliance Monitoring', description='Automating the monitoring of regulatory compliance related to food safety standards and documentation.', automation_potential=8, intelligence=6, sexyness=4, scalability=8, data_needs='Regulatory guidelines and audit records.',

In [8]:
# Combine the task responses with their job rows via explode_responses and rename
# the generic name/description fields to task-specific column names.
tasks_df = (
    DirceTasks.explode_responses(tasks, jobs_df)
    .rename(columns={"name": "task", "description": "task_description"})
)
tasks_df

Unnamed: 0,Division,Actividad_principal,Estimated_Employees_2024,Median_YoY_Growth_pct,job,job_description,job_automation_potential,job_automation_reason,task,task_description,automation_potential,intelligence,sexyness,scalability,data_needs,products
0,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and evaluates product quality by anal...,8,Standardized testing and data analysis can be ...,Data Entry and Reporting,Automating the input and compilation of qualit...,9,4,5,9,Historical and real-time quality control data ...,"[Microsoft Power Automate, UiPath, Automation ..."
1,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and evaluates product quality by anal...,8,Standardized testing and data analysis can be ...,Defect Pattern Recognition,Using AI to analyze product quality data to id...,8,7,6,8,Images and defect logs from past quality inspe...,"[IBM Watson Visual Recognition, Google Cloud V..."
2,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and evaluates product quality by anal...,8,Standardized testing and data analysis can be ...,Compliance Monitoring,Automating the monitoring of regulatory compli...,8,6,4,8,Regulatory guidelines and audit records.,"[MetricStream, SAP GRC]"
3,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Quality Control Analyst,Monitors and evaluates product quality by anal...,8,Standardized testing and data analysis can be ...,Trend Analysis and Forecasting,Analyzing quality control trends over time to ...,7,8,7,7,Historical quality control and production data.,"[Tableau, Power BI, RapidMiner]"
4,10 Industria de la alimentación,101 Procesado y conservación de carne y elabor...,134055,-2.4,Inventory Manager,Manages stock levels and orders supplies based...,7,AI can predict demand and optimize inventory m...,Inventory Data Entry,"Automate the input of stock levels, deliveries...",9,4,3,9,"Requires structured data from stock records, d...","[SAP Inventory Management, Oracle NetSuite, Zo..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9,Customer Support Representative,Provides information and assistance to clients...,6,Routine inquiries can be handled by AI chatbot...,Responding to common inquiries,Automatically responding to frequently asked q...,9,5,6,9,Historical customer inquiry data and response ...,"[Zendesk Answer Bot, Freshdesk AI]"
70,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9,Customer Support Representative,Provides information and assistance to clients...,6,Routine inquiries can be handled by AI chatbot...,Order status updates,Providing real-time updates on order status to...,8,4,5,9,Order tracking data and customer identificatio...,"[Shopify Order Status, Salesforce Service Cloud]"
71,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9,Customer Support Representative,Provides information and assistance to clients...,6,Routine inquiries can be handled by AI chatbot...,Processing returns and refunds,Automating the processing of return requests a...,7,6,4,8,Return request forms and policy documentation,"[Returnly, Zendesk Returns Management]"
72,46 Comercio al por mayor e intermediarios del ...,463 Comercio al por mayor de productos aliment...,325970,-0.9,Customer Support Representative,Provides information and assistance to clients...,6,Routine inquiries can be handled by AI chatbot...,Customer feedback collection,Automatically collecting and analyzing custome...,8,5,6,7,Customer contact information and feedback subm...,"[SurveyMonkey, Qualtrics]"


In [21]:
# Show the prompt and the response JSON schema of DirceJobs.
# `tasks` is rebound earlier in the notebook to the awaited DirceTasks result (a
# list), so `tasks.DirceJobs` fails on a fresh Restart & Run All. Use the
# DirceJobs object imported at the top of the notebook instead.
# NOTE(review): assumes the imported DirceJobs exposes the same `.prompt` and
# `.response` attributes that the former `tasks.DirceJobs` did — confirm.
pprint(DirceJobs.prompt)
pprint(DirceJobs.response.model_json_schema())