# Construct pydantic model from text input

In [2]:
from pydantic_ai import Agent
agent = Agent(model="google-gla:gemini-2.5-flash")

result = await agent.run("Give me an IT employee working in sweden, shortly")
result

AgentRunResult(output='**Lars Andersson, 32**\n\nA calm and meticulous **DevOps Engineer** at a Stockholm-based SaaS company focusing on renewable energy solutions. Lars spends his days optimizing cloud infrastructure on AWS, automating deployment pipelines with Kubernetes, and ensuring the stability of their analytics platform.\n\nHe commutes by bicycle year-round, starting his mornings with strong filter coffee and a concise stand-up. Reserved but always helpful, he\'s known for his insightful, data-driven solutions and his dry wit during "fika" breaks. Outside of work, you\'ll find him hiking in Tyresta National Park or tinkering with his home server.')

In [7]:
from pydantic import BaseModel, Field


class EmployeeModel(BaseModel):
    name: str
    age: int
    salary: int = Field(gt=30_000, lt=50_000)
    position: str


result = await agent.run(
    "Give me an IT employee working in sweden", output_type=EmployeeModel
)
result

AgentRunResult(output=EmployeeModel(name='Björn', age=40, salary=45000, position='IT Consultant'))

In [10]:
result.output

EmployeeModel(name='Björn', age=40, salary=45000, position='IT Consultant')

In [14]:
result.output.name, result.output.age, result.output.salary

('Björn', 40, 45000)

In [15]:
result.output.model_dump()

{'name': 'Björn', 'age': 40, 'salary': 45000, 'position': 'IT Consultant'}

In [20]:
result = await agent.run(
    "Give me ten employees in AI and data engineering fields, so the roles can vary, salary must be between 30000 and 50000",
    output_type=list[EmployeeModel],
)
result

AgentRunResult(output=[EmployeeModel(name='Alice Smith', age=30, salary=45000, position='AI Engineer'), EmployeeModel(name='Bob Johnson', age=35, salary=48000, position='Data Engineer'), EmployeeModel(name='Carol White', age=28, salary=42000, position='Machine Learning Engineer'), EmployeeModel(name='David Brown', age=32, salary=40000, position='Data Scientist'), EmployeeModel(name='Eva Green', age=38, salary=49000, position='MLOps Engineer'), EmployeeModel(name='Frank Black', age=31, salary=37000, position='Big Data Engineer'), EmployeeModel(name='Grace Lee', age=29, salary=46000, position='AI Research Scientist'), EmployeeModel(name='Henry Wong', age=34, salary=39000, position='Cloud Data Engineer'), EmployeeModel(name='Ivy Chen', age=27, salary=43000, position='Junior AI Engineer'), EmployeeModel(name='Jack Kim', age=36, salary=47000, position='Senior Data Engineer')])

In [21]:
result.output

[EmployeeModel(name='Alice Smith', age=30, salary=45000, position='AI Engineer'),
 EmployeeModel(name='Bob Johnson', age=35, salary=48000, position='Data Engineer'),
 EmployeeModel(name='Carol White', age=28, salary=42000, position='Machine Learning Engineer'),
 EmployeeModel(name='David Brown', age=32, salary=40000, position='Data Scientist'),
 EmployeeModel(name='Eva Green', age=38, salary=49000, position='MLOps Engineer'),
 EmployeeModel(name='Frank Black', age=31, salary=37000, position='Big Data Engineer'),
 EmployeeModel(name='Grace Lee', age=29, salary=46000, position='AI Research Scientist'),
 EmployeeModel(name='Henry Wong', age=34, salary=39000, position='Cloud Data Engineer'),
 EmployeeModel(name='Ivy Chen', age=27, salary=43000, position='Junior AI Engineer'),
 EmployeeModel(name='Jack Kim', age=36, salary=47000, position='Senior Data Engineer')]

## CV model - a more complex and nested model

In [None]:
class ExperienceModel(BaseModel):
    title: str
    company: str
    description: str
    start_year: int
    end_year: int


class EducationModel(BaseModel):
    title: str
    education_area: str
    school: str
    description: str
    start_year: int
    end_year: int


class CvModel(BaseModel):
    name: str
    age: int
    experiences: list[ExperienceModel]
    educations: list[EducationModel]


result = await agent.run(
    "Create a fake person that is applying for a data engineering job",
    output_type=CvModel,
)
result


AgentRunResult(output=CvModel(name='Alice Smith', age=30, experiences=[ExperienceModel(title='Senior Data Engineer', company='Tech Solutions Inc.', description='Designed and implemented scalable data pipelines using Apache Spark and Kafka. Managed data warehouses on AWS Redshift. Developed ETL processes for various data sources.', start_year=2020, end_year=2024), ExperienceModel(title='Data Engineer', company='DataFlow Corp.', description='Built and maintained data infrastructure. Developed SQL queries for data extraction and transformation. Collaborated with data scientists to deploy machine learning models.', start_year=2017, end_year=2020)], educations=[EducationModel(title='Master of Science', education_area='Data Science', school='University of Technology', description='Focused on big data technologies, machine learning, and statistical modeling.', start_year=2015, end_year=2017), EducationModel(title='Bachelor of Science', education_area='Computer Science', school='State Universi

In [24]:
result.output.name

'Alice Smith'

In [26]:
result.output.age

30

In [31]:
result.output.experiences[0].title, result.output.experiences[0].start_year

('Senior Data Engineer', 2020)

## (optional) Postprocessing - load into duckdb and unnesting

This part is optional, but a way to unnest the data and store it could be to use dlt to load the data into duckdb, followed by joining and unnesting.

Other approach could be to store into nosql such as mongodb.

In [64]:
import dlt

pipeline = dlt.pipeline(
    pipeline_name="cv_json_duckdb",
    destination=dlt.destinations.duckdb("cv.duckdb"),
    dataset_name="staging",
)

info = pipeline.run(
    data=[result.output.model_dump()], loader_file_format="jsonl", table_name="cv_entries"
)

print(info)


Pipeline cv_json_duckdb load step completed in 0.03 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:////Users/aigineer/Documents/explorations/AI_engineering_learning/pydantic_ai/cv.duckdb location to store data
Load package 1763905356.678611 is LOADED and contains no failed jobs


In [70]:
import duckdb 

with duckdb.connect("cv.duckdb") as conn:
    desc = conn.sql("desc;").df()
    cv_entries = conn.sql("FROM staging.cv_entries").df()
    educations = conn.sql("FROM staging.cv_entries__educations").df()
    experiences = conn.sql("FROM staging.cv_entries__experiences").df()

desc

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,cv,staging,_dlt_loads,"[load_id, schema_name, status, inserted_at, sc...","[VARCHAR, VARCHAR, BIGINT, TIMESTAMP WITH TIME...",False
1,cv,staging,_dlt_pipeline_state,"[version, engine_version, pipeline_name, state...","[BIGINT, BIGINT, VARCHAR, VARCHAR, TIMESTAMP W...",False
2,cv,staging,_dlt_version,"[version, engine_version, inserted_at, schema_...","[BIGINT, BIGINT, TIMESTAMP WITH TIME ZONE, VAR...",False
3,cv,staging,cv_entries,"[name, age, _dlt_load_id, _dlt_id]","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
4,cv,staging,cv_entries__educations,"[title, education_area, school, description, s...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, BIGINT, B...",False
5,cv,staging,cv_entries__experiences,"[title, company, description, start_year, end_...","[VARCHAR, VARCHAR, VARCHAR, BIGINT, BIGINT, VA...",False


In [73]:
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Alice Smith,30,1763905347.870002,rw5JexJHZpP9mA
1,Alice Smith,30,1763905356.678611,Dvx3vSypSQIxbw


In [71]:
educations

Unnamed: 0,title,education_area,school,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Master of Science,Data Science,University of Technology,"Focused on big data technologies, machine lear...",2015,2017,rw5JexJHZpP9mA,0,DfMlt1JItvp9JA
1,Bachelor of Science,Computer Science,State University,"Specialized in algorithms, data structures, an...",2011,2015,rw5JexJHZpP9mA,1,YAGsx5nxRBpkSA
2,Master of Science,Data Science,University of Technology,"Focused on big data technologies, machine lear...",2015,2017,Dvx3vSypSQIxbw,0,oZuO99/4iZBgog
3,Bachelor of Science,Computer Science,State University,"Specialized in algorithms, data structures, an...",2011,2015,Dvx3vSypSQIxbw,1,c/Bz14xNpYeoRg


In [72]:
experiences

Unnamed: 0,title,company,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Senior Data Engineer,Tech Solutions Inc.,Designed and implemented scalable data pipelin...,2020,2024,rw5JexJHZpP9mA,0,6SBhidmihZdApg
1,Data Engineer,DataFlow Corp.,Built and maintained data infrastructure. Deve...,2017,2020,rw5JexJHZpP9mA,1,/NrMMIM2tP15JA
2,Senior Data Engineer,Tech Solutions Inc.,Designed and implemented scalable data pipelin...,2020,2024,Dvx3vSypSQIxbw,0,0d9zC6TATUeSmQ
3,Data Engineer,DataFlow Corp.,Built and maintained data infrastructure. Deve...,2017,2020,Dvx3vSypSQIxbw,1,660kwcXRBxvgGA


In [85]:
duckdb.sql("""
    SELECT 
        cv.name, 
        cv.age, 
        ex.company,
        ex.description AS experience_description,
        ex.start_year AS experience_start_year,
        ex.end_year AS experience_end_year,
        e.title,
        e.education_area,
        e.school,
        e.start_year AS education_start_year,
        e.end_year AS education_end_year
    FROM cv_entries cv
    LEFT JOIN educations e ON cv._dlt_id = e._dlt_parent_id
    LEFT JOIN experiences ex ON cv._dlt_id = ex._dlt_parent_id
    

""").df()

Unnamed: 0,name,age,company,experience_description,experience_start_year,experience_end_year,title,education_area,school,education_start_year,education_end_year
0,Alice Smith,30,DataFlow Corp.,Built and maintained data infrastructure. Deve...,2017,2020,Master of Science,Data Science,University of Technology,2015,2017
1,Alice Smith,30,DataFlow Corp.,Built and maintained data infrastructure. Deve...,2017,2020,Bachelor of Science,Computer Science,State University,2011,2015
2,Alice Smith,30,DataFlow Corp.,Built and maintained data infrastructure. Deve...,2017,2020,Master of Science,Data Science,University of Technology,2015,2017
3,Alice Smith,30,DataFlow Corp.,Built and maintained data infrastructure. Deve...,2017,2020,Bachelor of Science,Computer Science,State University,2011,2015
4,Alice Smith,30,Tech Solutions Inc.,Designed and implemented scalable data pipelin...,2020,2024,Master of Science,Data Science,University of Technology,2015,2017
5,Alice Smith,30,Tech Solutions Inc.,Designed and implemented scalable data pipelin...,2020,2024,Bachelor of Science,Computer Science,State University,2011,2015
6,Alice Smith,30,Tech Solutions Inc.,Designed and implemented scalable data pipelin...,2020,2024,Master of Science,Data Science,University of Technology,2015,2017
7,Alice Smith,30,Tech Solutions Inc.,Designed and implemented scalable data pipelin...,2020,2024,Bachelor of Science,Computer Science,State University,2011,2015
