# Construct pydantic model from text input

In [1]:
from pydantic_ai import Agent

agent = Agent(model="google-gla:gemini-2.5-flash")

result = await agent.run("Give me an IT employee working in Sweden, keep it short")
result

AgentRunResult(output='**Erik Svensson**, a **DevOps Engineer** at **Syncron Technologies** in Stockholm.')

In [2]:
# print() shows the raw text; the bare `result` repr wraps it in AgentRunResult(...).
print(result.output)

**Erik Svensson**, a **DevOps Engineer** at **Syncron Technologies** in Stockholm.


In [3]:
from pydantic import BaseModel, Field

# Target schema for structured output: passed as output_type so the agent
# returns a validated EmployeeModel instead of free text.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the model's JSON schema description, which would change the schema.
class EmployeeModel(BaseModel):
    name: str
    age: int
    # Bounds are exclusive (gt/lt): exactly 30_000 or 50_000 fails validation.
    salary: int = Field(gt=30_000, lt=50_000)
    position: str

result = await agent.run(
    "Give me an IT employee working in sweden", output_type=EmployeeModel
)

result

AgentRunResult(output=EmployeeModel(name='Bjorn', age=30, salary=45000, position='IT Consultant'))

In [4]:
# result.output is the EmployeeModel instance itself, not a string.
# NOTE: `result` is rebound by later cells, so this must run right after the
# structured-output cell to pick up the single-employee result.
employee = result.output
employee

EmployeeModel(name='Bjorn', age=30, salary=45000, position='IT Consultant')

In [5]:
# Fields are plain typed attributes on the model instance.
employee.name, employee.age, employee.position

('Bjorn', 30, 'IT Consultant')

In [6]:
# model_dump() converts the model into a plain Python dict.
employee.model_dump()

{'name': 'Bjorn', 'age': 30, 'salary': 45000, 'position': 'IT Consultant'}

In [7]:
# model_dump_json() returns a str; print() renders the indent=2 line breaks
# instead of showing them escaped inside a repr.
print(employee.model_dump_json(indent=2))

{
  "name": "Bjorn",
  "age": 30,
  "salary": 45000,
  "position": "IT Consultant"
}


### Simulate several employees or a list of employees

In [8]:
result = await agent.run(
    """Give me ten employees in AI and data engineering fields,
    roles can vary, salary must be between 30000 and 50000""",
    output_type=list[EmployeeModel],
)
employees = result.output
employees

[EmployeeModel(name='Alice Smith', age=30, salary=45000, position='AI Engineer'),
 EmployeeModel(name='Bob Johnson', age=35, salary=48000, position='Data Engineer'),
 EmployeeModel(name='Carol Williams', age=28, salary=42000, position='Machine Learning Engineer'),
 EmployeeModel(name='David Brown', age=40, salary=49999, position='Senior Data Engineer'),
 EmployeeModel(name='Eve Davis', age=32, salary=40000, position='AI Researcher'),
 EmployeeModel(name='Frank Miller', age=38, salary=47000, position='Data Scientist'),
 EmployeeModel(name='Grace Wilson', age=29, salary=38000, position='Junior AI Engineer'),
 EmployeeModel(name='Henry Moore', age=42, salary=49000, position='Principal Data Engineer'),
 EmployeeModel(name='Ivy Taylor', age=31, salary=35000, position='Data Analyst'),
 EmployeeModel(name='Jack Anderson', age=36, salary=46000, position='AI Solutions Architect')]

In [9]:
# The prompt asks for ten entries, but list[EmployeeModel] does not constrain
# the length, so check how many actually came back.
len(employees)

10

In [10]:
# The loop variable is deliberately NOT named `employee`: that name already
# holds the single EmployeeModel created in an earlier cell, and rebinding it
# here would make the cells that read it show the last list entry when re-run.
# The labels are spelled out so the printed text is identical to what the
# f-string `{employee.name = }` debug form produced (label + repr of the value).
for staff_member in employees:
    print(
        f"employee.name = {staff_member.name!r} , "
        f"employee.salary = {staff_member.salary!r}"
    )

employee.name = 'Alice Smith' , employee.salary = 45000
employee.name = 'Bob Johnson' , employee.salary = 48000
employee.name = 'Carol Williams' , employee.salary = 42000
employee.name = 'David Brown' , employee.salary = 49999
employee.name = 'Eve Davis' , employee.salary = 40000
employee.name = 'Frank Miller' , employee.salary = 47000
employee.name = 'Grace Wilson' , employee.salary = 38000
employee.name = 'Henry Moore' , employee.salary = 49000
employee.name = 'Ivy Taylor' , employee.salary = 35000
employee.name = 'Jack Anderson' , employee.salary = 46000


## CV or resume model - a more complex and nested model

In [11]:
# One work-history entry; CvModel.experiences holds a list of these.
# NOTE(review): end_year is a required int, so an ongoing position cannot be
# expressed without a made-up year - consider whether it should be optional.
class ExperienceModel(BaseModel):
    title: str
    company: str
    description: str
    start_year: int
    end_year: int


# One education entry; CvModel.educations holds a list of these.
# NOTE(review): end_year is a required int, so an education still in progress
# cannot be expressed without a made-up year - confirm that is intended.
class EducationModel(BaseModel):
    title: str
    education_area: str
    school: str
    description: str
    start_year: int
    end_year: int


# Top-level CV schema: scalar fields plus two lists of nested models, so the
# agent has to produce a nested structure rather than a flat record.
class CvModel(BaseModel):
    name: str
    age: int
    experiences: list[ExperienceModel]
    educations: list[EducationModel]


result = await agent.run(
    "Create a swedish person applying for a data engineering position",
    output_type=CvModel,
)

resume = result.output
resume

CvModel(name='Erik Karlsson', age=32, experiences=[ExperienceModel(title='Senior Data Engineer', company='Data Solutions Inc.', description='Designed and implemented scalable data pipelines using Apache Spark and Kafka. Managed and optimized data warehouses (Snowflake). Developed ETL processes for various data sources.', start_year=2019, end_year=2023), ExperienceModel(title='Data Engineer', company='Tech Innovations', description='Built and maintained data infrastructure. Worked with relational and NoSQL databases. Assisted in migrating on-premise data systems to AWS.', start_year=2016, end_year=2019)], educations=[EducationModel(title='M.Sc. Computer Science', education_area='Data Engineering', school='KTH Royal Institute of Technology', description='Specialized in distributed systems and large-scale data processing. Thesis on real-time data streaming architectures.', start_year=2014, end_year=2016), EducationModel(title='B.Sc. Information Systems', education_area='Software Developme

In [12]:
# Top-level scalar fields of the CV.
resume.name, resume.age

('Erik Karlsson', 32)

In [13]:
# Nested models are reached by normal list indexing and attribute access.
# NOTE(review): assumes the generated CV has at least one experience entry.
resume.experiences[0].title

'Senior Data Engineer'

## Optional postprocessing -> load into duckdb and unnest

In [14]:
import dlt

# Load the CV into a local DuckDB file ("cv.duckdb"), dataset/schema "staging".
pipeline = dlt.pipeline(
    pipeline_name="resume_json_duckdb",
    destination=dlt.destinations.duckdb("cv.duckdb"),
    dataset_name="staging",
)

# model_dump() turns the nested CvModel into plain dicts and lists; dlt loads
# the nested lists into the child tables cv_entries__educations and
# cv_entries__experiences.
# write_disposition="replace" keeps this cell idempotent: dlt's default is
# "append", which would add one more CV row (and its child rows) to the
# persistent database file every time the notebook is re-run.
info = pipeline.run(
    data=[resume.model_dump()],
    table_name="cv_entries",
    loader_file_format="jsonl",
    write_disposition="replace",
)

print(info)

Pipeline resume_json_duckdb load step completed in 0.06 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:////Users/fkron/Documents/github/ai_engineering_fredrik_kron_de24/video_alongs/07_pydanticai_fundamentals/cv.duckdb location to store data
Load package 1764160224.145244 is LOADED and contains no failed jobs


In [15]:
import duckdb

# Read the catalog listing and the three loaded tables into pandas DataFrames.
# Everything is materialised with .df() inside the `with` block, so the frames
# stay usable after the connection is closed.
with duckdb.connect("cv.duckdb") as conn:
    desc, cv_entries, educations, experiences = [
        conn.sql(statement).df()
        for statement in (
            "desc",
            "from staging.cv_entries",
            "from staging.cv_entries__educations",
            "from staging.cv_entries__experiences",
        )
    ]

desc

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,cv,staging,_dlt_loads,"[load_id, schema_name, status, inserted_at, sc...","[VARCHAR, VARCHAR, BIGINT, TIMESTAMP WITH TIME...",False
1,cv,staging,_dlt_pipeline_state,"[version, engine_version, pipeline_name, state...","[BIGINT, BIGINT, VARCHAR, VARCHAR, TIMESTAMP W...",False
2,cv,staging,_dlt_version,"[version, engine_version, inserted_at, schema_...","[BIGINT, BIGINT, TIMESTAMP WITH TIME ZONE, VAR...",False
3,cv,staging,cv_entries,"[name, age, _dlt_load_id, _dlt_id]","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
4,cv,staging,cv_entries__educations,"[title, education_area, school, description, s...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, BIGINT, B...",False
5,cv,staging,cv_entries__experiences,"[title, company, description, start_year, end_...","[VARCHAR, VARCHAR, VARCHAR, BIGINT, BIGINT, VA...",False


In [16]:
# Root table: the scalar CV fields plus dlt's bookkeeping columns
# (_dlt_load_id, _dlt_id); the nested lists live in the child tables.
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Erik Karlsson,32,1764160224.145244,JcJeBiNhklyUuA


In [17]:
# Child table: _dlt_parent_id holds the _dlt_id of the owning cv_entries row,
# and _dlt_list_idx is the entry's position in the original educations list.
educations

Unnamed: 0,title,education_area,school,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,M.Sc. Computer Science,Data Engineering,KTH Royal Institute of Technology,Specialized in distributed systems and large-s...,2014,2016,JcJeBiNhklyUuA,0,lL0b6n78BZYJkQ
1,B.Sc. Information Systems,Software Development,Uppsala University,Focused on software development principles and...,2011,2014,JcJeBiNhklyUuA,1,iTXdVi3o/CRDzw


In [18]:
# Child table: _dlt_parent_id holds the _dlt_id of the owning cv_entries row,
# and _dlt_list_idx is the entry's position in the original experiences list.
experiences

Unnamed: 0,title,company,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Senior Data Engineer,Data Solutions Inc.,Designed and implemented scalable data pipelin...,2019,2023,JcJeBiNhklyUuA,0,ZK6hdrrQIlWkrg
1,Data Engineer,Tech Innovations,Built and maintained data infrastructure. Work...,2016,2019,JcJeBiNhklyUuA,1,ocDxsw4f3e25CA


In [19]:
# Flatten the root table and both child tables back into one wide DataFrame.
# The table names in the query (cv_entries, educations, experiences) are the
# pandas DataFrames created in an earlier cell, not tables in cv.duckdb:
# duckdb.sql resolves them from the Python variables in scope (presumably via
# DuckDB's replacement scans - confirm), so those variables must exist here.
# NOTE(review): both child tables are joined only on the parent id, so every
# education row is paired with every experience row (2 x 2 = 4 rows here).
duckdb.sql("""
    SELECT 
        cv.name, 
        cv.age, 
        ex.company,
        ex.description AS experience_description,
        ex.start_year AS experience_start_year,
        ex.end_year AS experience_end_year,
        e.title,
        e.education_area,
        e.school,
        e.start_year AS education_start_year,
        e.end_year AS education_end_year
    FROM cv_entries cv
    LEFT JOIN educations e ON cv._dlt_id = e._dlt_parent_id
    LEFT JOIN experiences ex ON cv._dlt_id = ex._dlt_parent_id
    

""").df()

Unnamed: 0,name,age,company,experience_description,experience_start_year,experience_end_year,title,education_area,school,education_start_year,education_end_year
0,Erik Karlsson,32,Tech Innovations,Built and maintained data infrastructure. Work...,2016,2019,M.Sc. Computer Science,Data Engineering,KTH Royal Institute of Technology,2014,2016
1,Erik Karlsson,32,Tech Innovations,Built and maintained data infrastructure. Work...,2016,2019,B.Sc. Information Systems,Software Development,Uppsala University,2011,2014
2,Erik Karlsson,32,Data Solutions Inc.,Designed and implemented scalable data pipelin...,2019,2023,M.Sc. Computer Science,Data Engineering,KTH Royal Institute of Technology,2014,2016
3,Erik Karlsson,32,Data Solutions Inc.,Designed and implemented scalable data pipelin...,2019,2023,B.Sc. Information Systems,Software Development,Uppsala University,2011,2014
