# Construct pydantic model from text input

In [1]:
from pydantic_ai import Agent

agent = Agent(model="google-gla:gemini-2.5-flash")

result = await agent.run("Give me an IT employee working in Sweden, keep it short")
result

AgentRunResult(output='**Erik Svensson**, a System Administrator at a FinTech startup in Stockholm, known for his calm problem-solving and love of a good fika.')

In [3]:
# print() shows the generated text itself rather than the quoted repr inside AgentRunResult.
print(result.output)

**Erik Svensson**, a System Administrator at a FinTech startup in Stockholm, known for his calm problem-solving and love of a good fika.


In [4]:
from pydantic import BaseModel, Field

# Structured-output schema for a single employee; passed to agent.run as output_type.
# NOTE(review): documented with `#` comments on purpose — a class docstring would presumably
# end up in the JSON schema generated from this model; confirm before adding one.
class EmployeeModel(BaseModel):
    name: str
    age: int
    # Both bounds are exclusive: 30_000 < salary < 50_000.
    salary: int = Field(gt=30_000, lt=50_000)
    position: str

result = await agent.run(
    "Give me an IT employee working in sweden", output_type=EmployeeModel
)

result

AgentRunResult(output=EmployeeModel(name='Bjorn Borg', age=45, salary=45000, position='IT Consultant'))

In [5]:
# Unwrap the EmployeeModel instance from the run result so its fields can be inspected directly.
employee = result.output
employee

EmployeeModel(name='Bjorn Borg', age=45, salary=45000, position='IT Consultant')

In [7]:
employee.name, employee.age, employee.position

('Bjorn Borg', 45, 'IT Consultant')

In [8]:
employee.model_dump()

{'name': 'Bjorn Borg', 'age': 45, 'salary': 45000, 'position': 'IT Consultant'}

In [11]:
# Serialize the model to a JSON string; print() so the indent=2 layout shows as real line breaks.
print(employee.model_dump_json(indent=2))

{
  "name": "Bjorn Borg",
  "age": 45,
  "salary": 45000,
  "position": "IT Consultant"
}


### Generate several employees as a list

In [12]:
result = await agent.run(
    """Give me ten employees in AI and data engineering fields,
    roles can vary, salary must be between 30000 and 50000""",
    output_type=list[EmployeeModel],
)
employees = result.output
employees

[EmployeeModel(name='Alice', age=28, salary=45000, position='AI Engineer'),
 EmployeeModel(name='Bob', age=32, salary=48000, position='Data Engineer'),
 EmployeeModel(name='Carol', age=25, salary=38000, position='Machine Learning Engineer'),
 EmployeeModel(name='David', age=35, salary=42000, position='Data Scientist'),
 EmployeeModel(name='Eve', age=29, salary=39000, position='AI Research Scientist'),
 EmployeeModel(name='Frank', age=31, salary=47000, position='Big Data Engineer'),
 EmployeeModel(name='Grace', age=27, salary=35000, position='NLP Engineer'),
 EmployeeModel(name='Henry', age=33, salary=41000, position='AI Solutions Architect'),
 EmployeeModel(name='Ivy', age=26, salary=36000, position='Data Quality Engineer'),
 EmployeeModel(name='Jack', age=30, salary=49000, position='Computer Vision Engineer')]

In [13]:
len(employees)

10

In [14]:
# Print name and salary for every generated employee.
# The loop variable is deliberately NOT called `employee`: that name already holds the single
# EmployeeModel from the earlier cell, and rebinding it here would leave it pointing at the
# last list element, so re-running the earlier inspection cells would show a different person.
# The labels are written out literally so the printed text stays exactly
# "employee.name = ... , employee.salary = ...".
for staff_member in employees:
    print(
        f"employee.name = {staff_member.name!r} , "
        f"employee.salary = {staff_member.salary!r}"
    )

employee.name = 'Alice' , employee.salary = 45000
employee.name = 'Bob' , employee.salary = 48000
employee.name = 'Carol' , employee.salary = 38000
employee.name = 'David' , employee.salary = 42000
employee.name = 'Eve' , employee.salary = 39000
employee.name = 'Frank' , employee.salary = 47000
employee.name = 'Grace' , employee.salary = 35000
employee.name = 'Henry' , employee.salary = 41000
employee.name = 'Ivy' , employee.salary = 36000
employee.name = 'Jack' , employee.salary = 49000


## CV or resume model - a more complex and nested model

In [16]:
# One work-experience entry; used as the element type of CvModel.experiences.
# NOTE(review): end_year is a required int, so an ongoing position has no natural
# representation here — consider whether it should be optional.
class ExperienceModel(BaseModel):
    title: str
    company: str
    description: str
    start_year: int
    end_year: int


# One education entry; used as the element type of CvModel.educations.
class EducationModel(BaseModel):
    title: str
    education_area: str
    school: str
    description: str
    start_year: int
    end_year: int


# Top-level CV schema: two scalar fields plus two lists of nested models.
class CvModel(BaseModel):
    name: str
    age: int
    # Nested collections; each element is validated against its own model.
    experiences: list[ExperienceModel]
    educations: list[EducationModel]


result = await agent.run(
    "Create a swedish person applying for a data engineering position",
    output_type=CvModel,
)

resume = result.output
resume

CvModel(name='Bjorn Borg', age=30, experiences=[ExperienceModel(title='Data Engineer', company='Spotify', description='Developed and maintained data pipelines', start_year=2018, end_year=2023)], educations=[EducationModel(title='MSc in Computer Science', education_area='Data Engineering', school='KTH Royal Institute of Technology', description='Focused on distributed systems and big data technologies', start_year=2016, end_year=2018)])

In [17]:
resume.name, resume.age

('Bjorn Borg', 30)

In [19]:
resume.experiences[0].title

'Data Engineer'

## Optional postprocessing -> load into duckdb and unnest

In [20]:
import dlt

# Load the CV into a local DuckDB file (cv.duckdb) under the "staging" dataset.
pipeline = dlt.pipeline(
    pipeline_name="resume_json_duckdb",
    destination=dlt.destinations.duckdb("cv.duckdb"),
    dataset_name="staging",
)

# write_disposition="replace" keeps this cell idempotent: cv.duckdb persists on disk between
# kernel restarts, so with the default append behaviour every re-run would add another CV row
# (and more rows in the nested educations/experiences tables), skewing the queries below.
info = pipeline.run(
    data=[resume.model_dump()],
    loader_file_format="jsonl",
    table_name="cv_entries",
    write_disposition="replace",
)

print(info)

Pipeline resume_json_duckdb load step completed in 0.05 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:////Users/fkron/Documents/github/ai_engineering_fredrik_kron_de24/video_alongs/07_pydanticai_fundamentals/cv.duckdb location to store data
Load package 1764159148.419786 is LOADED and contains no failed jobs


In [23]:
import duckdb

# Pull the table listing and the three CV tables into DataFrames while the connection is open;
# the DataFrames remain usable after the `with` block has closed it.
with duckdb.connect("cv.duckdb") as conn:
    desc = conn.sql("desc").df()
    cv_entries, educations, experiences = (
        conn.sql(f"from staging.{table_name}").df()
        for table_name in (
            "cv_entries",
            "cv_entries__educations",
            "cv_entries__experiences",
        )
    )

desc

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,cv,staging,_dlt_loads,"[load_id, schema_name, status, inserted_at, sc...","[VARCHAR, VARCHAR, BIGINT, TIMESTAMP WITH TIME...",False
1,cv,staging,_dlt_pipeline_state,"[version, engine_version, pipeline_name, state...","[BIGINT, BIGINT, VARCHAR, VARCHAR, TIMESTAMP W...",False
2,cv,staging,_dlt_version,"[version, engine_version, inserted_at, schema_...","[BIGINT, BIGINT, TIMESTAMP WITH TIME ZONE, VAR...",False
3,cv,staging,cv_entries,"[name, age, _dlt_load_id, _dlt_id]","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
4,cv,staging,cv_entries__educations,"[title, education_area, school, description, s...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, BIGINT, B...",False
5,cv,staging,cv_entries__experiences,"[title, company, description, start_year, end_...","[VARCHAR, VARCHAR, VARCHAR, BIGINT, BIGINT, VA...",False


In [24]:
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Bjorn Borg,30,1764159148.419786,oiY3HyZ0qoyA5A


In [25]:
educations

Unnamed: 0,title,education_area,school,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,MSc in Computer Science,Data Engineering,KTH Royal Institute of Technology,Focused on distributed systems and big data te...,2016,2018,oiY3HyZ0qoyA5A,0,WPna+OF55a13yQ


In [26]:
experiences

Unnamed: 0,title,company,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Data Engineer,Spotify,Developed and maintained data pipelines,2018,2023,oiY3HyZ0qoyA5A,0,3kDXEdlIM3iUNA


In [27]:
# Flatten the CV into one wide frame by joining the nested rows back to their parent row
# (child._dlt_parent_id = parent._dlt_id).
# The unqualified names cv_entries / educations / experiences are presumably resolved by
# DuckDB's replacement scans to the DataFrames of the same names loaded in the previous cell;
# duckdb.sql() here is the module-level call, not a connection to cv.duckdb.
# NOTE(review): both joins hang off cv._dlt_id, so a CV with m educations and n experiences
# yields m * n rows; the single-row result relies on there being exactly one of each.
# NOTE(review): e.title is the education title and is left unaliased; the experience title
# is not selected at all.
duckdb.sql("""
    SELECT 
        cv.name, 
        cv.age, 
        ex.company,
        ex.description AS experience_description,
        ex.start_year AS experience_start_year,
        ex.end_year AS experience_end_year,
        e.title,
        e.education_area,
        e.school,
        e.start_year AS education_start_year,
        e.end_year AS education_end_year
    FROM cv_entries cv
    LEFT JOIN educations e ON cv._dlt_id = e._dlt_parent_id
    LEFT JOIN experiences ex ON cv._dlt_id = ex._dlt_parent_id
    

""").df()

Unnamed: 0,name,age,company,experience_description,experience_start_year,experience_end_year,title,education_area,school,education_start_year,education_end_year
0,Bjorn Borg,30,Spotify,Developed and maintained data pipelines,2018,2023,MSc in Computer Science,Data Engineering,KTH Royal Institute of Technology,2016,2018
