# Construct pydantic model from text input

In [None]:
from pydantic_ai import Agent

agent = Agent(model="google-gla:gemini-2.5-flash")

result = await agent.run(user_prompt="Give me an IT employee working in Sweden, keep it short.")

result

'**Astrid Eriksson** is a Software Developer in Stockholm, focusing on backend systems with Python and Golang for a growing fintech company. She values clean code and her regular fika breaks.'

In [4]:
print(result.output)

**Astrid Eriksson** is a Software Developer in Stockholm, focusing on backend systems with Python and Golang for a growing fintech company. She values clean code and her regular fika breaks.


In [6]:
from pydantic import BaseModel, Field

class EmployeeModel(BaseModel):
    name: str
    age: int
    salaray: int = Field(gt=30000, lt= 50000)
    position: str

result = await agent.run(user_prompt="Give me an IT employee working in Sweden, kepp it short.", 
                         output_type=EmployeeModel
                         )
result

AgentRunResult(output=EmployeeModel(name='Johan Karlsson', age=35, salaray=45000, position='Software Engineer'))

In [8]:
employee = result.output
employee

EmployeeModel(name='Johan Karlsson', age=35, salaray=45000, position='Software Engineer')

In [11]:
employee.name, employee.age, employee.position

('Johan Karlsson', 35, 'Software Engineer')

In [None]:
#to get it as a dictionary
employee.model_dump()

{'name': 'Johan Karlsson', 'age': 35, 'salaray': 45000, 'position': 'Software Engineer'}


In [None]:
#to get it as a json
#employee.model_dump_json()
employee.model_dump_json(indent=2)

'{"name":"Johan Karlsson","age":35,"salaray":45000,"position":"Software Engineer"}'

### several employees or a list of employees

In [15]:
result = await agent.run("""
    Give me ten employees in AI and Data Engineering fields,
                         roles can vary, but salary must be between 30000 and 50000.
    """,
    output_type=list[EmployeeModel]
    )
employees = result.output
employees

[EmployeeModel(name='Alice', age=30, salaray=45000, position='AI Engineer'),
 EmployeeModel(name='Bob', age=35, salaray=48000, position='Data Engineer'),
 EmployeeModel(name='Charlie', age=28, salaray=40000, position='Machine Learning Engineer'),
 EmployeeModel(name='David', age=40, salaray=49000, position='Lead Data Scientist'),
 EmployeeModel(name='Eve', age=32, salaray=38000, position='AI Researcher'),
 EmployeeModel(name='Frank', age=38, salaray=42000, position='Data Analyst'),
 EmployeeModel(name='Grace', age=29, salaray=39000, position='Computer Vision Engineer'),
 EmployeeModel(name='Heidi', age=33, salaray=47000, position='NLP Engineer'),
 EmployeeModel(name='Ivan', age=31, salaray=41000, position='Big Data Engineer'),
 EmployeeModel(name='Judy', age=36, salaray=46000, position='AI Ethicist')]

In [16]:
len(employees)

10

In [21]:
for employee in employees:
    print(f"{employee.name = } and {employee.salaray = } ")

employee.name = 'Alice' and employee.salaray = 45000 
employee.name = 'Bob' and employee.salaray = 48000 
employee.name = 'Charlie' and employee.salaray = 40000 
employee.name = 'David' and employee.salaray = 49000 
employee.name = 'Eve' and employee.salaray = 38000 
employee.name = 'Frank' and employee.salaray = 42000 
employee.name = 'Grace' and employee.salaray = 39000 
employee.name = 'Heidi' and employee.salaray = 47000 
employee.name = 'Ivan' and employee.salaray = 41000 
employee.name = 'Judy' and employee.salaray = 46000 


In [None]:
## Resume model - more complex and nested model

In [23]:
class ExperienceModel(BaseModel):
    title : str
    company : str
    description: str
    start_year : str
    start_year : str

class EducationModel(BaseModel):
    title : str
    school : str
    education_area : str
    description: str
    start_year : str
    start_year : str

class CVModel(BaseModel):
    name: str
    age: int
    experience : list[ExperienceModel]
    education : list[EducationModel]

result = await agent.run(user_prompt= """
    Create a Swedish person applying for a data engineering position.
    """,
    output_type=CVModel
    )

resume = result.output
resume

CVModel(name='Erik Karlsson', age=32, experience=[ExperienceModel(title='Data Engineer', company='Spotify', description='Developed and maintained data pipelines for music recommendations.', start_year='2019'), ExperienceModel(title='Junior Data Engineer', company='Klarna', description='Assisted in building and optimizing data warehousing solutions.', start_year='2017')], education=[EducationModel(title='M.Sc. Computer Science', school='KTH Royal Institute of Technology', education_area='Data Engineering', description='Specialized in distributed systems and large-scale data processing.', start_year='2015'), EducationModel(title='B.Sc. Software Development', school='Uppsala University', education_area='Software Engineering', description='Focused on algorithms and data structures.', start_year='2012')])

In [25]:
resume.name, resume.age

('Erik Karlsson', 32)

In [29]:
resume.experience[0].title

'Data Engineer'

In [31]:
resume.model_dump().keys()

dict_keys(['name', 'age', 'experience', 'education'])

## optional post processing -> load into Duckdb and unnest it.

In [32]:
import dlt

pipeline = dlt.pipeline(
    pipeline_name="resume_json_duckdb",
    destination=dlt.destinations.duckdb("cv.duckdb"),
    dataset_name="staging"
) 

info = pipeline.run( data= [resume.model_dump()], loader_file_format="jsonl", table_name="cv_entries")

print(info)

Pipeline resume_json_duckdb load step completed in 0.08 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:////Users/john.sandsjo/Documents/github/data_platform_ai_course/video_lecture/07_pydantic_ai/cv.duckdb location to store data
Load package 1764163948.579485 is LOADED and contains no failed jobs


In [35]:
import duckdb

with duckdb.connect("cv.duckdb") as conn:
    desc = conn.sql("desc").df()
    cv_entries = conn.sql("from staging.cv_entries").df()
    educations = conn.sql("from staging.cv_entries__education").df()
    experiences = conn.sql("from staging.cv_entries__experience").df()

desc

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,cv,staging,_dlt_loads,"[load_id, schema_name, status, inserted_at, sc...","[VARCHAR, VARCHAR, BIGINT, TIMESTAMP WITH TIME...",False
1,cv,staging,_dlt_pipeline_state,"[version, engine_version, pipeline_name, state...","[BIGINT, BIGINT, VARCHAR, VARCHAR, TIMESTAMP W...",False
2,cv,staging,_dlt_version,"[version, engine_version, inserted_at, schema_...","[BIGINT, BIGINT, TIMESTAMP WITH TIME ZONE, VAR...",False
3,cv,staging,cv_entries,"[name, age, _dlt_load_id, _dlt_id]","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
4,cv,staging,cv_entries__education,"[title, school, education_area, description, s...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR, ...",False
5,cv,staging,cv_entries__experience,"[title, company, description, start_year, _dlt...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR, ...",False


In [36]:
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Erik Karlsson,32,1764163948.579485,Rn8lOSW+6Cr5kg


In [37]:
educations

Unnamed: 0,title,school,education_area,description,start_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,M.Sc. Computer Science,KTH Royal Institute of Technology,Data Engineering,Specialized in distributed systems and large-s...,2015,Rn8lOSW+6Cr5kg,0,rE7SkGIj87DBMA
1,B.Sc. Software Development,Uppsala University,Software Engineering,Focused on algorithms and data structures.,2012,Rn8lOSW+6Cr5kg,1,SYWq/7Wk9maJMQ


In [38]:
experiences

Unnamed: 0,title,company,description,start_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Data Engineer,Spotify,Developed and maintained data pipelines for mu...,2019,Rn8lOSW+6Cr5kg,0,uQRyHwVAA/K6Mw
1,Junior Data Engineer,Klarna,Assisted in building and optimizing data wareh...,2017,Rn8lOSW+6Cr5kg,1,lNNQ5jwPDBe7dw


In [40]:
duckdb.sql(
    """
    SELECT 
        cv.name, 
        cv.age,
        ex.company,
        e.title
    FROM cv_entries cv
    LEFT JOIN educations e ON cv._dlt_id = e._dlt_parent_id
    LEFT JOIN experiences ex ON cv._dlt_id = ex._dlt_parent_id
"""
).df()

Unnamed: 0,name,age,company,title
0,Erik Karlsson,32,Klarna,M.Sc. Computer Science
1,Erik Karlsson,32,Klarna,B.Sc. Software Development
2,Erik Karlsson,32,Spotify,M.Sc. Computer Science
3,Erik Karlsson,32,Spotify,B.Sc. Software Development
