# course link
https://www.youtube.com/watch?v=aMjGAh4cQTU&t=199s

In [1]:
import sys

import pydantic

# Environment sanity check: show which pydantic the kernel actually imported
# and which interpreter is running it.
print(pydantic.__version__)
print("pydantic object:", pydantic)
print("pydantic type:", type(pydantic))
# getattr with a None fallback, in case the module object has no __file__.
print("pydantic file:", getattr(pydantic, "__file__", None))
print("sys.executable:", sys.executable)
# Gotcha: DON'T name your own file after the package (e.g. pydantic.py) -
# the import above could then resolve to that file instead of the library.


2.11.7
pydantic object: <module 'pydantic' from '/Users/lilun.zhang/VSCodeProjects/aie/.venv/lib/python3.11/site-packages/pydantic/__init__.py'>
pydantic type: <class 'module'>
pydantic file: /Users/lilun.zhang/VSCodeProjects/aie/.venv/lib/python3.11/site-packages/pydantic/__init__.py
sys.executable: /Users/lilun.zhang/VSCodeProjects/aie/.venv/bin/python


## validate and correct data types

In [1]:
from pydantic import BaseModel

# OOP
class User(BaseModel):
    """Minimal model with three required, typed fields."""

    name: str  # every field must carry a type annotation
    age: int
    is_active: bool


# Create an instance; age is passed as the string '30' on purpose -
# the dump below shows it stored as the integer 30.
user1 = User(**{"name": "Bob", "age": '30', "is_active": True})
# One print call, two lines: the model repr first, then its plain-dict form.
print(user1, user1.model_dump(), sep="\n")

name='Bob' age=30 is_active=True
{'name': 'Bob', 'age': 30, 'is_active': True}


In [None]:
# Type validation: age arrives as the string "25" while the field is declared int.
user2 = User(**{"name": 'Bob', "age": "25", "is_active": True})
# Show the model, then the runtime type of the stored age.
print(user2, type(user2.age), sep="\n")

However, coercion only goes so far: if the value cannot be converted to the declared type, pydantic raises a validation error.

In [2]:
# Coercion has limits: "twenty" cannot be parsed as an integer, so building
# the model fails. The error is caught and printed instead of propagating.
try:
    User(**{"name": "Charlie", "age": "twenty", "is_active": False})
except ValueError as validation_problem:
    print(validation_problem)

1 validation error for User
age
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='twenty', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/int_parsing


## set default values

In [3]:
# Fields may declare a default value.
class Student(BaseModel):
    """Student record whose ``age`` falls back to 18 when it is not supplied."""

    name: str
    age: int = 18
    subject: str


# age could be omitted thanks to its default;
# when it is passed explicitly (16 here) the passed value wins.
student1 = Student(**{"name": 'Jeff', "age": 16, "subject": 'Mathematics'})
print(student1)

name='Jeff' age=16 subject='Mathematics'


# fields
`Field` lets us specify more about each input parameter:
- 1. constraints
- 2. defaults/factories
- 3. description, title...

In [4]:
from pydantic import BaseModel, Field

class FieldUser(BaseModel):
    """Model whose only field carries descriptive metadata through ``Field``."""

    # The description is metadata - very useful when building agents.
    name: str = Field(description="The user's full name")


field_user1 = FieldUser(**{"name": "Vaibhav"})
print(field_user1)

name='Vaibhav'


If we don't pass anything, it will use the default value for this parameter.

In [5]:
class FieldUserDefault(BaseModel):
    """Same field as before, now with a default value declared inside ``Field``."""

    name: str = Field(default="John", description="The user's full name")


# Nothing is passed this time, so the declared default is used.
field_user2 = FieldUserDefault()
print(field_user2)

name='John'


⚠️ However, pydantic does NOT validate the default value by default. For example:

In [None]:
class FlawUswer(BaseModel):
    """Deliberately flawed model: the default's type contradicts the annotation."""

    # The default "twelve" is a str although the field is annotated as int.
    age: int = Field(default="twelve")


user = FlawUswer()
# The default is used as-is: print the stored value, then its runtime type.
print(user.age, type(user.age), sep="\n")

twelve
<class 'str'>


## ValidationError

In [1]:
# so, we import another package: ValidationError
from pydantic import BaseModel, Field, ValidationError

class ProperUser(BaseModel):
    """Same mismatched default, but ``validate_default=True`` forces it to be checked."""

    age: int = Field(validate_default=True, default="twelve")


# Instantiating without arguments now validates the default and fails;
# the error report is printed instead of a traceback.
try:
    user = ProperUser()
except ValidationError as validation_problem:
    print(validation_problem)

1 validation error for ProperUser
age
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='twelve', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/int_parsing


# Aliasing

An alias is a different name that our model can use to:
    1. Accept input (validation)
    2. Give output (serialization)

This is helpful when the external name ≠ the internal variable name.

In [None]:
class Package(BaseModel):
    """Parcel whose external (alias) key names differ from the attribute names."""

    weight: float = Field(alias="pkg_weight_kg")
    destination: str = Field(alias="pkg_dest")
    is_fragile: bool = Field(alias="pkg_is_fragile")


# The order of the keys doesn't matter 🤩, as long as they are named correctly!
data = dict(pkg_weight_kg=4.5, pkg_is_fragile=True, pkg_dest="Singapore")

# 🤩 ** unpacks the dictionary into keyword arguments.
package = Package(**data)

# Attributes are read through the internal names;
# there is no such thing as package.pkg_weight_kg.
print(package.weight, package.destination, package.is_fragile, sep="\n")

# Export twice: first with the original alias names, then with the internal names.
print(package.model_dump(by_alias=True), package.model_dump(), sep="\n")

4.5
Singapore
True
{'pkg_weight_kg': 4.5, 'pkg_dest': 'Singapore', 'pkg_is_fragile': True}
{'weight': 4.5, 'destination': 'Singapore', 'is_fragile': True}


In [None]:
# Example I
class Student(BaseModel):
    """Student whose email uses one alias on input and a different one on output."""

    # `email` is the internal attribute name.
    email: str = Field(
        serialization_alias="studentEmail",  # ⚠️ name written on output
        validation_alias="student_email",    # name accepted on input
    )


# Incoming data
incoming_data = dict(student_email="hi@gmail.com")

student = Student(**incoming_data)
print(student.email)

# The plain dump uses the internal name; by_alias=True uses the serialization alias.
print(student.model_dump(), student.model_dump(by_alias=True), sep="\n")

hi@gmail.com
{'email': 'hi@gmail.com'}
{'studentEmail': 'hi@gmail.com'}


In [6]:
# Example II
class Book(BaseModel):
    """Book with separate input (validation) and output (serialization) aliases."""

    title: str = Field(
        serialization_alias="bookTitle",   # output will use this
        validation_alias="book_title",     # input will use this
    )   # ⚠️ no comma after the closing parenthesis
    author: str = Field(
        serialization_alias="authorName",  # output will use this
        validation_alias="author_name",    # input will use this
    )


backend_data = dict(book_title="Pydantic Guide", author_name="DataCamp")

book = Book(**backend_data)

# Internal attribute names first ...
print(book.title, book.author, sep="\n")
# ... then both dump flavours: internal names, followed by serialization aliases.
print(book.model_dump(), book.model_dump(by_alias=True), sep="\n")

Pydantic Guide
DataCamp
{'title': 'Pydantic Guide', 'author': 'DataCamp'}
{'bookTitle': 'Pydantic Guide', 'authorName': 'DataCamp'}


In [None]:
# Numeric limits - part of Validation, which is covered in more detail later
class Product(BaseModel):
    """Product with length limits on its text fields and a strictly positive price."""

    name: str = Field(max_length=50, min_length=1)
    price: float = Field(gt=0)  # price must be greater than 0
    description: str | None = Field(max_length=300, default=None)


# Example usage
valid_product = Product(
    **{"name": "Laptop", "price": 999.99, "description": "Very cool laptop"}
)
print(valid_product)

name='Laptop' price=999.99 description='Very cool laptop'


In [None]:
# Now let's try to create an invalid product. This triggers 2 validation
# errors: the empty name violates min_length=1 and the negative price violates gt=0.
# The error is caught as ValueError (pydantic's ValidationError is a ValueError
# subclass) and printed, so the notebook keeps running under "Restart & Run All".
try:
    invalid_product = Product(name="", price=-10)
except ValueError as e:
    print(e)

ValidationError: 2 validation errors for Product
name
  String should have at least 1 character [type=string_too_short, input_value='', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/string_too_short
price
  Input should be greater than 0 [type=greater_than, input_value=-10, input_type=int]
    For further information visit https://errors.pydantic.dev/2.11/v/greater_than

In [None]:
# Exercise

from pydantic import BaseModel, Field
class Book(BaseModel):
    """Exercise model: a book for sale.

    Fields:
        title: required, 1 to 100 characters.
        author: required.
        isbn: optional; ``None`` when not supplied.
        price: required, greater than 0 and at most 1000.
        in_stock: defaults to ``True``.
    """

    title: str = Field(min_length=1, max_length=100)
    author: str
    # The annotation must admit None because None is the default.
    isbn: str | None = Field(default=None)
    # float rather than int: an int field rejects fractional prices such as 29.99.
    price: float = Field(le=1000, gt=0)
    in_stock: bool = Field(default=True)


valid_book = Book(
    title="The Pragmatic Programmer",
    author="Andrew Hunt",
    price=29.99,
)

# 🤩 @model_validator
@model_validator is used when we want to:
1. Validate multiple fields together
2. To perform logic that involves the whole model
3. Run code before or after normal field validation

## validate after parsing (after the model built)

In [7]:
from pydantic import BaseModel, model_validator

class Event(BaseModel):
    """Event whose end hour has to be strictly later than its start hour."""

    name: str
    start_hour: int
    end_hour: int

    @model_validator(mode='after')
    def check_time(self):
        """Cross-field check that runs on the already-built instance.

        Returns:
            The instance itself when end_hour is later than start_hour.

        Raises:
            ValueError: when end_hour is not later than start_hour.
        """
        ends_after_start = self.end_hour > self.start_hour
        if ends_after_start:
            return self
        raise ValueError("end hour must be later than start hour")

# ⚠️ Each field is a valid int on its own, but the combination is logically
# wrong: the end hour (9) is earlier than the start hour (10). The 'after'
# validator check_time rejects it. The error is caught as ValueError (pydantic's
# ValidationError is a ValueError subclass) and printed so later cells still run.
try:
    event1 = Event(name="Hackathon", start_hour=10, end_hour=9)
except ValueError as e:
    print(e)

ValidationError: 1 validation error for Event
  Value error, end hour must be later than start hour [type=value_error, input_value={'name': 'Hackathon', 'st...our': 10, 'end_hour': 9}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/value_error

## validate before parsing

In [None]:
# Why @classmethod below?
# A mode='before' validator runs before an instance exists (it is part of
# building the instance), so pydantic calls it on the class, not on an object.

class Delivery(BaseModel):
    """Delivery with integer ``pickup`` and ``drop`` values that are auto-ordered.

    If the raw input has ``drop`` smaller than ``pickup``, the two values are
    swapped before the fields are parsed.
    """

    pickup: int
    drop: int

    @model_validator(mode='before')
    @classmethod
    def fix_input(cls, data):
        """Swap ``pickup`` and ``drop`` in the raw input when they are reversed.

        Args:
            data: the raw, not yet validated input (a dict when the model is
                built from keyword arguments).

        Returns:
            The input to validate. When a swap is needed a new dict is
            returned, so the caller's own dict is never modified.
        """
        print("Before validator sees raw input:", data)
        # Only dict input carrying both keys can be reordered; anything else is
        # handed to the normal field validation, which reports the problem.
        if not isinstance(data, dict) or 'pickup' not in data or 'drop' not in data:
            return data
        try:
            is_reversed = int(data['drop']) < int(data['pickup'])
        except (TypeError, ValueError):
            # Non-numeric values: let field validation produce the error.
            return data
        if is_reversed:
            # Build a copy instead of mutating the caller's dictionary.
            data = {**data, 'pickup': data['drop'], 'drop': data['pickup']}
        return data


order1 = Delivery(pickup=15, drop=13)
print("After model validation:", order1.model_dump())

Before validator sees raw input: {'pickup': 15, 'drop': 13}
After model validation: {'pickup': 13, 'drop': 15}


1. mode='before'
- Use before to normalize raw inputs or fix types before parsing.
2. mode='after'
- Use after to validate relationships between already-parsed fields.

## 2 validations combined

In [11]:
# another mixed example
from pydantic import BaseModel, model_validator

class User(BaseModel):
    """User that demonstrates a 'before' and an 'after' model validator together."""

    name: str
    age: int

    @model_validator(mode="before")
    @classmethod
    def coerce_age(cls, values):
        """Replace the placeholder age ``"unknown"`` with 0 in the raw input.

        Runs before parsing, so ``values`` is the raw input. It is a
        classmethod because no instance exists yet at this stage.

        Returns:
            The input to validate. A new dict is returned when the age is
            replaced, so the caller's own dict is never modified.
        """
        print("before mode")
        # Raw input is only inspected when it is a dict; other input is passed through.
        if isinstance(values, dict) and values.get("age") == "unknown":
            values = {**values, "age": 0}
        return values

    @model_validator(mode="after")
    def check_adult(self):
        """Reject negative ages; runs after parsing, so ``self.age`` is an int.

        Raises:
            ValueError: when age is below 0.
        """
        print("after mode")
        if self.age < 0:
            raise ValueError("age must be non-negative")
        return self


print(User(name="A", age="21"))         # age parsed to int
print(User(name="B", age="unknown"))    # age set to 0 in the before validator


before mode
after mode
name='A' age=21
before mode
after mode
name='B' age=0


# @field_validator

In [None]:
from pydantic import BaseModel, field_validator

class Product(BaseModel):
    """Product whose price is checked by a custom field validator."""

    price: float

    @field_validator("price")
    @classmethod
    def must_be_positive(cls, value):
        """Ensure the price is strictly positive.

        Declared as a classmethod (with ``cls`` first), the documented form
        for field validators.

        Returns:
            The unchanged value when it is greater than 0.

        Raises:
            ValueError: when the value is 0 or negative.
        """
        if value <= 0:
            raise ValueError("Price must be greater than 0")
        return value


product1 = Product(price=10)  # the same rule can also be expressed with Field(gt=0)

# Built-in types

In [14]:
from pydantic import BaseModel, EmailStr, HttpUrl, PositiveInt 

class Contact(BaseModel):
    """Contact details validated with pydantic's built-in constrained types.

    EmailStr needs the optional email-validator package
    (``pip install "pydantic[email]"``); without it this cell fails with ImportError.
    """

    email: EmailStr  # e.g. if we remove the ".com" it will not work
    website: HttpUrl
    followers: PositiveInt  # must be > 0


# A contact whose values satisfy all three types.
good = Contact(
    email="sylvia@example.com",
    website="https://example.com",
    followers=10,
)

print(good.model_dump())

# The same model with three bad values: an email without a top-level domain,
# a URL without a scheme, and a negative follower count. The error is caught
# as ValueError (pydantic's ValidationError is a ValueError subclass) and
# printed so the notebook keeps running.
try:
    Contact(
        email="sylvia@example",
        website="/example.com",
        followers=-10,
    )
except ValueError as e:
    print(e)

ImportError: email-validator is not installed, run `pip install pydantic[email]`

# Nested models

In [None]:
# no additional imports needed!

class Address(BaseModel):
    """Postal address; used as a nested field type below."""

    street: str
    city: str
    postcode: str


class User(BaseModel):
    """User that embeds an ``Address`` model as one of its fields."""

    name: str
    email: str
    address: Address  # field typed with another model


data = dict(
    name="Robert",
    email="robert@gmail.com",
    # the nested dict supplies the fields of class Address
    address=dict(street="123 UCL road", city="London", postcode="AB1 2CD"),
)

user = User(**data)
# The nested dict became an Address instance, so attribute access chains.
print(user.address.city)

London


In [None]:
# another example: create a list of lessons

from typing import List
from pydantic import BaseModel

class Lesson(BaseModel):
    """One lesson of a tutorial."""

    title: str
    duration_minutes: int
    is_free: bool


class Tutorial(BaseModel):
    """Tutorial made up of a list of ``Lesson`` models."""

    name: str
    instructor: str
    lessons: List[Lesson]  # every list element is validated as a Lesson
    
    
# Each lesson dict must use the exact field names of Lesson. The key is
# "duration_minutes" (underscore); with a space it would not match the
# required field and validation would fail.
data = {
    "name": "Learn Pydantic",
    "instructor": "Vaibhav",
    "lessons": [
        {"title": "Basic models", "duration_minutes": 10, "is_free": True},
        {"title": "Alias", "duration_minutes": 20, "is_free": False},
        {"title": "Validation", "duration_minutes": 30, "is_free": False}
    ]
}

pydantic_lesson = Tutorial(**data)
print(pydantic_lesson.model_dump())

# recursive models

In [3]:
from pydantic import BaseModel
from typing import List, Optional

class FamilyTree(BaseModel):
    """Recursive model: every node has a name and, optionally, child nodes."""

    name: str
    # The string "FamilyTree" is a forward reference to this very class.
    children: Optional[List["FamilyTree"]] = None


# Ask pydantic to fully build the model now that the whole class is defined.
FamilyTree.model_rebuild()

data = dict(
    name="root",
    children=[
        dict(
            name="child_1",
            children=[dict(name="grandchild_1"), dict(name="grandchild_2")],
        ),
        dict(
            name="child_2",
            children=[dict(name="grandchild_3")],
        ),
    ],
)

tree = FamilyTree(**data)
print(tree.model_dump())

{'name': 'root', 'children': [{'name': 'child_1', 'children': [{'name': 'grandchild_1', 'children': None}, {'name': 'grandchild_2', 'children': None}]}, {'name': 'child_2', 'children': [{'name': 'grandchild_3', 'children': None}]}]}


## 🤩 Let's visualize!

In [4]:
# Let's visualize the result above.
def print_family_tree(node, indent=0):
    """Print a tree depth-first, one name per line.

    Args:
        node: object with a ``name`` and a ``children`` attribute
            (``children`` may be None or empty).
        indent: number of spaces printed before ``node.name``; each level of
            children is indented two spaces further.
    """
    padding = " " * indent
    print(padding + node.name)
    # A falsy children value (None or an empty list) means there is nothing to descend into.
    for child in node.children or ():
        print_family_tree(child, indent + 2)
        
print_family_tree(tree)

root
  child_1
    grandchild_1
    grandchild_2
  child_2
    grandchild_3


# pydantic + fastapi
FastAPI uses BaseModel to:
- 1. Validate request bodies
- 2. Auto-generate OpenAPI schema
- 3. Produce API docs

In [None]:
from fastapi import FastAPI
from pydantic import BaseModel

# Request body model for the /search route below.
class Request(BaseModel):
    query: str
    top_k: int = 5  # default used when the caller does not supply top_k

# Application object that the route below is registered on.
app = FastAPI()

# POST /search: `req` is declared with the Request model; the handler simply
# returns the received model unchanged.
@app.post("/search")
def search(req: Request):
    return req