<a href="https://colab.research.google.com/github/ancestor9/2025_Fall_AI-Model-Operations-MLOps/blob/main/week07/Pydantic_Typing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **[Pydantic](https://docs.pydantic.dev/latest/)**

In [5]:
from pydantic import BaseModel

# Pydantic model for one sale record. Field values are validated against the
# annotated types when an instance is constructed; a value that cannot be
# parsed (e.g. quantity="Five") raises a validation error.
class Sale(BaseModel):
    product: str
    category: str
    quantity: int
    price: float

# Create a model instance by unpacking a plain dict into keyword arguments.
data = {"product": "Laptop", "category": "Electronics", "quantity": 5, "price": 999.99}
sale = Sale(**data)

In [6]:
sale

Sale(product='Laptop', category='Electronics', quantity=5, price=999.99)

In [7]:
vars(sale)

{'product': 'Laptop',
 'category': 'Electronics',
 'quantity': 5,
 'price': 999.99}

In [8]:
type(sale)

In [9]:
# Validated data is read back through ordinary attribute access.
print(sale.product)  # "Laptop"

Laptop


In [10]:
# model_dump() is the Pydantic v2 replacement for the deprecated dict() method.
print(sale.model_dump())   # {"product": "Laptop", "category": "Electronics", "quantity": 5, "price": 999.99}

{'product': 'Laptop', 'category': 'Electronics', 'quantity': 5, 'price': 999.99}


/tmp/ipython-input-1298239440.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  print(sale.dict())   # {"product": "Laptop", "category": "Electronics", "quantity": 5, "price": 999.99}


In [11]:
# Validation example: "Five" cannot be parsed as an integer quantity.
from pydantic import ValidationError

invalid_data = {"product": "Laptop", "category": "Electronics", "quantity": "Five", "price": 999.99}
try:
    sale = Sale(**invalid_data)  # raises ValidationError because quantity must be an integer
except ValidationError as e:
    # Catch only the validation failure so unrelated errors are not masked.
    print(e)

1 validation error for Sale
quantity
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='Five', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/int_parsing


## **Class로 데이터 정의**

In [12]:
class Sale:
    """Plain-Python sale record; arguments are stored as given, with no validation."""

    def __init__(self, product, category, quantity, price):
        # Tuple assignment binds the targets left to right, so the attributes
        # are created in product, category, quantity, price order.
        self.product, self.category, self.quantity, self.price = (
            product,
            category,
            quantity,
            price,
        )

# Create an instance from positional arguments.
sale = Sale("Laptop", "Electronics", 5, 999.99)

# Read an attribute back.
print(sale.product)  # "Laptop"

Laptop


In [13]:
sale

<__main__.Sale at 0x7f47a43ad070>

In [14]:

print(vars(sale))    # {'product': 'Laptop', 'category': 'Electronics', 'quantity': 5, 'price': 999.99}


{'product': 'Laptop', 'category': 'Electronics', 'quantity': 5, 'price': 999.99}


In [15]:

# With a plain class, validation has to be implemented by hand.
invalid_data = Sale("Laptop", "Electronics", "Five", 999.99)  # no error is raised here; "Five" is stored as-is


### **2. Typing**
- Python의 typing 모듈은 타입 힌트를 제공하여 코드의 가독성과 안정성을 향상

In [16]:

from typing import List, Dict, Tuple

# Takes a list of strings and returns an integer.
def sum_lengths(strings: List[str]) -> int:
    """Return the total of ``len(item)`` over every item in ``strings``."""
    return sum(map(len, strings))

In [17]:
sum_lengths('ancestor9')

9

In [18]:
# Takes a list of (string, integer) tuples and returns a dictionary.
def create_dict(pairs: List[Tuple[str, int]]) -> Dict[str, int]:
    """Build a dict from ``pairs``; a later pair overwrites an earlier one with the same key."""
    mapping: Dict[str, int] = {}
    for name, number in pairs:
        mapping[name] = number
    return mapping


In [19]:
create_dict([('ancestor', 23)])

{'ancestor': 23}

In [20]:
create_dict([('ancestor9', 23), ('ancestor8', 33)])

{'ancestor9': 23, 'ancestor8': 33}

In [21]:
from typing import Union

def process_number(value: Union[int, float]) -> float:
    """Return ``value`` multiplied by 2.5."""
    scaled = value * 2.5
    return scaled

In [22]:
process_number(10)

25.0

In [23]:
process_number(10.0)

25.0

### **3. 예제**

In [24]:
from typing import List, Optional
from pydantic import BaseModel

# Pydantic model definition.
# email is annotated as a plain str, so no e-mail format check is applied.
class User(BaseModel):
    id: int
    name: str
    email: str

# In-memory list used as a temporary stand-in for a database.
users_db: List[User] = []

### create_user: 함수 이름으로, 사용자를 생성하는 역할
- user: 함수의 매개변수로, 호출될 때 전달되어야 하는 입력 데이터로, **user가 함수에 전달되는 사용자 정보 객체**
- User: user 매개변수가 기대하는 데이터 타입을 나타내며, Pydantic 모델 클래스로 사용자 정보를 정의하기 위해 id, name, email 속성을 가진 객체를 의미
- 타입 힌트의 의미
> - **타입 힌트**(`user: User`)는 user 매개변수가 User 클래스의 인스턴스임을 명시.
>   이를 통해 create_user 함수가 어떤 타입의 입력을 기대하는지 알 수 있음
> - create_user라는 함수는 user라는 매개변수가 User 객체임을 기대하며, Pydantic의 데이터 검증은 함수 호출 시점이 아니라 User 객체를 생성하는 시점에 수행됨 (타입 힌트 자체는 런타임에 강제되지 않음)
> - create_user를 호출할 때는 아래와 같이 User 객체를 만들어 전달

In [25]:
def create_user(user: User):
    """Register ``user`` in ``users_db`` unless its email is already present.

    Returns a dict with ``"success": False`` and a message when an existing
    entry has the same email; otherwise appends ``user`` and returns a dict
    with ``"success": True`` and the stored user.
    """
    # Duplicate-email check: stop at the first entry with a matching email.
    for registered in users_db:
        if registered.email == user.email:
            return {"success": False, "message": "Email already registered"}

    users_db.append(user)
    return {"success": True, "user": user}

In [26]:
user_data= User(id = 100, name='ancestor9', email='zoro')
user_data

User(id=100, name='ancestor9', email='zoro')

In [27]:
create_user(user_data)

{'success': True, 'user': User(id=100, name='ancestor9', email='zoro')}

In [28]:
# prompt: faker로 100개 가상데이터(name과 email을 생성)를 User를 통해 만들어봐
! pip install faker --quiet

from faker import Faker

fake = Faker()

# Rebinds the name User to a model with only name and email; unlike the
# earlier User definition in this notebook, it has no id field.
class User(BaseModel):
    name: str
    email: str

# Build 100 User instances, each with a Faker-generated name and email.
users: List[User] = [
    User(name=fake.name(), email=fake.email())
    for _ in range(100)
]

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.8/2.0 MB[0m [31m23.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [32]:
import pandas as pd
# Passing the model instances directly produces cells holding (field, value)
# tuples, as the output below shows; convert each model to a dict first to get
# proper name/email columns.
pd.DataFrame(users)

Unnamed: 0,0,1
0,"(name, Emily Glass)","(email, gomezjustin@example.net)"
1,"(name, Kevin Wong)","(email, kwhite@example.org)"
2,"(name, Jason Parrish)","(email, scott69@example.net)"
3,"(name, Crystal Davis)","(email, stevenspencer@example.net)"
4,"(name, Jose Carlson)","(email, ubrown@example.net)"
...,...,...
95,"(name, Jason Martin)","(email, ygarcia@example.org)"
96,"(name, Jessica Garcia)","(email, rhogan@example.net)"
97,"(name, Lisa Stone)","(email, ronnie72@example.net)"
98,"(name, Julie Monroe)","(email, hayesmichael@example.org)"


In [33]:
# Slice first so only 10 models are dumped; model_dump() replaces the deprecated dict().
[user.model_dump() for user in users[:10]]

/tmp/ipython-input-2524632598.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  [user.dict() for user in users][:10]


[{'name': 'Emily Glass', 'email': 'gomezjustin@example.net'},
 {'name': 'Kevin Wong', 'email': 'kwhite@example.org'},
 {'name': 'Jason Parrish', 'email': 'scott69@example.net'},
 {'name': 'Crystal Davis', 'email': 'stevenspencer@example.net'},
 {'name': 'Jose Carlson', 'email': 'ubrown@example.net'},
 {'name': 'Ryan Smith', 'email': 'megan97@example.net'},
 {'name': 'Judy Robinson', 'email': 'rowetaylor@example.org'},
 {'name': 'Patrick Glass', 'email': 'margaretwright@example.com'},
 {'name': 'Laura Gonzales', 'email': 'brownjessica@example.org'},
 {'name': 'Sarah Preston', 'email': 'smithbenjamin@example.com'}]

In [34]:
# model_dump() replaces the deprecated dict(); each dumped dict becomes one DataFrame row.
pd.DataFrame([user.model_dump() for user in users])

/tmp/ipython-input-465260212.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  pd.DataFrame([user.dict() for user in users])


Unnamed: 0,name,email
0,Emily Glass,gomezjustin@example.net
1,Kevin Wong,kwhite@example.org
2,Jason Parrish,scott69@example.net
3,Crystal Davis,stevenspencer@example.net
4,Jose Carlson,ubrown@example.net
...,...,...
95,Jason Martin,ygarcia@example.org
96,Jessica Garcia,rhogan@example.net
97,Lisa Stone,ronnie72@example.net
98,Julie Monroe,hayesmichael@example.org
