In [None]:
!pip install pydantic[email]

Collecting email-validator>=2.0.0 (from pydantic[email])
  Downloading email_validator-2.3.0-py3-none-any.whl.metadata (26 kB)
Collecting dnspython>=2.0.0 (from email-validator>=2.0.0->pydantic[email])
  Downloading dnspython-2.8.0-py3-none-any.whl.metadata (5.7 kB)
Downloading email_validator-2.3.0-py3-none-any.whl (35 kB)
Downloading dnspython-2.8.0-py3-none-any.whl (331 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m331.1/331.1 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dnspython, email-validator
Successfully installed dnspython-2.8.0 email-validator-2.3.0


In [None]:
from pydantic import BaseModel, EmailStr, field_validator
from typing import Optional

class ContactInfo(BaseModel):
    """Contact details parsed from an LLM response.

    ``name`` and ``email`` are required; ``phone`` and ``company`` default to
    ``None``. When a phone number is supplied it must contain at least 10
    digit characters.
    """

    name: str
    email: EmailStr
    phone: Optional[str] = None
    company: Optional[str] = None

    @field_validator('phone')
    @classmethod
    def validate_phone(cls, v):
        """Reject phone numbers that contain fewer than 10 digits.

        Only digit characters are counted, so punctuation and spacing are
        ignored for the check. The value is returned exactly as received; it
        is validated, not normalised.

        Raises:
            ValueError: if fewer than 10 digit characters are present.
        """
        if v is not None:
            digit_count = sum(1 for ch in v if ch.isdigit())
            if digit_count < 10:
                raise ValueError('Phone number must have at least 10 digits')
        return v


In [None]:
import json

# Sample LLM output: JSON text carrying the four ContactInfo fields.
llm_response = '''
{
    "name": "Sarah Johnson",
    "email": "sarah.johnson@techcorp.com",
    "phone": "(555) 123-4567",
    "company": "TechCorp Industries"
}
'''

# Decode the JSON text into a dict, then validate it by constructing the model.
data = json.loads(llm_response)
contact = ContactInfo(**data)

# The phone passes validation ("(555) 123-4567" has 10 digits) and is kept as written.
print(contact.name)
print(contact.email)
print(contact.model_dump())


Sarah Johnson
sarah.johnson@techcorp.com
{'name': 'Sarah Johnson', 'email': 'sarah.johnson@techcorp.com', 'phone': '(555) 123-4567', 'company': 'TechCorp Industries'}


In [None]:
from pydantic import BaseModel, ValidationError
import json
import re

class ProductReview(BaseModel):
    """A product review parsed from LLM output.

    All four fields are required. ``rating`` must be an integer from 1 to 5
    inclusive.
    """

    product_name: str
    rating: int
    review_text: str
    would_recommend: bool

    # NOTE: `field_validator` is not imported in this cell; it is expected to be
    # in scope from the earlier `from pydantic import ...` cell.
    @field_validator('rating')
    @classmethod
    def validate_rating(cls, v):
        """Ensure the rating lies in the inclusive range 1-5.

        Raises:
            ValueError: if the rating is below 1 or above 5.
        """
        if v < 1 or v > 5:
            raise ValueError('Rating must be between 1 and 5')
        return v

def extract_json_from_llm_response(response: str) -> dict:
    """Extract the first JSON object embedded in an LLM response.

    The response may contain arbitrary prose before and after the JSON. Rather
    than grabbing everything between the first ``{`` and the last ``}`` (which
    breaks as soon as the surrounding text contains a brace or a second
    object), decoding is attempted at each candidate ``{`` and stops at the end
    of the first object that parses.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: with message "No JSON found in response" if the text has
            no ``{`` followed by a ``}``.
        json.JSONDecodeError: if brace-delimited text exists but no candidate
            decodes; the error from the first candidate is raised.
    """
    # Same presence check as the original implementation, so the "no JSON at
    # all" case keeps raising a plain ValueError with the same message.
    if re.search(r'\{.*\}', response, re.DOTALL) is None:
        raise ValueError("No JSON found in response")

    decoder = json.JSONDecoder()
    first_error = None
    start = response.find('{')
    while start != -1:
        try:
            # raw_decode parses one JSON document from the start of the string
            # and ignores whatever follows it.
            parsed, _ = decoder.raw_decode(response[start:])
        except json.JSONDecodeError as exc:
            if first_error is None:
                first_error = exc
            # Resume at the point where parsing failed (not at the next
            # character) so a fragment nested inside a malformed outer object
            # that was already consumed is not returned on its own.
            start = response.find('{', start + max(exc.pos, 1))
        else:
            return parsed

    # The presence check guarantees at least one '{' was tried, so
    # first_error is set whenever this line is reached.
    raise first_error

def parse_review(llm_output: str) -> ProductReview:
    """Convert raw LLM text into a validated ``ProductReview``.

    The JSON payload is extracted from the text and used to construct the
    model. Any failure is reported with ``print`` and then re-raised
    unchanged, so callers still receive the original exception.

    Args:
        llm_output: Raw text returned by the LLM.

    Returns:
        The validated review.

    Raises:
        json.JSONDecodeError: if the extracted text is not valid JSON.
        ValidationError: if the payload does not satisfy ``ProductReview``.
        Exception: any other error, re-raised after being printed.
    """
    try:
        payload = extract_json_from_llm_response(llm_output)
        return ProductReview(**payload)
    except json.JSONDecodeError as json_err:
        print(f"JSON parsing error: {json_err}")
        raise
    except ValidationError as validation_err:
        print(f"Validation error: {validation_err}")
        raise
    except Exception as other_err:
        # Catch-all exists only to log; the bare `raise` keeps the traceback.
        print(f"Unexpected error: {other_err}")
        raise



In [None]:
# Sample LLM output where the JSON object is wrapped in conversational text.
messy_response = '''
Here's the review in JSON format:

{
    "product_name": "Wireless Headphones X100",
    "rating": 4,
    "review_text": "Great sound quality, comfortable for long use.",
    "would_recommend": true
}

Hope this helps!
'''

# parse_review extracts the JSON from the surrounding text and validates it.
review = parse_review(messy_response)
print(f"Product: {review.product_name}")
print(f"Rating: {review.rating}/5")


Product: Wireless Headphones X100
Rating: 4/5


In [None]:
from pydantic import BaseModel, Field, field_validator
from typing import List

class Specification(BaseModel):
    """A single key/value specification entry, used in ``Product.specifications``."""

    # Specification label (e.g. "Capacity" in the sample payload).
    key: str
    # Specification value as free-form text (e.g. "12 cups" in the sample payload).
    value: str

class Review(BaseModel):
    """One review entry, used in ``Product.reviews``."""

    reviewer_name: str
    # Required; constrained to the inclusive range 1-5 via Field(ge=1, le=5).
    rating: int = Field(..., ge=1, le=5)
    comment: str
    # Optional in the payload; defaults to False when omitted.
    verified_purchase: bool = False

class Product(BaseModel):
    """A product with nested specifications and reviews.

    ``price`` must be greater than 0 and ``average_rating`` must lie in the
    inclusive range 1-5. In addition, ``average_rating`` is cross-checked
    against the mean of the ratings in ``reviews``.
    """

    id: str
    name: str
    price: float = Field(..., gt=0)
    category: str
    specifications: List[Specification]
    reviews: List[Review]
    average_rating: float = Field(..., ge=1, le=5)

    @field_validator('average_rating')
    @classmethod
    def check_average_matches_reviews(cls, v, info):
        """Check the stated average against the mean of the review ratings.

        ``reviews`` is read from ``info.data``; if it is missing there or is
        empty, the check is skipped and the value is accepted as-is. Otherwise
        the stated average may differ from the computed mean by at most 0.1.

        Raises:
            ValueError: if the stated and computed averages differ by more
                than 0.1.
        """
        reviews = info.data.get('reviews', [])
        if not reviews:
            return v

        rating_total = sum(review.rating for review in reviews)
        calculated_avg = rating_total / len(reviews)
        if abs(calculated_avg - v) > 0.1:
            raise ValueError(
                f'Average rating {v} does not match calculated average {calculated_avg:.2f}'
            )
        return v


In [None]:
# Sample payload for Product. Unlike the earlier contact cell, where
# `llm_response` was JSON text, here the name is bound to an already-parsed
# Python dict (note `True`, not `true`), so no json.loads step is needed.
llm_response = {
    "id": "PROD-2024-001",
    "name": "Smart Coffee Maker",
    "price": 129.99,
    "category": "Kitchen Appliances",
    "specifications": [
        {"key": "Capacity", "value": "12 cups"},
        {"key": "Power", "value": "1000W"},
        {"key": "Color", "value": "Stainless Steel"}
    ],
    "reviews": [
        {
            "reviewer_name": "Alex M.",
            "rating": 5,
            "comment": "Makes excellent coffee every time!",
            "verified_purchase": True
        },
        {
            "reviewer_name": "Jordan P.",
            "rating": 4,
            "comment": "Good but a bit noisy",
            "verified_purchase": True
        }
    ],
    # Mean of the two review ratings above: (5 + 4) / 2 = 4.5.
    "average_rating": 4.5
}

# Nested dicts are validated into Specification and Review instances.
product = Product(**llm_response)
print(f"{product.name}: ${product.price}")
print(f"Average Rating: {product.average_rating}")
print(f"Number of reviews: {len(product.reviews)}")


Smart Coffee Maker: $129.99
Average Rating: 4.5
Number of reviews: 2
