# Lecture - Using Pydantic to structure output from LLMs

In [87]:
from dotenv import load_dotenv 
from google import genai
import os 

# Load variables from a local .env file into the process environment so the
# API key never has to be written into the notebook itself.
load_dotenv()

# Client used for the Gemini requests below; the key is read from the environment.
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

# Smoke test: a single short request to confirm the key and model name work.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Explain how AI works in a few words",
)
print(response.text)

AI learns patterns from data to make intelligent decisions or predictions.


In [88]:
# The prompt describes the wanted JSON layout in plain language and asks the
# model to leave out the Markdown code fences it would otherwise add.
library_prompt = """You are a helpful assistant. I need you to create a JSON object representing a library. 
    The library's name should be 'Coolu Libraru' and have the fields name and books that contains a list of book. 
    Each book should have a 'title', 'author', and 'year' field. Make sure the output is a single, valid JSON object. Give me 10 books. 
    Remove ```json and ``` """

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=library_prompt,
)

# Bare expression: the cell displays the repr of the raw response text.
response.text

'{\n  "name": "Coolu Libraru",\n  "books": [\n    {\n      "title": "The Hitchhiker\'s Guide to the Galaxy",\n      "author": "Douglas Adams",\n      "year": 1979\n    },\n    {\n      "title": "Pride and Prejudice",\n      "author": "Jane Austen",\n      "year": 1813\n    },\n    {\n      "title": "1984",\n      "author": "George Orwell",\n      "year": 1949\n    },\n    {\n      "title": "To Kill a Mockingbird",\n      "author": "Harper Lee",\n      "year": 1960\n    },\n    {\n      "title": "The Great Gatsby",\n      "author": "F. Scott Fitzgerald",\n      "year": 1925\n    },\n    {\n      "title": "Moby Dick",\n      "author": "Herman Melville",\n      "year": 1851\n    },\n    {\n      "title": "War and Peace",\n      "author": "Leo Tolstoy",\n      "year": 1869\n    },\n    {\n      "title": "The Lord of the Rings",\n      "author": "J.R.R. Tolkien",\n      "year": 1954\n    },\n    {\n      "title": "Crime and Punishment",\n      "author": "Fyodor Dostoevsky",\n      "year": 1

In [89]:
# print() renders the "\n" escapes as real line breaks, which makes the JSON
# easier to read than the repr shown by a bare `response.text` expression.
print(response.text)

{
  "name": "Coolu Libraru",
  "books": [
    {
      "title": "The Hitchhiker's Guide to the Galaxy",
      "author": "Douglas Adams",
      "year": 1979
    },
    {
      "title": "Pride and Prejudice",
      "author": "Jane Austen",
      "year": 1813
    },
    {
      "title": "1984",
      "author": "George Orwell",
      "year": 1949
    },
    {
      "title": "To Kill a Mockingbird",
      "author": "Harper Lee",
      "year": 1960
    },
    {
      "title": "The Great Gatsby",
      "author": "F. Scott Fitzgerald",
      "year": 1925
    },
    {
      "title": "Moby Dick",
      "author": "Herman Melville",
      "year": 1851
    },
    {
      "title": "War and Peace",
      "author": "Leo Tolstoy",
      "year": 1869
    },
    {
      "title": "The Lord of the Rings",
      "author": "J.R.R. Tolkien",
      "year": 1954
    },
    {
      "title": "Crime and Punishment",
      "author": "Fyodor Dostoevsky",
      "year": 1866
    },
    {
      "title": "Don Quixote",
 

In [90]:
from pydantic import BaseModel, Field 
from typing import List
from datetime import datetime

class Book(BaseModel):
    """A single book entry as described in the LLM prompt.

    Attributes:
        title: Title of the book.
        author: Name of the author.
        year: Publication year; must be greater than 1000 and no later than
            the current calendar year.
    """

    title: str
    author: str
    # `le` instead of `lt`: a book published in the current year is valid and
    # must not be rejected. The upper bound is computed once, when the class
    # body is executed.
    year: int = Field(gt=1000, le=datetime.now().year)

class Library(BaseModel):
    """A named collection of Book entries."""

    name: str 
    books: List[Book]


# Parse the raw JSON text returned by the LLM and validate it against the
# Library schema; the result is a Library instance holding Book instances.
library = Library.model_validate_json(response.text)
library

Library(name='Coolu Libraru', books=[Book(title="The Hitchhiker's Guide to the Galaxy", author='Douglas Adams', year=1979), Book(title='Pride and Prejudice', author='Jane Austen', year=1813), Book(title='1984', author='George Orwell', year=1949), Book(title='To Kill a Mockingbird', author='Harper Lee', year=1960), Book(title='The Great Gatsby', author='F. Scott Fitzgerald', year=1925), Book(title='Moby Dick', author='Herman Melville', year=1851), Book(title='War and Peace', author='Leo Tolstoy', year=1869), Book(title='The Lord of the Rings', author='J.R.R. Tolkien', year=1954), Book(title='Crime and Punishment', author='Fyodor Dostoevsky', year=1866), Book(title='Don Quixote', author='Miguel de Cervantes', year=1605)])

In [91]:
# vars() returns the instance's __dict__: the field names mapped to their values.
vars(library)

{'name': 'Coolu Libraru',
 'books': [Book(title="The Hitchhiker's Guide to the Galaxy", author='Douglas Adams', year=1979),
  Book(title='Pride and Prejudice', author='Jane Austen', year=1813),
  Book(title='1984', author='George Orwell', year=1949),
  Book(title='To Kill a Mockingbird', author='Harper Lee', year=1960),
  Book(title='The Great Gatsby', author='F. Scott Fitzgerald', year=1925),
  Book(title='Moby Dick', author='Herman Melville', year=1851),
  Book(title='War and Peace', author='Leo Tolstoy', year=1869),
  Book(title='The Lord of the Rings', author='J.R.R. Tolkien', year=1954),
  Book(title='Crime and Punishment', author='Fyodor Dostoevsky', year=1866),
  Book(title='Don Quixote', author='Miguel de Cervantes', year=1605)]}

In [92]:
# The validated result is an instance of the Library class defined in this notebook.
type(library)

__main__.Library

In [93]:
# Library subclasses BaseModel, so the instance is also a BaseModel.
isinstance(library, BaseModel)

True

In [94]:
# Fields are read as ordinary attributes on the model instance.
library.name

'Coolu Libraru'

In [95]:
# The nested entries are Book instances, not raw dicts.
library.books

[Book(title="The Hitchhiker's Guide to the Galaxy", author='Douglas Adams', year=1979),
 Book(title='Pride and Prejudice', author='Jane Austen', year=1813),
 Book(title='1984', author='George Orwell', year=1949),
 Book(title='To Kill a Mockingbird', author='Harper Lee', year=1960),
 Book(title='The Great Gatsby', author='F. Scott Fitzgerald', year=1925),
 Book(title='Moby Dick', author='Herman Melville', year=1851),
 Book(title='War and Peace', author='Leo Tolstoy', year=1869),
 Book(title='The Lord of the Rings', author='J.R.R. Tolkien', year=1954),
 Book(title='Crime and Punishment', author='Fyodor Dostoevsky', year=1866),
 Book(title='Don Quixote', author='Miguel de Cervantes', year=1605)]

Extract the book titles into a list.

In [96]:
# Collect each book's title, keeping the order of library.books.
titles = [entry.title for entry in library.books]
titles

["The Hitchhiker's Guide to the Galaxy",
 'Pride and Prejudice',
 '1984',
 'To Kill a Mockingbird',
 'The Great Gatsby',
 'Moby Dick',
 'War and Peace',
 'The Lord of the Rings',
 'Crime and Punishment',
 'Don Quixote']

Extract the title and year of every book published after a certain year.

In [100]:
# Exclusive cutoff: only books published strictly after this year are kept.
PUBLISHED_AFTER = 1950

# (title, year) pairs for the books newer than the cutoff, in library order.
newer_books = [
    (book.title, book.year)
    for book in library.books
    if book.year > PUBLISHED_AFTER
]
newer_books

[("The Hitchhiker's Guide to the Galaxy", 1979),
 ('To Kill a Mockingbird', 1960),
 ('The Lord of the Rings', 1954)]

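To convert the model back into plain Python data (nested dicts and lists), use `model_dump()`; use `model_dump_json()` when you need a JSON string.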

In [98]:
# Returns a plain dict with the books as a list of dicts (not a JSON string).
library.model_dump()

{'name': 'Coolu Libraru',
 'books': [{'title': "The Hitchhiker's Guide to the Galaxy",
   'author': 'Douglas Adams',
   'year': 1979},
  {'title': 'Pride and Prejudice', 'author': 'Jane Austen', 'year': 1813},
  {'title': '1984', 'author': 'George Orwell', 'year': 1949},
  {'title': 'To Kill a Mockingbird', 'author': 'Harper Lee', 'year': 1960},
  {'title': 'The Great Gatsby', 'author': 'F. Scott Fitzgerald', 'year': 1925},
  {'title': 'Moby Dick', 'author': 'Herman Melville', 'year': 1851},
  {'title': 'War and Peace', 'author': 'Leo Tolstoy', 'year': 1869},
  {'title': 'The Lord of the Rings', 'author': 'J.R.R. Tolkien', 'year': 1954},
  {'title': 'Crime and Punishment',
   'author': 'Fyodor Dostoevsky',
   'year': 1866},
  {'title': 'Don Quixote', 'author': 'Miguel de Cervantes', 'year': 1605}]}

Write the library to a JSON file.

In [None]:
# Serialize the validated model to JSON and save it next to the notebook.
# The encoding is set explicitly so non-ASCII titles or author names are
# written the same way on every platform instead of depending on the
# locale's default encoding.
with open("library.json", "w", encoding="utf-8") as json_file:
    json_file.write(library.model_dump_json())

Create a pandas DataFrame from the books.

In [99]:
import pandas as pd 

# Build the frame directly from the dumped book records (one dict per book)
# instead of three parallel lists, which also avoids rebinding `titles`.
# `columns` pins the column order to title, year, author.
books_df = pd.DataFrame(
    library.model_dump()["books"],
    columns=["title", "year", "author"],
)
books_df

Unnamed: 0,title,year,author
0,The Hitchhiker's Guide to the Galaxy,1979,Douglas Adams
1,Pride and Prejudice,1813,Jane Austen
2,1984,1949,George Orwell
3,To Kill a Mockingbird,1960,Harper Lee
4,The Great Gatsby,1925,F. Scott Fitzgerald
5,Moby Dick,1851,Herman Melville
6,War and Peace,1869,Leo Tolstoy
7,The Lord of the Rings,1954,J.R.R. Tolkien
8,Crime and Punishment,1866,Fyodor Dostoevsky
9,Don Quixote,1605,Miguel de Cervantes


<div style="background-color: #FFF; color: #212121; border-radius: 1px; width:22ch; box-shadow: rgba(0, 0, 0, 0.16) 0px 1px 4px; display: flex; justify-content: center; align-items: center;">
<div style="padding: 1.5em 0; width: 70%;">
    <h2 style="font-size: 1.2rem;">Kokchun Giang</h2>
    <a href="https://www.linkedin.com/in/kokchungiang/" target="_blank" style="display: flex; align-items: center; gap: .4em; color:#0A66C2;">
        <img src="https://content.linkedin.com/content/dam/me/business/en-us/amp/brand-site/v2/bg/LI-Bug.svg.original.svg" width="20"> 
        LinkedIn profile
    </a>
    <a href="https://github.com/kokchun/Portfolio-Kokchun-Giang" target="_blank" style="display: flex; align-items: center; gap: .4em; margin: 1em 0; color:#0A66C2;">
        <img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" width="20"> 
        Github portfolio
    </a>
    <span>AIgineer AB</span>
</div>
</div>
