In [None]:
##### Copyright 2026 Google LLC.
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.



# Gemini API: Structured Task Extraction with Pydantic

<a target="_blank" href="https://colab.research.google.com/github/google-gemini/cookbook/blob/main/examples/Structured_Data_Extraction_Pydantic.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" height=30/></a>

This notebook demonstrates how to use **Gemini 3 Flash** and **Pydantic** to transform unstructured, messy text into validated JSON objects. This is a core pattern for building AI agents that need to store data in databases or call external APIs.

In [None]:
# Install or upgrade the Google GenAI SDK (google-genai >= 1.0.0) and Pydantic.
# -q keeps pip's output quiet; -U upgrades packages that are already installed.
%pip install -qU "google-genai>=1.0.0" pydantic

In [None]:
import os
from google import genai

# Look up the Gemini API key: Colab's userdata store when running in Colab,
# the process environment everywhere else.
try:
    from google.colab import userdata
    API_KEY = userdata.get("GOOGLE_API_KEY")
except ImportError:
    # google.colab is not importable outside Colab. Export the key in your
    # shell before starting Jupyter: export GOOGLE_API_KEY="your-key"
    API_KEY = os.getenv("GOOGLE_API_KEY")

# Shared client used by every API call later in the notebook
client = genai.Client(api_key=API_KEY)

## Define the Schema
Using Pydantic, you define exactly what you want the AI to "think" about. You provide descriptions so the model understands the context of each field.

In [None]:
from pydantic import BaseModel, Field
from typing import List, Literal, Optional

# Schema for a single extracted to-do item. Each Field description is part of
# the schema handed to the model, so it doubles as a per-field instruction.
class Task(BaseModel):
    title: str = Field(description="The name of the task")
    # Literal restricts the AI to ONLY these three words
    priority: Literal["High", "Medium", "Low"] = Field(description="The priority of the task")
    # Optional[str] allows a null value when the text mentions no deadline.
    # The description asks for null rather than the text 'None', so a missing
    # date arrives as Python None instead of a truthy placeholder string.
    due_date: Optional[str] = Field(description="The date/time mentioned, or null if not specified")

# Top-level schema passed as `response_schema` in the extraction call: wraps
# the extracted Task items in a single object.
class TaskList(BaseModel):
    # Every task found in the input text, each one a Task instance
    tasks: List[Task]


In [None]:
# @title Run Extraction

# Use @param to create an interactive text box in Colab
messy_input = "Fix the sink by Friday. Also, high priority: call the bank tomorrow." # @param {type:"string"}
MODEL_ID = "gemini-3-flash" # @param ["gemini-3-flash", "gemini-1.5-pro"] {"allow-input":true, "isTemplate": true}

# Ask Gemini for a JSON reply constrained to the TaskList schema
response = client.models.generate_content(
    model=MODEL_ID,
    contents=f"Extract tasks: {messy_input}",
    config={
        'response_mime_type': 'application/json',
        'response_schema': TaskList,
    }
)

# `response.parsed` can be None when the reply could not be turned into a
# TaskList (for example an empty or malformed response). Fail with the raw
# text instead of an opaque AttributeError on `.tasks`.
task_list = response.parsed
if task_list is None:
    raise ValueError(
        f"Gemini did not return a valid TaskList. Raw response text: {response.text!r}"
    )

# Display result
for task in task_list.tasks:
    print(f"[{task.priority}] {task.title} - Due: {task.due_date}")