In [None]:
from dotenv import load_dotenv
import os

load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

### Use model in agent 

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

model = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash", api_key=GEMINI_API_KEY)

In [5]:
from langchain_core.tools import tool

@tool
def add_num(a: int, b:int):
    """add two number
        a: int 
        b: int
        return int
    """
    return a + b

In [6]:
from langgraph.prebuilt import create_react_agent

agent = create_react_agent(
    model=model,
    tools=[add_num]
)

In [8]:
agent.invoke({"messages":"add 200 and 300"})

{'messages': [HumanMessage(content='add 200 and 300', additional_kwargs={}, response_metadata={}, id='e8cee3ff-2b53-4135-b6ae-9def387f1102'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'add_num', 'arguments': '{"b": 300.0, "a": 200.0}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--8df055b6-1ac8-4447-a3ed-fbfa95bc6dee-0', tool_calls=[{'name': 'add_num', 'args': {'b': 300.0, 'a': 200.0}, 'id': '177f5df9-f670-4cd1-bd62-40f32623e6fc', 'type': 'tool_call'}], usage_metadata={'input_tokens': 72, 'output_tokens': 106, 'total_tokens': 178, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 82}}),
  ToolMessage(content='500', name='add_num', id='53c19da5-a3d8-4ca0-9a04-22bac297a91c', tool_call_id='177f5df9-f670-4cd1-bd62-40f32623e6fc'),
  AIMessage(content='The sum of 200 and 300 is 500.', additional_kwargs={}, res

### Dynamic model selection

In [1]:
# ollama model
from langchain_ollama import ChatOllama

# disable streaming
ollama_model = ChatOllama(model="gemma3n", disable_streaming=True)

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

gemini_model = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash", api_key=GEMINI_API_KEY)

In [90]:
from langchain_core.tools import tool

@tool
def weather(city: str):
    """
        give weather of perticular city
        city: name of city
        return :str - weather of city
    """
    return f"{city} weather is cool"

In [119]:
from langgraph.prebuilt.chat_agent_executor import AgentState
from langchain_core.runnables.config import RunnableConfig
from langchain_core.runnables import Runnable
from langchain_core.language_models import LanguageModelInput
from langchain_core.messages import BaseMessage
from langchain_core.language_models import BaseLanguageModel


def select_model(state: AgentState, config: RunnableConfig) -> BaseLanguageModel:
    if config["metadata"]["provider"] == "ollama":
        model = ollama_model
    elif config["metadata"]["provider"] == "gemini":
        model = gemini_model
    else:
        raise ValueError(f"Unsupported provider: {config["metadata"]["provider"]}")

    return model.bind_tools([weather]) 

In [120]:
from langgraph.prebuilt import create_react_agent

agent = create_react_agent(
    select_model,
    tools=[]
)

In [121]:
agent.invoke({"messages":"what is weather of rajula"}, config={"configurable":{"provider":"gemini"}})

{'messages': [HumanMessage(content='what is weather of rajula', additional_kwargs={}, response_metadata={}, id='af132fb7-4c39-4cc6-b1cf-c4e8b006416e'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'weather', 'arguments': '{"city": "rajula"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--ab47a601-721d-4497-bef5-e8053b2d29c3-0', tool_calls=[{'name': 'weather', 'args': {'city': 'rajula'}, 'id': '4d6b872f-0271-4335-a18e-f0f05f0bbb64', 'type': 'tool_call'}], usage_metadata={'input_tokens': 59, 'output_tokens': 74, 'total_tokens': 133, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 60}})]}

### Advanced Model Config

In [None]:
# add fallback model

model = (
    ChatGoogleGenerativeAI(model="models/gemini-2.5-flash", api_key=GEMINI_API_KEY)
    .with_fallbacks([
        ollama_model
    ])
)

In [6]:
model.invoke("hello")

AIMessage(content='Hello! How can I help you today?', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--a287c7ce-cc40-46eb-928e-6e8036eb6a5c-0', usage_metadata={'input_tokens': 2, 'output_tokens': 191, 'total_tokens': 193, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 182}})

### use build-in rate limiter

In [None]:
from langchain_core.rate_limiters import InMemoryRateLimiter

rate_limiter = InMemoryRateLimiter(
    requests_per_second=1,
    check_every_n_seconds=1,
    max_bucket_size=2
)

model = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash", api_key=GEMINI_API_KEY, rate_limiter=rate_limiter)


In [9]:
model.invoke("hello")

AIMessage(content='Hello there! How can I help you today?', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--fe062a28-aa2f-43e5-970b-8862ca6eaa4a-0', usage_metadata={'input_tokens': 2, 'output_tokens': 41, 'total_tokens': 43, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 31}})