In [None]:
# Quietly install/upgrade the packages imported by the later cells
# (API serving, tunnelling, geocoding, LangChain/LangGraph, Hugging Face, FAISS, pydantic).
!pip install -q \
fastapi uvicorn pyngrok nest-asyncio geopy \
langchain-core langchain-community langchain-text-splitters langchain-huggingface\
langgraph \
transformers \
accelerate \
sentence-transformers \
faiss-cpu \
pydantic \
--upgrade
print('Pip installation completed !')

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.6/90.6 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.7/110.7 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m42.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m157.3/157.3 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m88.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m463.6/463.6 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from google.colab import drive
import json

# 1️⃣ Mount Google Drive so the config file below becomes readable
drive.mount('/content/drive')

# 2️⃣ Path to config JSON
CONFIG_FILE_PATH = "/content/drive/MyDrive/Agentic AI/config.json"

# 3️⃣ Load NGROK_AUTH_TOKEN from JSON
# Any failure (unreadable file, invalid JSON, missing/empty token) is reported as a
# RuntimeError; `from e` keeps the underlying error attached as the explicit cause.
try:
    with open(CONFIG_FILE_PATH, "r", encoding="utf-8") as f:
        config = json.load(f)
    NGROK_AUTH_TOKEN = config.get("NGROK_AUTH_TOKEN")
    if not NGROK_AUTH_TOKEN:
        raise ValueError("NGROK_AUTH_TOKEN missing in JSON")
except Exception as e:
    raise RuntimeError(f"Failed to load NGROK_AUTH_TOKEN: {e}") from e


# Cell 2: Ngrok V3 Update and Authtoken Configuration (Crucial Fixes)
# All steps below are shell commands; NGROK_AUTH_TOKEN must already be set by the code above.

# 1. Update ngrok binary to V3 (Fixes ERR_NGROK_121)
#    Remove any existing binary, download the V3 archive, unpack it and move it into place.
!sudo rm -f /usr/local/bin/ngrok
!wget -q -c -nc https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.zip
!unzip -qq -n ngrok-v3-stable-linux-amd64.zip
!mv ngrok /usr/local/bin/ngrok

# 2. Cleanup conflicting files (Fixes configuration conflicts)
#    Deletes the config file under the older /root/.ngrok2 directory.
!sudo rm -f /root/.ngrok2/ngrok.yml

# 3. Force your authtoken to be saved (Fixes ERR_NGROK_4018)
# {NGROK_AUTH_TOKEN} is substituted from the Python variable before the command runs.
# NOTE(review): the recorded output shows 'ngrok authtoken <TOKEN>' is accepted by this binary;
# confirm against the ngrok V3 docs whether 'ngrok config add-authtoken <TOKEN>' is now preferred.
!/usr/local/bin/ngrok authtoken {NGROK_AUTH_TOKEN}

# NOTE(review): this message is printed unconditionally; it does not check that the commands above succeeded.
print("Ngrok V3 configuration complete and authtoken saved.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Ngrok V3 configuration complete and authtoken saved.


In [None]:
# ====== Libraries ======
import sys
import random
import threading
import nest_asyncio
import time
import requests

from google.colab import drive

from typing import TypedDict, Dict, Any
# Dict[K, V] annotates a dictionary's key and value types, e.g. scores: Dict[str, int] = {"math": 90, "science": 85}
# TypedDict --> used when you want to define the exact structure (field names and types) of a dictionary
from pydantic import BaseModel
"""
from typing import TypedDict
class Person(TypedDict):
    name: str
    age: int
p1: Person = {"name": "Alice", "age": 25}   # OK
p2: Person = {"name": "Bob"}                # Missing 'age'

Any --> Means anything goes — use when type is unknown or can vary.
Example:
from typing import Any, Dict

data: Dict[str, Any] = {
    "name": "Charlie",
    "age": 30,
    "hobbies": ["reading", "swimming"]
}
Here, values can be of any type — string, int, list, etc.
-------------------------------------------------------------
What is BaseModel?
BaseModel is a class from the pydantic library.
It lets you define data models with type validation, conversion, and error checking — all automatically.

BaseModel is a smart data class that:
checks data types,
converts types if needed,
and raises clear errors when data is invalid.

from pydantic import BaseModel
class User(BaseModel):
    name: str
    age: int

# Example data
user1 = User(name="Alice", age=25)       # Works
user2 = User(name="Bob", age="25")       # Works — auto-converted to int
user3 = User(name="Eve")                 # Raises error (missing 'age')

print(user2.age)  # 25 (converted to int)
------------------------------------------------------
BaseModel is more powerful than TypedDict, Dict, and Any.
But let’s see why you might not always use it, even though it’s great.

Although BaseModel is powerful, there are trade-offs:

Performance
BaseModel runs validation logic every time an object is created.
For large data or loops with millions of objects → it’s slower.
TypedDict or plain Dict are much faster because they do nothing at runtime.

In short: BaseModel costs CPU and memory because validation runs every time you create an object.
"""

from geopy.geocoders import Nominatim

from fastapi import FastAPI
import uvicorn
from pyngrok import ngrok

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFacePipeline

from langgraph.graph import StateGraph, END

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

In [None]:
"""
API stands for Application Programming Interface.
It’s a way for two programs to talk to each other — like a messenger between systems.

Think of a restaurant:
You (the user) → ask for food (a request)
Waiter (the API) → takes your request to the kitchen
Kitchen (the backend system) → makes the food (processes data)
Waiter brings it back to you (response)

FastAPI is a Python framework for building APIs
    — specifically web APIs that send and receive data over the internet.
It’s called FastAPI because it’s both:
Fast to run (very high performance)
Fast to build (automatic validation, docs, and type support)

FastAPI is built on top of Starlette (for web) and Pydantic (for data validation).
Purpose of FastAPI
To make it fast and easy to build APIs that:
  handle HTTP requests (like GET, POST, PUT, DELETE),
  automatically validate input data,
  return responses in JSON,
  generate automatic API docs (Swagger UI),
  run super fast ~33,000 Requests/sec (approx)
      -- (close to Node.js ~35,000 Requests/sec (approx) and Go ~40,000 Requests/sec (approx)).
      -- Flask 3,000 – 4,000 requests/sec (approx.)
Tool	What it does	Purpose
FastAPI	A Python web framework	Builds and runs your API (logic, routes, validation, etc.)
Swagger UI (built into FastAPI)
  --> A web interface	Automatically shows your API documentation and lets you test endpoints
ngrok	A tunneling tool	Exposes your local server (like FastAPI running on localhost) to the public internet

Under the hood, FastAPI uses Uvicorn (an ASGI server) to actually serve those endpoints over the network.
 --> ASGI (Asynchronous Server Gateway Interface) is a specification that defines how Python web frameworks (like FastAPI) talk to web servers (like Uvicorn or Hypercorn).
app = FastAPI(title="Banking RAG Agent")
uvicorn.run(app, host="127.0.0.1", port=API_PORT) --> this line starts the web server.
- It is a local server listening on my chosen port; ngrok is what makes it public.
Listens for HTTP requests (e.g., /query_index, /get_branch_info)
Routes them to your Python functions (@app.post, @app.get)
Returns JSON responses
In short: Uvicorn = actual HTTP server, FastAPI = framework to define routes and handle logic
uvicorn runs your local FastAPI server on 127.0.0.1:{API_PORT}.
ngrok.connect(port) exposes that to the public internet via a temporary HTTPS URL.
That’s why you can test it remotely — ngrok tunnels your local port.
So yes:
Your local FastAPI + ngrok = publicly exposed agent endpoints.

ngrok URL is just the public entry point.
The actual work—running the Uvicorn server, accessing FAISS, and
using the LLM—is happening entirely locally on your computer.
The tunnel is just the invisible, secure connection facilitating the communication.
"""

In [None]:
"""
Every time a model generates the next word, it does NOT think in sentences.
It predicts a list of possible next tokens with probabilities.

Example:
Model wants to pick next word after: “The cat sat on the”

It produces something like:

Token	Probability
“mat”	0.60
“floor”	0.20
“chair”	0.05
“bed”	0.03


do_sample = False (NO RANDOMNESS)
Model always picks the top token (highest probability):
→ Always picks “mat”

do_sample = True (RANDOMNESS ALLOWED)
Now the model samples (randomly chooses) from the probability distribution.
Meaning:
It might pick “mat” (60% chance)
Or “floor” (20% chance)
Or “chair” (5% chance)
------------------------------------------------------------
Temperature ONLY works when do_sample=True.
Low temperature (0.1, 0.3)
→ Slight randomness
→ Mostly deterministic
→ More factual, stable answers

High temperature (0.8, 1.2)
→ More randomness
→ More creative, varied answers


THE BEST ANALOGY
do_sample = the ON/OFF switch
"Should I allow randomness at all?"

temperature = the intensity knob
"If randomness is ON, how strong should it be?"
"""

In [None]:
# -----------------------
# CONFIG
# -----------------------
# The two empty-string tokens are placeholders only: both names are reassigned
# by the "LOAD TOKENS FROM JSON" section below (to the JSON values, or to None on failure).
NGROK_AUTH_TOKEN = "" # <- Enter NGROK TOKEN
HF_TOKEN = "" # <- Enter HUGGING FACE TOKEN
# Random local port in [2000, 9000] for the FastAPI server.
# NOTE(review): nothing checks that the chosen port is actually free.
API_PORT = random.randint(2000,9000)
print(f"**INTERNAL FASTAPI PORT:** {API_PORT}")
MODEL_NAME = 'google/flan-t5-large'  # seq2seq model used to generate answers
EMBEDDING_MODEL = 'all-MiniLM-L6-v2'  # embedding model used to build the vector index

# Mount Google Drive; a failure is only reported, not raised, so the
# file reads below fall back to their own error handling.
try:
  drive.mount('/content/drive')
  print('\nWaiting for file access....')
except Exception as e:
  print(f"Drive not mounted... {e}")

# Fixed 5-second pause before touching files on the mounted drive.
time.sleep(5)
KNOWLEDGE_FILE_PATH = "/content/drive/MyDrive/Agentic AI/knowledge_base.txt"
CONFIG_FILE_PATH = "/content/drive/MyDrive/Agentic AI/config.json"

# -----------------------
# LOAD TOKENS FROM JSON
# -----------------------
# NOTE: `json` is not in this notebook's main import cell; it is imported by the
# earlier Drive/ngrok setup cell, so that cell must have been run first.
# A missing key yields None (dict.get); a missing or malformed file sets both tokens to None.
try:
    with open(CONFIG_FILE_PATH, "r") as f:
        config = json.load(f)
    NGROK_AUTH_TOKEN = config.get("NGROK_AUTH_TOKEN")
    HF_TOKEN = config.get("HF_TOKEN")
    print("Config loaded successfully!")
except FileNotFoundError:
    print(f"Config file not found at {CONFIG_FILE_PATH}")
    NGROK_AUTH_TOKEN = None
    HF_TOKEN = None
except json.JSONDecodeError:
    print(f"Error decoding JSON in {CONFIG_FILE_PATH}")
    NGROK_AUTH_TOKEN = None
    HF_TOKEN = None

# --- FALLBACK KNOWLEDGE DEFINITION ---
# Built-in knowledge text returned by read_knowledge_text() when the knowledge
# file cannot be found, so the vector store can still be built.
FALLBACK_KNOWLEDGE_TEXT = """
URA Bank Loan Product Information

### Core Consumer Loans
* **Unsecured Personal Loan:**
    * Fixed Interest Rate: 6.5%
    * Maximum Tenure: 5 years
    * Maximum Loan Amount: $35,000
    * Prepayment Penalty: 1% of the remaining principal balance after the first year.

* **Home Purchase Loan:**
    * Fixed Interest Rate: 4.8% (for the first 3 years, then variable)
    * Maximum Tenure: 30 years
    * Maximum Loan Amount: $800,000
    * Prepayment Penalty: None.

### Operating Hours and Accounts
* **Main Bank Operating Hours:** Monday to Friday, **9:00 AM to 4:30 PM**. Saturday, **9:00 AM to 12:00 PM**.
* **IRA Accounts:** Individual Retirement Accounts (IRA) are only available to customers with a minimum balance of **$1,500** in a URA Bank checking or savings account.

### Specialized Secured Loans
* **Car Loan (New Vehicles Only):**
    * Maximum Amount: $50,000
    * Interest Rate: 5.5% fixed.
    * Prepayment Penalty: None.
"""
# --- END OF FALLBACK KNOWLEDGE DEFINITION ---

nest_asyncio.apply()
# nest_asyncio allows asyncio event loops to be nested, which is needed when async
# code (like FastAPI/uvicorn or LangGraph) runs inside a notebook that already has a running loop.

# -----------------------
# LOAD LLM
# -----------------------
print(f"\nLoading augmentation model: {MODEL_NAME}")
# accelerate is a library from Hugging Face that manages device placement (GPU or CPU) for models.
try:
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, token=HF_TOKEN)
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)

  # The pipeline could load the model by name, but loading model and tokenizer
  # explicitly first gives more control over both objects.
  pipe = pipeline(
      task='text2text-generation',
      model = model,
      tokenizer = tokenizer,
      max_new_tokens = 512,  # upper bound on the number of generated tokens
      do_sample = True,  # sample from the token distribution instead of always taking the top token
      temperature = 0.3,  # only meaningful with do_sample=True; lower = closer to deterministic
      repetition_penalty = 1.5,  # penalizes repeated tokens to reduce repetition in the output
      # NOTE(review): `model` is already instantiated, so confirm that these
      # model_kwargs (dtype / eos_token_id) still have an effect here.
      model_kwargs = {'dtype':model.dtype,'eos_token_id':tokenizer.eos_token_id}
  )

  # Wrap the Hugging Face pipeline as a LangChain-compatible LLM.
  # NOTE(review): the 'stop' entry is intended to end generation at '###'; confirm that
  # HuggingFacePipeline honours it — augment_with_llm strips a trailing '###' itself.
  hf_llm = HuggingFacePipeline(
      pipeline = pipe,
      model_kwargs = {'stop':['###']},
  )
  print("LLM pipeline is ready.")
except Exception as e:
  print(f"Error loading model {MODEL_NAME}.... {e}")
  # Bare `raise` re-raises the active exception with its original traceback intact.
  raise

# -----------------------
# INGEST + VECTOR STORE
# -----------------------
def read_knowledge_text(path: str):
  """Return the UTF-8 text stored at ``path``.

  If no file exists at ``path``, a notice is printed and the built-in
  FALLBACK_KNOWLEDGE_TEXT is returned instead.
  """
  try:
    knowledge_file = open(path, mode='r', encoding='utf-8')
  except FileNotFoundError:
    print(f"Knowledge file not found at path: {path}. Using FallBack content")
    return FALLBACK_KNOWLEDGE_TEXT
  else:
    with knowledge_file:
      return knowledge_file.read()

# INDEXING: in a real deployment this runs once at startup as a pre-RAG stage; it is kept here for end-to-end understanding.
def build_vector_store(text: str):
  """Split ``text`` into chunks, embed them and return a FAISS vector store.

  Each chunk (chunk_size 380, overlap 50) becomes a Document; every document
  is turned into an embedding with EMBEDDING_MODEL and stored in a FAISS
  index, which allows similarity search. This is the vector database.
  """
  splitter = RecursiveCharacterTextSplitter(chunk_size = 380, chunk_overlap = 50)
  chunks = splitter.split_text(text)
  docs = [Document(page_content=piece) for piece in chunks]
  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
  index = FAISS.from_documents(docs, embeddings)
  print(f"\n  --> Documents Indexed: {len(docs)}")
  return index

# Load the knowledge text (Drive file, or the built-in fallback) and index it once at startup.
knowledge_text = read_knowledge_text(KNOWLEDGE_FILE_PATH)
VECTOR_STORE = build_vector_store(knowledge_text)
print("\nVector Store is ready.")

# Retriever over the vector store using MMR search: fetch_k=8 candidates are
# considered and k=2 documents are returned per query.
retriever = VECTOR_STORE.as_retriever(
    search_type='mmr',
    search_kwargs={'k': 2, 'fetch_k':8})

# -----------------------
# RETRIEVER
# -----------------------
"""
VECTOR_STORE.as_retriever:
- We are creating a search object (retriever) on top of the vector store (FAISS vector database).
MMR Retriever:
The retriever is configured to use the MMR algorithm — Maximum Marginal Relevance.
MMR looks at two things at the same time:
Relevance → how closely a document matches the query
Diversity → making sure the selected documents are not identical to each other

How the process works:
First, fetch_k = 8 →
FAISS performs similarity search (cosine distance/EUCLIDEAN) and retrieves the top 8 similar documents.
Out of these 8, MMR selects the first document purely by highest relevance.

Then MMR calculates marginal relevance for the remaining 7 documents →
This step ensures the next selected document is not only relevant but also different from the first one.
Finally, it chooses k = 2 →
So you get 2 documents that are BOTH relevant and diverse, not duplicates of each other.
"""

# -----------------------
# AUGMENT WITH LLM (Prompt)
# -----------------------

def augment_with_llm(context: str, query: str):
  """Build the RAG prompt from ``context`` and ``query`` and return the LLM's answer.

  The answer is stripped of surrounding whitespace and of a trailing '###'
  stop marker. If generation raises, a "LLM Generation Failed: ..." message
  is returned instead of propagating the error.
  """
  llm_prompt = f"""
You are a professional URA Bank assistant. Answer the Question **STRICTLY AND ONLY** using the Context below.
Your final answer must be **complete**, address **ALL parts** of the user's question, and use a structured format (headings or bullet points) for multi-part questions.

**INSTRUCTIONS:**
1. Use the Context to answer the Question fully.
2. Highlight key data points with **bold**.
3. Structure the output clearly using **bold headings** for each topic requested in the query. For comparisons, **ENSURE BOTH ITEMS ARE PRESENT AND SEPARATELY DESCRIBED**.
4. **DO NOT MENTION OR INCLUDE ANY TOPIC OR FACT NOT EXPLICITLY REQUESTED IN THE QUESTION.** For example, if the question asks about 'IRA Accounts,' do not mention 'Service Fees,' even if they appear in the same context block.
5. If not available, say: "I apologize, but the specific information you requested is not available in my knowledge base. Please contact a URA Bank specialist for further assistance."
6. Do not include any information that is not directly requested in the Question.
7. End with '###'.

>>> CONTEXT START <<<
{context}
>>> CONTEXT END <<<

Question: {query}
--------
Answer:
"""

  stop_marker = '###'
  try:
    answer = hf_llm.invoke(llm_prompt).strip()
  except Exception as e:
    answer = f"LLM Generation Failed: {e}."
  # Without a trailing stop marker the text is returned as it is.
  if not answer.endswith(stop_marker):
    return answer
  return answer[:-len(stop_marker)].strip()

# -----------------------
# FASTAPI APP
# -----------------------
app = FastAPI(title="Bank RAG AI Agent")  # The FastAPI application object; the routes below are registered on it.

"""
This is my root endpoint — the very first API route.
When someone visits the API at /, this function runs and returns a small JSON status message.
@app.get("/") → means this is a GET request at the root path.
read_root() → the function FastAPI calls.
"""

@app.get("/")
def read_root():
  """Root endpoint: return a small JSON status payload describing the app."""
  payload = dict(
      status="URA Bank AI Agent",
      title=app.title,
      description=app.description,
      version=app.version,
      api_docs="Access Swagger UI at docs for full endpoint details.",
  )
  return payload

class QueryRequest(BaseModel):
  # Request body accepted by POST /query_index.
  query: str  # the user's question, read by query_index as request.query

"""
# @app.post("/query_index", ...)	Defines an API endpoint that receives a
user query (request: QueryRequest) and executes the RAG process.
# end point -->> is essentially the address where a service or resource can be found.
In our example, /query_index is the path on your server.
# async allows your server to handle many users (queries) at the exact same time without
one slow query blocking everyone else. It keeps the whole application responsive and fast.

request is the object (or instance of the class) that holds the data.
QueryRequest is the class that defines what the object should look like.
query is the attribute (the specific piece of data) defined within that blueprint.

response_model=Dict[str, Any]
means the response is a dictionary (Dict) whose keys are strings (str) and whose values can be anything (Any).

The @app.post decorator (which is provided by a framework like FastAPI) is a function that has
been specifically programmed to look for a certain set of keywords.
"/query_index" is the first (positional) argument: the path.
response_model= is a predefined keyword that the framework recognizes.
response_model acts like a configuration setting key for the FastAPI framework.
"""

@app.post("/query_index", response_model=Dict[str,Any])
async def query_index(request: QueryRequest): #request object of class QueryRequest
  """Answer ``request.query`` with retrieval-augmented generation.

  The retriever returns Document objects for the query; their ``page_content``
  text chunks are joined with '\\n----\\n' into one context string, which is
  passed together with the query to ``augment_with_llm``.

  Returns a dict with the original query, the generated answer
  ('index_response') and the name of the LLM used ('llm_model').
  """
  query = request.query # query is attribute of this class
  try:
    print("First attempt to get relevent information from Vector Store")
    docs = retriever.invoke(query)
  except Exception as e:
    print(f"Second attempt to get relevent information from Vector Store, due to Exception {e}")
    # Fallback to the older retriever method. The previous spelling
    # (`get_relevenat_documents`) did not exist, so the fallback itself always failed.
    docs = retriever.get_relevant_documents(query)
  context = "\n----\n".join(d.page_content for d in docs)
  print(f"Debug: Retrieved Context for {query[:20]}....': {context[:100]}....")
  final_answer = augment_with_llm(context, query)
  return {"query":query, "index_response":final_answer, 'llm_model': MODEL_NAME}


@app.get('/get_branch_info')
def get_branch_info(zip_code: str = '500034'):
  """Look up a branch address for ``zip_code`` via Nominatim (OpenStreetMap data).

  Returns ``{"branch_address": <text>}`` where the text is the geocoded
  address, a "not found" message, or a "Geocoding failed" message if the
  lookup raised.
  """
  # user_agent is a mandatory parameter: it tells the service who is using its data.
  geolocator = Nominatim(user_agent="ura_bank_locator")
  # The lookup searches for an "HDFC Bank" near the zip code and reports the
  # result as the URA Bank branch address.
  search_query = f"HDFC Bank {zip_code}"
  try:
    location = geolocator.geocode(search_query)
    if location:
      address = f"URA Bank Branch (Nearest  to {zip_code}): {location.address}"
    else:
      address = f"URA Bank Branch not found for zip code {zip_code}."
  except Exception as e:
    address = f"Geocoding failed: {e}"
  return {"branch_address": address}

# -----------------------
# LANGGRAPH AGENT
# -----------------------
class AgentState(TypedDict):
  # State dictionary passed between the LangGraph nodes.
  query: str  # the user's question
  rag_result: str  # answer from the RAG endpoint; synthesis_node replaces it with the final report text

# it calls the FastAPI /query_index endpoint (the RAG engine) and gets the answer.
"""
tool_node is a function that acts as a LangGraph node.
Its job: take a query from the agent state, send it to the FastAPI RAG endpoint (/query_index),
and get the response.

requests.post acts as an HTTP client. Sends a POST request to /query_index.
"""

def tool_node(state: AgentState): # state - object of class AgentState
  """LangGraph node: send the query to the local /query_index endpoint.

  Uses ``requests`` as an HTTP client against the running FastAPI server.
  Returns a new AgentState with the same query and ``rag_result`` set to the
  endpoint's 'index_response', or to an error message when the call fails
  (non-200 status, connection/timeout error, or an unparsable response body).
  """
  url = f"http://127.0.0.1:{API_PORT}/query_index"
  print(f"\nLangGraph: Calling Rag tool for query: {state['query'][:50]}....") #query is the attribute
  try:
    response = requests.post(url, json={'query':state['query']},timeout=300)
    if response.status_code == 200:
      result = response.json().get('index_response','Error in RAG response')
    else:
      result = f"RAG tool failed with status {response.status_code}"
  except (requests.RequestException, ValueError) as e:
    # Network failures and invalid JSON become a result string, like HTTP errors
    # above, instead of aborting the whole graph run.
    result = f"RAG tool request failed: {e}"
  return AgentState(query=state['query'], rag_result=result)

def synthesis_node(state: AgentState):
  """LangGraph node: wrap the query and the RAG answer into the final report text."""
  question = state['query']
  report_lines = [
      "**FINAL REPORT from LangGraph**",
      f"**Query:** {question}",
      f"**Answer (via {MODEL_NAME} Augmentation):** {state['rag_result']}",
  ]
  return AgentState(query=question, rag_result="\n".join(report_lines))

# -----------------------
# SERVER + NGROK
# -----------------------
def run_server():
  """Serve the FastAPI ``app`` with uvicorn on 127.0.0.1:API_PORT, logging errors only.

  Used as the target of the background server thread in the main block.
  """
  uvicorn.run(app, host="127.0.0.1", port=API_PORT, log_level='error')

def start_ngrok(port=API_PORT):
  """Open an ngrok tunnel to the local server on ``port``.

  Prints the public URL and the Swagger UI URL (the generated API docs page
  listing endpoints such as /query_index and /get_branch_info).

  Returns the tunnel object, or None when no auth token is configured or the
  ngrok setup fails.
  """
  if not NGROK_AUTH_TOKEN:
    # Previously this case returned None without any message.
    print('NGROK auth token not configured; skipping public tunnel.')
    return None
  try:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    tunnel = ngrok.connect(port)
    url = tunnel.public_url
    print("\n"+ "=="*80)
    print(f'FastAPI is live on PUBLIC ULR: {url}')
    print(f'Swagger UI                   : {url}/docs')
    print("\n"+ "=="*80)
    return tunnel
  except Exception as e:
    print(f'NGROK set up failed. {e}')
  return None

if __name__ == '__main__':
  # Run uvicorn in a daemon thread so this cell can keep executing.
  # daemon=True: if the main program finishes, the thread is terminated abruptly.
  server_thread = threading.Thread(target=run_server, daemon=True)
  server_thread.start()
  print(f'\nWaiting 15s for uvicorn server to stabilize....')
  time.sleep(15)
  tunnel_object = start_ngrok(API_PORT)

  # Linear two-node graph: rag_tool -> final_summary -> END.
  graph_builder = StateGraph(AgentState)
  graph_builder.add_node('rag_tool',tool_node) # first node: calls the RAG endpoint
  graph_builder.add_node('final_summary', synthesis_node) # second node: formats the final report
  graph_builder.set_entry_point('rag_tool') # where the graph begins
  graph_builder.add_edge('rag_tool', 'final_summary') # path from RAG to summary
  graph_builder.add_edge('final_summary',END) # stopping point
  graph = graph_builder.compile()

  test_queries = [
        "What is the fixed interest rate and the maximum tenure for the Unsecured Personal Loan compared to the Home Purchase Loan?",
        "What are the main operating hours for the bank and the requirement for IRA accounts?",
        "What is the maximum loan amount and interest rate for a Car Loan, and is there a prepayment penalty?",
    ]

  for q in test_queries:
    print(f"\n-- Query: {q[:50]}")
    # graph.invoke() runs the whole graph for one input query until END is reached.
    # recursion_limit caps the number of steps; in this linear graph it does not
    # affect execution, but it guards against infinite loops in more complex graphs.
    # NOTE(review): confirm that the `timeout` keyword is honoured by invoke().
    final_state = graph.invoke({"query":q},config={'recursion_limit':10},timeout=300)

    print("\n" + "=" * 80)
    print(final_state['rag_result'])
    print("=" * 80 + "\n")

  print("\n--- Geopy Tool Test ---")
  try:
    # A timeout keeps this check from hanging forever if the server never answers.
    res = requests.get(f"http://127.0.0.1:{API_PORT}/get_branch_info?zip_code=500034", timeout=30)
    print(f"Branch address: {res.json().get('branch_address')}")
  except Exception as e:
    print(f"Geopy failed: {e}")


  try:
    print("\nServer running. Press Ctrl+C to stop Uvicorn and close Ngrok tunnel.")

    # Keep the main thread alive so the daemon server thread keeps serving.
    while True:
      time.sleep(1)

  except KeyboardInterrupt:
    if tunnel_object:
      ngrok.kill()
    print("\nShutdown complete.")


**INTERNAL FASTAPI PORT:** 5138
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Waiting for file access....

Loading augmentation model: google/flan-t5-large


Device set to use cpu


LLM pipeline is ready.

  --> Documents Indexed: 8

Vector Store is ready.

Waiting 15s for uvicorn server to stabilize....

FastAPI is live on PUBLIC ULR: https://autoicous-daine-homelike.ngrok-free.dev
Swagger UI                   : https://autoicous-daine-homelike.ngrok-free.dev/docs


-- Query: What is the fixed interest rate and the maximum te

LangGraph: Calling Rag tool for query: What is the fixed interest rate and the maximum te....
First attempt to get relevent information from Vector Store
Debug: Retrieved Context for What is the fixed in....': --- URA Bank Loan Product Information ---

### Core Consumer Loans
The fixed interest rate for the *....

**FINAL REPORT from LangGraph**
**Query:** What is the fixed interest rate and the maximum tenure for the Unsecured Personal Loan compared to the Home Purchase Loan?
**Answer (via google/flan-t5-large Augmentation):** ### The fixed interest rate for the **Unsecured Personal Loan** is **7.5%** with a maximum tenure of **60 months**