In [None]:
import warnings 
warnings.filterwarnings("ignore")

In [None]:
# from google import genai
# from google.genai import types
# import base64
# import os

In [None]:
# from google import genai
# from google.genai import types

# client = genai.Client(
#     vertexai=True,
#     project="lending-partner",
#     location="asia-south1"
# )

# rag_tool = types.Tool(
#     retrieval=types.Retrieval(
#         vertex_rag_store=types.VertexRagStore(
#             rag_resources=[
#                 types.VertexRagStoreRagResource(
#                     rag_corpus=(
#                         "projects/lending-partner/"
#                         "locations/asia-south1/"
#                         "ragCorpora/6917529027641081856"
#                     )
#                 )
#             ]
#         )
#     )
# )

# # config
# generate_config = types.GenerateContentConfig(
#     temperature=0.3,
#     max_output_tokens=2048,
#     tools=[rag_tool],
#     system_instruction=(
#         "You are a financial compliance expert in India. "
#         "Answer strictly based on RBI regulations and retrieved documents. "
#         "If information is missing from the corpus, say 'Not found in provided documents'. "
#         "Do not hallucinate."
#     ),
# )

# # response
# response = client.models.generate_content(
#     model=MODEL_NAME,
#     contents=[
#         types.Content(
#             role="user",
#             parts=[
#                 types.Part(text="What are the rules to issue a credit card?")
#             ],
#         )
#     ],
#     config=generate_config,
# )

# print(response.text)
# print("+" * 50)

# candidate = response.candidates[0]
# grounding = candidate.grounding_metadata

# if grounding and grounding.grounding_chunks:
#     for chunk in grounding.grounding_chunks:
#         print("SOURCE DOCUMENT:")
#         print(chunk.retrieved_context.uri)
#         print("\nUSED TEXT:")
#         print(chunk.retrieved_context.text)
#         print("=" * 60)
# else:
#     print("No grounding chunks retrieved.")


In [1]:
# Upper bound on generated tokens for requests that use this constant.
# The documented output-token ceiling for gemini-2.5-flash on Vertex AI is
# 65,535; the previous value (70000) is outside that range and would be
# rejected by the API.
MAX_OUTPUT_TOKENS = 65535
# Model id shared by the RAG chat call and the memory summarizer.
MODEL_NAME = "gemini-2.5-flash"

In [2]:
# ============================================================
# Gemini + Vertex RAG + LangChain Conversation Memory (WORKING)
# ============================================================

from typing import Optional, List
from google import genai
from google.genai import types
from config import SYSTEM_INSTRUCTION
# # LangChain (CORRECT for your environment)
from langchain_classic.memory import ConversationSummaryBufferMemory
from langchain_core.language_models.llms import LLM


# -------------------------
# 1. Gemini client
# -------------------------
# Single source of truth for the GCP project, region and corpus id, so the
# client and the RAG corpus resource path below cannot drift apart.
PROJECT_ID = "lending-partner"
LOCATION = "asia-south1"
RAG_CORPUS_ID = "6917529027641081856"

client = genai.Client(
    vertexai=True,
    project=PROJECT_ID,
    location=LOCATION,
)


# -------------------------
# 2. Vertex RAG tool
# -------------------------
# Retrieval tool that points generate_content at the Vertex RAG corpus.
# The resource path resolves to the same string as before:
# projects/lending-partner/locations/asia-south1/ragCorpora/6917529027641081856
rag_tool = types.Tool(
    retrieval=types.Retrieval(
        vertex_rag_store=types.VertexRagStore(
            rag_resources=[
                types.VertexRagStoreRagResource(
                    rag_corpus=(
                        f"projects/{PROJECT_ID}/"
                        f"locations/{LOCATION}/"
                        f"ragCorpora/{RAG_CORPUS_ID}"
                    )
                )
            ]
        )
    )
)


# -------------------------
# 3. Gemini wrapper for LangChain memory summarization
# -------------------------
class GeminiLLM(LLM):
    """Minimal LangChain ``LLM`` adapter around a google-genai client.

    It is passed to ``ConversationSummaryBufferMemory`` as the model that
    summarizes older conversation turns. Each call is a single, tool-free
    ``generate_content`` request with a low temperature and a 256-token cap.
    """

    # google-genai client used to issue the generate_content request.
    client: genai.Client
    # Model id sent with every request; defaults to the notebook-level MODEL_NAME.
    model_name: str = MODEL_NAME

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this LLM implementation."""
        return "gemini"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager=None,
        **kwargs,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text sent to the model as the request contents.
            stop: Optional stop sequences; forwarded to the model when given.
            run_manager: LangChain callback manager. Accepted so the signature
                matches the base class contract; not used here.
            **kwargs: Extra keyword arguments forwarded by LangChain. Accepted
                so such calls do not raise ``TypeError``; not used here.

        Returns:
            The response text, or an empty string when the response carries
            no text.
        """
        config = {
            "temperature": 0.2,
            # NOTE(review): if the model spends output budget on thinking,
            # a 256-token cap may leave no room for text — confirm.
            "max_output_tokens": 256,
        }
        if stop:
            config["stop_sequences"] = list(stop)

        response = self.client.models.generate_content(
            model=self.model_name,
            contents=prompt,
            config=config,
        )
        # response.text is Optional; returning None would violate the declared
        # ``str`` return type that LangChain relies on.
        return response.text or ""


# -------------------------
# 4. LangChain memory
# -------------------------
# Conversation memory shared by every chat() call in this kernel.
# - llm: GeminiLLM is the model the memory uses for summarization.
# - max_token_limit: token budget for the buffered history.
# - memory_key: chat() reads the history back under "history".
# - input_key: chat() saves each user turn under "input".
memory = ConversationSummaryBufferMemory(
    llm=GeminiLLM(client=client),
    max_token_limit=1000,
    memory_key="history",
    input_key="input",
)


# -------------------------
# 5. Static system instruction (IMPORTANT)
# -------------------------
# SYSTEM_INSTRUCTION is imported from `config` at the top of this cell and is
# used as-is by chat(); the former `SYSTEM_INSTRUCTION = SYSTEM_INSTRUCTION`
# self-assignment was a no-op and has been removed.


# -------------------------
# 6. Chat function (CORRECT payload for Vertex RAG)
# -------------------------
def chat(user_query: str) -> dict:
    """Answer ``user_query`` with Gemini, using the Vertex RAG tool and memory.

    The history held in the module-level ``memory`` is sent as a leading user
    turn (not as part of the system instruction), followed by the current
    question. After the model responds, the exchange is saved back to
    ``memory`` so later calls can build on it.

    Args:
        user_query: The user's question for this turn.

    Returns:
        A dict with two keys:
            "answer": the model's text reply, or "" when the response
                carries no text.
            "sources": a list of ``{"uri", "text_snippet"}`` dicts, one per
                grounding chunk that has a retrieved context; the snippet is
                the first 100 characters of the retrieved text.
    """
    # Load summarized conversation memory
    history_text = memory.load_memory_variables({}).get("history", "")

    contents = []

    # Inject memory as conversational context (NOT system_instruction)
    if history_text.strip():
        contents.append(
            types.Content(
                role="user",
                parts=[
                    types.Part(
                        text=f"Conversation so far:\n{history_text}"
                    )
                ],
            )
        )

    # Current user message
    contents.append(
        types.Content(
            role="user",
            parts=[types.Part(text=user_query)],
        )
    )

    # Gemini call
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=contents,
        config=types.GenerateContentConfig(
            temperature=0.3,
            top_k=40,
            max_output_tokens=2048,
            tools=[rag_tool],
            system_instruction=SYSTEM_INSTRUCTION,
        ),
    )

    # response.text is Optional (e.g. no candidates or no text parts).
    # Normalize it once so neither the memory write nor the caller sees None.
    answer = response.text or ""

    # Save interaction to LangChain memory
    memory.save_context(
        {"input": user_query},
        {"output": answer},
    )

    # Collect grounding sources. candidates, grounding_metadata,
    # grounding_chunks, retrieved_context and its text are all Optional.
    sources = []
    candidates = response.candidates or []
    grounding = candidates[0].grounding_metadata if candidates else None
    chunks = (grounding.grounding_chunks or []) if grounding else []

    for chunk in chunks:
        rc = chunk.retrieved_context
        if rc is None:
            # Chunk has no retrieved context, so there is nothing to cite.
            continue
        sources.append({
            "uri": rc.uri,
            "text_snippet": (rc.text or "")[:100],
        })

    return {
        "answer": answer,
        "sources": sources,
    }

  from .autonotebook import tqdm as notebook_tqdm
  memory = ConversationSummaryBufferMemory(


In [3]:
# Opening question of the conversation.
question = "what are the type of standard asset as per RBI?"

In [4]:
# Ask the opening question, then show the returned answer/sources dict as the
# cell's output.
x = chat(question)
x

  return len(self.get_token_ids(text))


{'answer': 'As per RBI, the types of standard assets for which general provisions are made include:\n\n*   Farm credit to agricultural activities, individual housing loans, and Small and Micro Enterprises (SMEs) sectors.\n*   Advances to Commercial Real Estate (CRE) sector.\n*   Advances to Commercial Real Estate – Residential Housing Sector (CRE - RH).\n*   Housing loans extended at teaser rates.\n*   Restructured advances as stipulated in the Reserve Bank of India (Commercial Banks – Resolution of Stressed Assets) Directions, 2025.\n*   Advances restructured and classified as standard in terms of the Master Direction – Reserve Bank of India (Relief Measures by Banks in Areas affected by Natural Calamities) Directions 2018 – SCBs.\n*   Loans to Medium Enterprises.\n*   All other loans and advances not included in the above categories.\n\n(Source: 164MD.pdf, paragraphs 80, 81)',
 'sources': [{'uri': 'gs://lending-partner/all_documents/164MD.pdf',
   'text_snippet': '72. A bank resortin

####


In [None]:
# Follow-up turn: the question carries no subject of its own, so it depends on
# the conversation history that chat() loads from memory.
chat("Who issues them?")

In [None]:
# Second follow-up: "this" refers to earlier turns, which chat() supplies to
# the model from memory.
chat("Does this apply to foreign banks?")

In [None]:
import os

from google import genai

# API-key client used only to list the available models.
# It gets its own name on purpose: chat() looks up the global `client`, and
# rebinding that name here would replace the Vertex AI client it was built
# with (the RAG tool above is configured for Vertex AI).
# The key is read from the environment rather than hard-coded in the notebook;
# a missing variable raises KeyError naming GOOGLE_API_KEY.
model_listing_client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

models = model_listing_client.models.list()

for m in models:
    print(m.name)


## Trying the agentic approach
