In [None]:
import vertexai

# --- Notebook-wide configuration ---
PROJECT_ID = "gglobo"  # project handed to vertexai.init
REGION = "us-east4"  # location handed to vertexai.init
GSC_BUCKET = "gs://raw_doquinha"  # gs:// URI used as the staging bucket
AI_MODEL = "gemini-2.5-flash"  # model identifier used by the rest of the notebook

# Initialize the Vertex AI SDK with the project, location and staging bucket above.
vertexai.init(project=PROJECT_ID, location=REGION, staging_bucket=GSC_BUCKET)


In [None]:
### Create the Tool to be used: RAG Engine

from vertexai.preview import rag

EMBEDDING_MODEL = "publishers/google/models/text-embedding-005"

# Backend configuration for the RAG vector database: its embedding-model
# config points at the publisher model named by EMBEDDING_MODEL.
embedding_endpoint = rag.VertexPredictionEndpoint(publisher_model=EMBEDDING_MODEL)
backend_config = rag.RagVectorDbConfig(
    rag_embedding_model_config=rag.RagEmbeddingModelConfig(
        vertex_prediction_endpoint=embedding_endpoint
    )
)

# Create the RAG corpus.
# NOTE: every execution of this cell calls create_corpus again.
rag_corpus = rag.create_corpus(
    backend_config=backend_config,
    display_name="doquinha-rag-corpus",
)


In [None]:
### Generating list of doc files in GCS bucket
from google.cloud import storage

# GSC_BUCKET is a "gs://..." URI; dropping its first 5 characters ("gs://")
# leaves the bare bucket name.
my_bucket = GSC_BUCKET[5:]  # Extract bucket name from the GCS path

def list_files_by_extension(bucket_name, storage_client=None):
    """Group every object of a GCS bucket by its file extension.

    Args:
        bucket_name: Bare bucket name (without the ``gs://`` prefix).
        storage_client: Optional client object exposing
            ``bucket(name).list_blobs()``. When omitted, a
            ``storage.Client()`` is created.

    Returns:
        dict mapping an extension (leading dot included, e.g. ``".md"``;
        ``""`` when the final path component contains no dot) to the list of
        ``gs://<bucket>/<object>`` URIs, in listing order.
    """
    if storage_client is None:
        storage_client = storage.Client()

    files_by_extension = {}
    for blob in storage_client.bucket(bucket_name).list_blobs():
        # Only the final path component may contribute an extension: a dot in
        # a folder part (e.g. "v1.2/README") must not be mistaken for one.
        base_name = blob.name.rsplit('/', 1)[-1]
        ext = '.' + base_name.rsplit('.', 1)[-1] if '.' in base_name else ''
        full_name = 'gs://' + bucket_name + '/' + blob.name
        files_by_extension.setdefault(ext, []).append(full_name)

    return files_by_extension

# Map each extension found in the bucket to the list of its gs:// URIs.
docs = list_files_by_extension(my_bucket)

# URI lists for the two file types imported into the corpus. Plain indexing
# raises KeyError when the bucket holds no object with that extension.
paths_md = docs[".md"]
paths_png = docs[".png"]


In [None]:
# Import the Markdown files into the RAG corpus. The response is kept so that
# failed imports are reported instead of passing unnoticed.
md_import_response = rag.import_files(
    corpus_name=rag_corpus.name,
    paths=paths_md,
    chunk_size=512,
    chunk_overlap=50,
)
print(
    "Markdown import:",
    md_import_response.imported_rag_files_count, "imported,",
    md_import_response.failed_rag_files_count, "failed",
)

# Define the LLM parser configuration for parsing PNG files
llm_parser_config = rag.LlmParserConfig(
    model_name=AI_MODEL,
    # max_parsing_requests_per_min=MAX_PARSING_REQUESTS_PER_MIN, # Optional
    # custom_parsing_prompt=CUSTOM_PARSING_PROMPT, # Optional
)

# Import the PNG files into the RAG corpus through the LLM parser, again
# keeping the response so the outcome can be checked.
png_import_response = rag.import_files(
    corpus_name=rag_corpus.name,
    paths=paths_png,
    llm_parser=llm_parser_config,
    chunk_size=1024,
    chunk_overlap=200,
)
print(
    "PNG import:",
    png_import_response.imported_rag_files_count, "imported,",
    png_import_response.failed_rag_files_count, "failed",
)


In [None]:
# Show the resource name of the corpus held in rag_corpus.
print(rag_corpus.name)

In [None]:
import os

from vertexai.preview import rag

# Re-attach to an existing corpus by resource name. The name is read from the
# RAG_CORPUS_NAME environment variable; when that is unset, the corpus object
# already present in this session is used instead.
corpus_resource_name = os.environ.get("RAG_CORPUS_NAME")
if not corpus_resource_name:
    try:
        corpus_resource_name = rag_corpus.name
    except NameError as exc:
        raise RuntimeError(
            "Set RAG_CORPUS_NAME to the corpus resource name, or run the "
            "corpus-creation cell first."
        ) from exc

rag_corpus = rag.get_corpus(corpus_resource_name)

In [None]:
from google.adk.tools.retrieval.vertex_ai_rag_retrieval import VertexAiRagRetrieval
from dotenv import load_dotenv

# Define the instructions for the root agent
def return_instructions_root() -> str:
    """Return the system instruction given to the root agent.

    The prompt refers to the retrieval tool by the name it is registered
    under (``retrieve_rag_documentation``), not by the Python variable that
    holds the tool object.

    Returns:
        The instruction prompt as a single multi-line string.
    """
    instruction_prompt_v1 = """
        You are an AI assistant with access to specialized corpus of documents, containing markdown and image files.
        Your role is to provide accurate and concise answers to questions based
        on documents that are retrievable using retrieve_rag_documentation. If you believe
        the user is just chatting and having casual conversation, don't use the retrieval tool.

        But if the user is asking a specific question about a knowledge they expect you to have,
        you can use the retrieval tool to fetch the most relevant information.
        
        If you are not certain about the user intent, make sure to ask clarifying questions
        before answering. Once you have the information you need, you can use the retrieval tool.
        If you cannot provide an answer, clearly explain why.

        Do not answer questions that are not related to the corpus.
        When crafting your answer, you may use the retrieval tool to fetch details
        from the corpus. Make sure to cite the source of the information.
        
        Citation Format Instructions:
 
        When you provide an answer, you must also add one or more citations **at the end** of
        your answer. If your answer is derived from only one retrieved chunk,
        include exactly one citation. If your answer uses multiple chunks
        from different files, provide multiple citations. If two or more
        chunks came from the same file, cite that file only once.

        **How to cite:**
        - Use the retrieved chunk's `title` to reconstruct the reference.
        - Include the document title and section if available.
        - For web resources, include the full URL when available.
 
        Format the citations at the end of your answer under a heading like
        "Citations" or "References." For example:
        "Citations:
        1) RAG Guide: Implementation Best Practices
        2) Advanced Retrieval Techniques: Vector Search Methods"

        Do not reveal your internal chain-of-thought or how you used the chunks.
        Simply provide concise and factual answers, and then list the
        relevant citation(s) at the end. If you are not certain or the
        information is not available, clearly state that you do not have
        enough information.
        """
    return instruction_prompt_v1

# Read the variables defined in the .env file.
load_dotenv()

# Retrieval tool that queries the RAG corpus referenced by rag_corpus.name.
ask_vertex_retrieval = VertexAiRagRetrieval(
    name="retrieve_rag_documentation",
    description="Use this tool to retrieve documentation and reference materials for the question from the RAG corpus,",
    rag_resources=[rag.RagResource(rag_corpus=rag_corpus.name)],
    similarity_top_k=10,
    vector_distance_threshold=0.6,
)


In [None]:
from google.adk.agents import Agent
from google.genai import types

# Safety settings for the model: the block threshold for the
# dangerous-content category is set to OFF.
safety_settings = [
    types.SafetySetting(
        threshold=types.HarmBlockThreshold.OFF,
        category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
]

# Content generation parameters passed to the agent.
generate_content_config = types.GenerateContentConfig(
    temperature=0.28,
    top_p=0.95,
    max_output_tokens=1000,
    safety_settings=safety_settings,
)

# Root agent: model, instruction prompt and the retrieval tool as its only tool.
root_agent = Agent(
    name="doquinha_root_agent",
    model=AI_MODEL,
    instruction=return_instructions_root(),
    tools=[ask_vertex_retrieval],
    generate_content_config=generate_content_config,  # Optional.
)


In [None]:
from vertexai.preview.reasoning_engines import AdkApp

# Wrap the root agent in an AdkApp instance.
app = AdkApp(agent=root_agent)

# Local test: stream the events produced for one question and print each one.
USER_ID = "test_user"

for event_local in app.stream_query(message="O que são videoviews?", user_id=USER_ID):
    print(event_local)


In [None]:
### Deploying the agent

from vertexai import agent_engines

import os

# Alternative: create the Agent Engine without passing a service account
# remote_app = agent_engines.create(
#     agent_engine=root_agent,
#     display_name="doquinha",
#     requirements=[
#         "google-cloud-aiplatform[adk,agent_engines]"
#     ]
# )

# Service account passed to the deployment. It is read from the
# AGENT_SERVICE_ACCOUNT environment variable (which can be supplied through
# the .env file) so the cell does not depend on a variable defined nowhere.
sa_account = os.environ.get("AGENT_SERVICE_ACCOUNT")
if not sa_account:
    raise RuntimeError(
        "Set AGENT_SERVICE_ACCOUNT to the service account to pass to "
        "agent_engines.create before running this cell."
    )

# Create the Agent Engine for the root agent using a service account
remote_app_sa = agent_engines.create(
    agent_engine=root_agent,
    display_name="doquinha_sa",
    requirements=[
        "google-cloud-aiplatform[adk,agent_engines]"
    ],
    service_account=sa_account
)


In [None]:
# print(remote_app.resource_name)
# Show the resource name of the Agent Engine held in remote_app_sa.
print(remote_app_sa.resource_name)


In [None]:
import os

# Re-attach to an already deployed Agent Engine by resource name. The name is
# read from the AGENT_ENGINE_RESOURCE_NAME environment variable; when that is
# unset, the engine object already present in this session is used instead.
agent_engine_resource_name = os.environ.get("AGENT_ENGINE_RESOURCE_NAME")
if not agent_engine_resource_name:
    try:
        agent_engine_resource_name = remote_app_sa.resource_name
    except NameError as exc:
        raise RuntimeError(
            "Set AGENT_ENGINE_RESOURCE_NAME to the Agent Engine resource "
            "name, or run the deployment cell first."
        ) from exc

# remote_app = agent_engines.get(agent_engine_resource_name)
remote_app_sa = agent_engines.get(agent_engine_resource_name)

In [None]:
### Testing the remote app for the agent

from vertexai import agent_engines

remote_session = remote_app_sa.create_session(user_id=USER_ID)
async for event_remote in remote_app_sa.async_stream_query(
    user_id=USER_ID,
    session_id=remote_session["id"],
    message="O que são videoviews?"
):
    print(event_remote)


In [None]:
# Cleanup: uncomment the relevant line to delete a deployed Agent Engine.
# remote_app.delete(force=True)
# remote_app_sa.delete(force=True)