# Building AI Agent Bot With RAG, Langchain, and Reasoning Engine From Scratch

In [1]:
from IPython.display import display, Markdown

from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents import tool
from langchain.pydantic_v1 import BaseModel, Field

from langchain.memory import ChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory

from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)

from vertexai.preview import reasoning_engines
from langchain_google_vertexai import HarmBlockThreshold, HarmCategory
import requests

In [2]:
# constan definitions
# USE_CLOUDSQL = False
USE_CLOUDSQL = True

project_id = "imrenagi-gemini-experiment" #change this to your project id
region = "us-central1" #change this to project location
gemini_embedding_model = "text-embedding-004"
gemini_llm_model = "gemini-1.5-pro-001"
staging_bucket_name = "courses-imrenagicom-agent"
staging_bucket_uri = f"gs://{staging_bucket_name}"

cloudrun_services = !gcloud run services describe courses-api --region=us-central1 --format='value(status.url)'
api_base_url = cloudrun_services[0]
# api_base_url = "localhost:8080"

if not USE_CLOUDSQL:
    # use pgvector docker image for local development
    database_password = "pyconapac"
    database_name = "pyconapac"
    database_user = "pyconapac"
    database_host = "localhost"
else:
    # use cloudsql credential if you want to use cloudsql
    instance_name="pyconapac-demo"
    database_password = 'testing'
    database_name = 'testing'
    database_user = 'testing'


assert database_name, "⚠️ Please provide a database name"
assert database_user, "⚠️ Please provide a database user"
assert database_password, "⚠️ Please provide a database password"

if USE_CLOUDSQL:
    # get the ip address of the cloudsql instance
    ip_addresses = !gcloud sql instances describe {instance_name} --format="value(ipAddresses[0].ipAddress)"
    database_host = ip_addresses[0]

db_conn_string = f"postgres://{database_user}:{database_password}@{database_host}:5432/{database_name}"

embeddings_table_name = "course_content_embeddings"

print(f"db connection: {db_conn_string}")
print(f"api base url: {api_base_url}")

db connection: postgres://testing:testing@35.232.5.157:5432/testing
api base url: https://courses-api-uzttxm4diq-uc.a.run.app


In [4]:
import vertexai
from langchain_google_vertexai import VertexAIEmbeddings

# Configure the Vertex AI SDK with the project, location and staging bucket
# chosen in the configuration cell.
vertexai.init(
    project=project_id,
    location=region,
    staging_bucket=staging_bucket_uri,
)

# Notebook-level embedding client for the configured embedding model.
embeddings_service = VertexAIEmbeddings(model_name=gemini_embedding_model)

In [109]:
class CourseAPIClient:
    """Minimal HTTP client for the courses REST API.

    Each request method sends one HTTP request relative to the base URL and
    returns the decoded JSON body. HTTP status codes are not inspected, so an
    error payload returned by the API is handed back to the caller as-is.
    """

    def __init__(self, url=api_base_url, timeout=10):
        """Store the connection settings.

        Args:
            url: Base URL of the courses API. The default is the value the
                notebook-level ``api_base_url`` had when this class was defined.
            timeout: Seconds ``requests`` waits for the API before raising a
                timeout error, so a stalled service cannot hang a tool call.
        """
        # Drop trailing slashes so joined paths never contain "//".
        self.url = url.rstrip("/")
        self.timeout = timeout

    @staticmethod
    def _segment(value):
        """Percent-encode ``value`` so it stays a single URL path segment."""
        from urllib.parse import quote

        # Identifiers can come from LLM/user input; encoding "/", "?" and "#"
        # keeps them from changing which endpoint is hit.
        return quote(str(value), safe="")

    def list_courses(self):
        """Return the decoded JSON body of ``GET /courses``."""
        response = requests.get(f"{self.url}/courses", timeout=self.timeout)
        return response.json()

    def get_course(self, course_name):
        """Return the decoded JSON body of ``GET /courses/{course_name}``."""
        response = requests.get(
            f"{self.url}/courses/{self._segment(course_name)}",
            timeout=self.timeout,
        )
        return response.json()

    def create_order(self, course, user_name, user_email):
        """POST a new order to ``/orders`` and return the decoded JSON body.

        Args:
            course: Course identifier sent as ``course``.
            user_name: Buyer name sent as ``user_name``.
            user_email: Buyer email sent as ``user_email``.
        """
        payload = {
            "course": course,
            "user_name": user_name,
            "user_email": user_email
        }
        response = requests.post(
            f"{self.url}/orders", json=payload, timeout=self.timeout
        )
        return response.json()

    def get_order(self, order_id):
        """Return the decoded JSON body of ``GET /orders/{order_id}``."""
        response = requests.get(
            f"{self.url}/orders/{self._segment(order_id)}",
            timeout=self.timeout,
        )
        return response.json()

    def pay_order(self, order_id):
        """Return the decoded JSON body of ``POST /orders/{order_id}:pay``."""
        response = requests.post(
            f"{self.url}/orders/{self._segment(order_id)}:pay",
            timeout=self.timeout,
        )
        return response.json()

    def get_payment_page_url(self, order_id):
        """Build the payment page URL for ``order_id`` (no request is sent)."""
        return f"{self.url}/orders/{self._segment(order_id)}/payment"

In [110]:
from typing import List

In [111]:
class CreateOrderInput(BaseModel):
    # Argument schema passed as `args_schema` to the "create-order-tool" tool.
    # The field descriptions are part of the schema, so they are kept verbatim.
    course: str = Field(
        description="name of the course. this is the unique identifier of the course. it typically contains the course title with dashes, all in lowercase."
    )
    user_name: str = Field(
        description="name of the user who is purchasing the course ."
    )
    user_email: str = Field(
        description="email of the user who is purchasing the course."
    )

In [112]:
class GetCourseInput(BaseModel):
    # Argument schema passed as `args_schema` to the "get-course-tool" tool.
    course: str = Field(
        description="name of the course. this is the unique identifier of the course. it typically contains the course title with dashes, all in lowercase."
    )

In [113]:
class GetOrderInput(BaseModel):
    # Argument schema passed as `args_schema` to the "get-order-tool" tool.
    order_number: str = Field(
        description="order number identifier. this is a unique identifier in uuid format."
    )


In [205]:
from typing import Optional, Type

from langchain import hub

from vertexai.preview import reasoning_engines 
from langchain.tools.retriever import create_retriever_tool # type: ignore
from langchain.agents import AgentExecutor, create_react_agent
from langchain_google_vertexai import ChatVertexAI
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.tools import BaseTool, StructuredTool, tool

from langchain_google_cloud_sql_pg import (
    PostgresEngine,
    PostgresVectorStore,
)

class CourseAgent(reasoning_engines.Queryable):
    """Course-selling agent that runs locally or on Vertex AI Reasoning Engine.

    The constructor only stores plain configuration values. Everything that
    needs a network or database connection (vector store retriever, tools,
    the LangChain agent) is built in ``set_up()`` and is dropped again
    whenever the object is pickled.
    """

    def __init__(
        self,
        model: str,
        project: str,
        region: str,
        instance: str,
        database: str,
        table: str,
        user: Optional[str] = None,
        password: Optional[str] = None,

    ):
        """Store the configuration; no connection is opened here.

        Args:
            model: Name of the LLM handed to ``LangchainAgent``.
            project: Project passed to ``PostgresEngine.from_instance``.
            region: Region passed to ``PostgresEngine.from_instance``.
            instance: Instance name passed to ``PostgresEngine.from_instance``.
            database: Database name holding the embeddings table.
            table: Name of the embeddings table used by the vector store.
            user: Optional database user.
            password: Optional database password.
        """
        self.model_name = model
        self.project = project
        self.region = region
        self.instance = instance
        self.database = database
        self.table = table
        self.user = user
        self.password = password
        # Chat histories keyed by session id; lives only in this process.
        self.store = {}
        self.agent = None
        self.retriever = None

    def __getstate__(self):
        """Custom method for pickling the object."""
        state = self.__dict__.copy()
        # Remove the unpicklable entries
        # NOTE(review): `store` (in-memory chat histories) is still pickled —
        # confirm local test conversations should ship with a deployed agent.
        del state['agent']
        del state['retriever']
        return state

    def __setstate__(self, state):
        """Custom method for unpickling the object."""
        self.__dict__.update(state)
        self.agent = None
        self.retriever = None
        # set_up() has to run again after unpickling; query() and
        # search_course_content() call it on demand if it has not.

    def list_courses(self) -> List[str]:
        """List all available courses sold on the platform."""
        client = CourseAPIClient()
        return client.list_courses()

    def create_order(self, course: str, user_name: str, user_email: str) -> str:
        """Create order for a course. This function can be used to create an order for a course. When this function returns successfully, it will return payment url to user to make payment.

        If the API response carries no ``order_id``, a failure message that
        includes the raw response is returned instead of raising.
        """
        client = CourseAPIClient()
        # Log the course only: the buyer's name and email are personal data
        # and do not belong in notebook output or service logs.
        print(f"Creating order for course: {course}")

        res = client.create_order(course, user_name, user_email)
        order_id = res.get("order_id") if isinstance(res, dict) else None
        if not order_id:
            # Give the model the API's answer so it can explain the failure,
            # rather than aborting the whole agent run with a KeyError.
            return f"Failed to create order for course {course}. API response: {res}"

        # Reuse the client's URL builder so the payment link cannot drift
        # from the API client's own notion of the base URL.
        payment_url = client.get_payment_page_url(order_id)
        return f"Order number {order_id} created successfully. Payment URL: {payment_url}"

    def get_course(self, course: str) -> str:
        """Get course details by course name. course name is the unique identifier of the course. it typically contains the course title with dashes.
        This function can be used to get course details such as course price, etc.

        The value returned is the decoded JSON body from the API client.
        """
        client = CourseAPIClient()
        return client.get_course(course)

    def get_order(self, order_number: str) -> str:
        """Get order by using order number. This function can be used to get order details such as payment status to check whether the order has been paid or not. If user already paid the course, say thanks

        The value returned is the decoded JSON body from the API client.
        """
        client = CourseAPIClient()
        return client.get_order(order_number)

    def search_course_content(self, query: str) -> str:
        """Explain about software security course materials."""
        if self.retriever is None:
            # Called before set_up() (e.g. right after unpickling): build the
            # retriever now instead of failing on a None attribute.
            self.set_up()
        result = str(self.retriever.invoke(query))
        return result

    def get_session_history(self, session_id: str) -> BaseChatMessageHistory:
        """Return the chat history for ``session_id``, creating it on first use."""
        if session_id not in self.store:
            self.store[session_id] = ChatMessageHistory()
        return self.store[session_id]

    def set_up(self):
        """All unpickle-able logic should go here.
        In general, add any logic that requires a network or database
        connection.
        """

        # Initialize the vector store
        engine = PostgresEngine.from_instance(
            self.project,
            self.region,
            self.instance,
            self.database,
            user=self.user,
            password=self.password,
            quota_project=self.project,
        )

        embeddings_service = VertexAIEmbeddings(model_name=gemini_embedding_model)

        vector_store = PostgresVectorStore.create_sync(
            engine,
            table_name=self.table,
            embedding_service=embeddings_service,
        )
        self.retriever = vector_store.as_retriever(search_kwargs={"k": 10})

        # Wrap the bound methods as tools. Names and descriptions are what
        # the model sees, so they are passed explicitly.
        search_course_content = StructuredTool.from_function(
            func=self.search_course_content,
            name="search_course_content",
            description="Explain about software security course materials.",
        )

        list_courses = StructuredTool.from_function(
            func=self.list_courses,
            name="list_courses",
            description="List all available courses sold on the platform.",
        )

        get_course = StructuredTool.from_function(
            func=self.get_course,
            name="get-course-tool",
            description="""Get course details by course name. course name is the unique identifier of the course. it typically contains the course title with dashes.
        This function can be used to get course details such as course price, etc.""",
            args_schema=GetCourseInput,
        )

        create_order = StructuredTool.from_function(
            func=self.create_order,
            name="create-order-tool",
            description="""Create order for a course. This function can be used to create an order for a course. When this function returns successfully, it will return payment url to user to make payment.""",
            args_schema=CreateOrderInput,
        )

        get_order = StructuredTool.from_function(
            func=self.get_order,
            name="get-order-tool",
            description="""Get order by using order number. This function can be used to get order details such as payment status to check whether the order has been paid or not. If user already paid the course, say thanks""",
            args_schema=GetOrderInput,
        )

        tools = [search_course_content, list_courses, get_course, create_order, get_order]

        # Initialize the LLM and prompt
        # The leading mapping picks the prompt variables out of the agent
        # input: chat history, user input and the formatted tool-call steps.
        prompt = {
            "chat_history": lambda x: x["history"],
            "input": lambda x: x["input"],
            "agent_scratchpad": (
                lambda x: format_to_openai_function_messages(x["intermediate_steps"])
            ),
        } | ChatPromptTemplate(
            messages = [
                SystemMessagePromptTemplate.from_template("""
                You are a bot assistant that sells online course about software security. You only use information provided from datastore or tools. You can provide the information that is relevant to the user's question or the summary of the content. If they ask about the content, you can give them more detail about the content. If the user seems interested, you may suggest the user to enroll in the course. 
                """),
                MessagesPlaceholder(variable_name="chat_history", optional=True),
                HumanMessagePromptTemplate.from_template("Use tools to answer this questions: {input}"),
                MessagesPlaceholder(variable_name="agent_scratchpad"),
            ]
        )

        safety_settings = {
            HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        }

        ## Model parameters
        model_kwargs = {
            "temperature": 0.5,
            "safety_settings": safety_settings,
        }

        self.agent = reasoning_engines.LangchainAgent(
            model=self.model_name,
            tools=tools,
            prompt=prompt,
            chat_history=self.get_session_history,
            agent_executor_kwargs={
                "return_intermediate_steps": True,
            },
            model_kwargs=model_kwargs,
            enable_tracing=True,
        )
        print("agent is configured")

    def query(self, input: str, session_id: str) -> str:
        """Query the application.

        Args:
            input: The user query.
            session_id: The user's session id.

        Returns:
            The LLM response dictionary, exactly as returned by the
            underlying agent (callers in this notebook read its "output" key).
        """
        if self.agent is None:
            # Not configured yet (fresh or freshly unpickled instance): build
            # the agent on demand instead of failing on a None attribute.
            self.set_up()
        response = self.agent.query(
            input=input,
            config={"configurable": {"session_id": session_id}},
        )
        return response

In [209]:
# Collect the constructor arguments from the configuration cell, then build
# the agent and open its connections locally.
agent_settings = {
    "model": gemini_llm_model,
    "project": project_id,
    "region": region,
    "instance": instance_name,
    "database": database_name,
    "table": embeddings_table_name,
    "user": database_user,
    "password": database_password,
}
agent = CourseAgent(**agent_settings)
agent.set_up()

ClientConnectorDNSError: Cannot connect to host sqladmin.googleapis.com:443 ssl:default [Temporary failure in name resolution]

In [203]:
import uuid

# A random UUID keeps this conversation's history apart from any other session.
session_id = f"{uuid.uuid4()}"
print(f"Generated session ID: {session_id}")

Generated session ID: 2f6f7f10-b3be-44b4-923f-a4d1b43abefb


In [204]:
# Smoke-test the locally configured agent before deploying it.
res = agent.query(input="Can you please share what are taught on this course?", session_id=session_id)

local_answer = res["output"]
display(Markdown(local_answer))

Retrying langchain_google_vertexai.chat_models._completion_with_retry.<locals>._completion_with_retry_inner in 4.0 seconds as it raised ServiceUnavailable: 503 DNS resolution failed for us-central1-aiplatform.googleapis.com:443: C-ares status is not ARES_SUCCESS qtype=A name=us-central1-aiplatform.googleapis.com is_balancer=0: Timeout while contacting DNS servers.
Retrying langchain_google_vertexai.chat_models._completion_with_retry.<locals>._completion_with_retry_inner in 4.0 seconds as it raised ServiceUnavailable: 503 DNS resolution failed for us-central1-aiplatform.googleapis.com:443: C-ares status is not ARES_SUCCESS qtype=A name=us-central1-aiplatform.googleapis.com is_balancer=0: Timeout while contacting DNS servers.
Retrying langchain_google_vertexai.chat_models._completion_with_retry.<locals>._completion_with_retry_inner in 4.0 seconds as it raised ServiceUnavailable: 503 DNS resolution failed for us-central1-aiplatform.googleapis.com:443: C-ares status is not ARES_SUCCESS qty

KeyboardInterrupt: 

# Deploying the Agent on Vertex AI

Deploying is as simple as calling the `create()` method. We pass it the agent along with the dependencies required to run the agent remotely.

In [147]:
# Deploy the local `agent` object as a Reasoning Engine. `requirements` lists
# the packages the agent needs in order to run remotely.
# NOTE(review): the two `google-cloud-aiplatform[...]` extras are unpinned while
# the base package is pinned to 1.69.0 — confirm they resolve to the same version.
# NOTE(review): `asyncio` ships with the Python standard library; the PyPI
# `asyncio==3.4.3` pin looks unnecessary — confirm before removing.
remote_agent = reasoning_engines.ReasoningEngine.create(
    agent,
    requirements=[
        "google-cloud-aiplatform==1.69.0",
        "google-cloud-aiplatform[langchain]",
        "google-cloud-aiplatform[reasoningengine]",
        "langchain==0.2.16",
        "langchain_core==0.2.39",
        "langchain_community==0.2.17",
        "langchain-google-vertexai==1.0.10",
        "cloudpickle==3.1.0",
        "pydantic==2.9.2",
        "langchain-google-community==1.0.8",
        "google-cloud-discoveryengine==0.12.3",
        "nest-asyncio",
        "asyncio==3.4.3",
        "asyncpg==0.29.0",
        "cloud-sql-python-connector[asyncpg]==1.12.1",        
        "langchain-google-cloud-sql-pg==0.11.0",
        "numpy",
        "pandas",
        "pgvector==0.3.5",
        "psycopg2-binary==2.9.9",
        "requests",
        "google-cloud-trace"
    ],    
    display_name="course-agent",
    # presumably the Python version of the remote runtime — TODO confirm
    sys_version="3.11",
)
# Show the created resource as the cell output.
remote_agent

Using bucket courses-imrenagicom-agent
Writing to gs://courses-imrenagicom-agent/reasoning_engine/reasoning_engine.pkl
Writing to gs://courses-imrenagicom-agent/reasoning_engine/requirements.txt
Creating in-memory tarfile of extra_packages
Writing to gs://courses-imrenagicom-agent/reasoning_engine/dependencies.tar.gz
Creating ReasoningEngine
Create ReasoningEngine backing LRO: projects/896489987664/locations/us-central1/reasoningEngines/2034377986362310656/operations/1362851981252624384
ReasoningEngine created. Resource name: projects/896489987664/locations/us-central1/reasoningEngines/2034377986362310656
To use this ReasoningEngine in another session:
reasoning_engine = vertexai.preview.reasoning_engines.ReasoningEngine('projects/896489987664/locations/us-central1/reasoningEngines/2034377986362310656')


<vertexai.reasoning_engines._reasoning_engines.ReasoningEngine object at 0x7f38c0371a50> 
resource name: projects/896489987664/locations/us-central1/reasoningEngines/2034377986362310656

### Grant Discovery Engine Editor access to Reasoning Engine service account

Before you send queries to your remote agent, you'll need to grant the **Discovery Engine Editor** role to the Reasoning Engine service account.

After you've completed this step, your remote agent will be able to retrieve documents from the data store that you created in Vertex AI Search:

In [159]:
# Look up the numeric project number for `project_id`; the Reasoning Engine
# service account address used in the IAM cell is built from it.
from googleapiclient import discovery

service = discovery.build("cloudresourcemanager", "v1")
response = service.projects().get(projectId=project_id).execute()
project_number = response["projectNumber"]
project_number

'896489987664'

In [198]:
# Enable the Cloud Trace API (the agent is created with enable_tracing=True).
# No --project flag is given, so this presumably targets gcloud's active project — confirm.
!gcloud services enable cloudtrace.googleapis.com

In [186]:
# # Add a new role binding to the IAM policy
!gcloud projects add-iam-policy-binding {project_id} \
    --member=serviceAccount:service-{project_number}@gcp-sa-aiplatform-re.iam.gserviceaccount.com \
    --role=roles/discoveryengine.editor

!gcloud projects add-iam-policy-binding {project_id} \
    --member=serviceAccount:service-{project_number}@gcp-sa-aiplatform-re.iam.gserviceaccount.com \
    --role="roles/cloudsql.client"

!gcloud projects add-iam-policy-binding {project_id} \
    --member=serviceAccount:service-{project_number}@gcp-sa-aiplatform-re.iam.gserviceaccount.com \
    --role="roles/run.invoker"

!gcloud projects add-iam-policy-binding {project_id} \
    --member=serviceAccount:service-{project_number}@gcp-sa-aiplatform-re.iam.gserviceaccount.com \
    --role="roles/cloudtrace.agent"

!gcloud projects add-iam-policy-binding {project_id} \
    --member=serviceAccount:service-{project_number}@gcp-sa-aiplatform-re.iam.gserviceaccount.com \
    --role="roles/editor"


Updated IAM policy for project [imrenagi-gemini-experiment].
bindings:
- members:
  - serviceAccount:service-896489987664@gcp-sa-aiplatform-cc.iam.gserviceaccount.com
  role: roles/aiplatform.customCodeServiceAgent
- members:
  - serviceAccount:service-896489987664@gcp-sa-vertex-ex-cc.iam.gserviceaccount.com
  role: roles/aiplatform.extensionCustomCodeServiceAgent
- members:
  - serviceAccount:service-896489987664@gcp-sa-vertex-ex.iam.gserviceaccount.com
  role: roles/aiplatform.extensionServiceAgent
- members:
  - serviceAccount:service-896489987664@gcp-sa-vertex-rag.iam.gserviceaccount.com
  role: roles/aiplatform.ragServiceAgent
- members:
  - serviceAccount:service-896489987664@gcp-sa-aiplatform-re.iam.gserviceaccount.com
  role: roles/aiplatform.reasoningEngineServiceAgent
- members:
  - serviceAccount:service-896489987664@gcp-sa-aiplatform.iam.gserviceaccount.com
  role: roles/aiplatform.serviceAgent
- members:
  - serviceAccount:service-896489987664@gcp-sa-artifactregistry.iam.g

In [210]:
import uuid

# Start a fresh session for the remote conversation below.
session_id = f"{uuid.uuid4()}"
print(f"Generated session ID: {session_id}")

Generated session ID: d6eab0eb-1a0c-424e-bbbc-8711a559aded


In [211]:
# Remote test: ask the deployed agent what the course covers.
response = remote_agent.query(input="Can you please share what are being taught on this course", session_id=session_id)
answer_md = Markdown(response["output"])
display(answer_md)

This course covers crucial aspects of software security including:

- **Authentication and Authorization:** Learn how to securely implement user authentication and authorization mechanisms, including best practices for password management, token-based authentication, and role-based access control (RBAC).
- **REST API Security:** Understand the common vulnerabilities in REST APIs and how to mitigate them. This includes topics such as input validation, output encoding, and protection against cross-site scripting (XSS) and cross-site request forgery (CSRF) attacks.
- **SQL Injection Prevention:** Learn about SQL injection attacks and how to prevent them by using parameterized queries and other secure coding practices.
- **File Upload Security:** Discover the risks associated with file uploads and how to securely handle them, including file type validation, file size limits, and protection against malicious uploads.
- **Security Cheat Sheets:** Access a collection of cheat sheets that provide concise and practical guidance on various security topics.

This is just a glimpse of the topics covered.  Would you like to know more? 


In [189]:
# Remote test: ask about one specific topic, reusing the same session id.
response = remote_agent.query(input="Does it teach about how to design a forgot password system securely?", session_id=session_id)
answer_md = Markdown(response["output"])
display(answer_md)

Yes, the course includes a cheat sheet dedicated to "Forgot Password" best practices. It covers topics like securely resetting passwords, handling password reset requests, and what to do after a user has proven their identity. 
Would you like to know more?


In [212]:
# Remote test: ask for the course price in the same session.
response = remote_agent.query(input="How much this course costs?", session_id=session_id)
answer_md = Markdown(response["output"])
display(answer_md)

Which course are you interested in? 


In [191]:
# Remote test: tell the agent the user wants to enroll.
response = remote_agent.query(input="Yes. I want to enroll", session_id=session_id)
answer_md = Markdown(response["output"])
display(answer_md)

Great! I have created an order for you with order number c8ceb3d6-035c-40ab-9680-3a5caf5d0f07. You can make the payment through this link: https://courses-api-uzttxm4diq-uc.a.run.app/orders/c8ceb3d6-035c-40ab-9680-3a5caf5d0f07/payment. Once you complete the payment, you will get access to all course materials.


In [192]:
# Remote test: provide the buyer's name and email.
response = remote_agent.query(input="Name is Mulyono and email is mulyono@gmail.com", session_id=session_id)
answer_md = Markdown(response["output"])
display(answer_md)

Sure, Mulyono. I can help with that. What would you like to know? 


In [193]:
# Remote test: send a bare confirmation in the same session.
response = remote_agent.query(input="Yes", session_id=session_id)
answer_md = Markdown(response["output"])
display(answer_md)

Great! I have created an order for you with order number 8dcd879e-8412-418f-8574-e98126b58dca. You can make the payment through this link: https://courses-api-uzttxm4diq-uc.a.run.app/orders/8dcd879e-8412-418f-8574-e98126b58dca/payment. Once you complete the payment, you will get access to all course materials.


In [194]:
# Remote test: ask for the order status without giving an order number.
response = remote_agent.query(input="Can you please check the status of my order", session_id=session_id)
answer_md = Markdown(response["output"])
display(answer_md)

Could you please provide your order number? 


# Clean Up

Don't forget to clean up the resources after you are done with the agent.

In [195]:
# reasoning_engines.ReasoningEngine.list()

In [196]:
# remote_agent = reasoning_engines.ReasoningEngine('projects/896489987664/locations/us-central1/reasoningEngines/6601028008515993600')

# remote_agent.delete()

In [197]:
# print(remote_agent.query(input="movies about engineers"))