# Vertex AI Search for Retrieval, Gemini for Generation

This example is a minimal demonstration of **Grounding** (a.k.a. RAG, retrieval-augmented generation) using [Vertex AI Search](https://cloud.google.com/generative-ai-app-builder/docs/enterprise-search-introduction) (without the LLM add-on) for retrieval and [Gemini](https://ai.google.dev/docs) for generation.

1. [Prep] Search: Process Docs --> Keywords & Embeddings _(prerequisite)_
1. [Retrieval] Search: **Query --> Chunks**
1. [Generation] Gemini: **Prompt w/ Context --> Response**

Vertex AI Search can be used as a RAG provider in many different configurations. Several more examples are available at https://github.com/GoogleCloudPlatform/generative-ai/blob/main/search/.


## Setup

In [None]:
!pip install bigframes google-cloud-aiplatform --quiet --upgrade-strategy only-if-needed

## Helper functions

In [None]:
# @title Retrieval on Vertex AI Search (raw HTTP POST API).
import json
import requests

def do_retrieval(query, max_results=5, max_segments=4, timeout=60):
  """Run a search against the Vertex AI Search data store via raw HTTP POST.

  Reads the notebook globals PROJECT_ID, LOCATION, DATA_STORE_ID,
  DATA_STORE_MODE and access_token.

  Args:
    query: Search query text.
    max_results: Sent as the request's `pageSize`.
    max_segments: Sent as `maxExtractiveSegmentCount`. Not sent when
      DATA_STORE_MODE is "chunked", because the content spec is replaced.
    timeout: Seconds `requests` waits before giving up on the call. Without
      a timeout a stalled connection would block the notebook indefinitely.

  Returns:
    The decoded JSON body of the HTTP response. On a non-200 status the
    status code and body text are printed and the decoded body is still
    returned.

  Raises:
    requests.exceptions.Timeout: If the service does not answer in time.
  """
  serving_config_path = (
      f"projects/{PROJECT_ID}/locations/{LOCATION}"
      f"/collections/default_collection/dataStores/{DATA_STORE_ID}"
      "/servingConfigs/default_search"
  )
  url = f"https://discoveryengine.googleapis.com/v1alpha/{serving_config_path}:search"
  headers = {
      "Authorization": f"Bearer {access_token}",
      "Content-Type": "application/json",
  }
  post_data = {
      "servingConfig": serving_config_path,
      "pageSize": max_results,
      "query": query,
      "contentSearchSpec": {
          "summarySpec": {"summaryResultCount": 1},
          "extractiveContentSpec": {"maxExtractiveSegmentCount": max_segments, "returnExtractiveSegmentScore": 1},
      }
  }
  if DATA_STORE_MODE == "chunked":
      # NOTE(review): this replaces the whole content spec (summary and
      # extractive settings included), not just one key - confirm intended.
      post_data["contentSearchSpec"] = {"searchResultMode": "CHUNKS"}

  response = requests.post(url, headers=headers, json=post_data, timeout=timeout)

  if response.status_code != 200:
    print(
        f"Error retrieving search results: {response.status_code} -"
        f" {response.text}"
    )

  return response.json()




In [None]:
# @title Transform retrieval into dataframe (optional).

import pandas as pd
import hashlib

_CHUNK_COLUMNS = [
    'doc_index', 'id', 'title', 'link', 'page', 'pageEnd', 'score', 'hash',
    'content',
]
_SEGMENT_COLUMNS = [
    'doc_index', 'title', 'link', 'segment_index', 'page', 'score', 'hash',
    'content',
]


def _chunk_row(idx, result):
  """Flatten one chunk-mode search result into a dataframe row."""
  chunk = result['chunk']
  return {
      'doc_index': idx,
      'id': chunk['id'],
      'title': chunk['documentMetadata']['title'],
      'link': chunk['documentMetadata']['uri'],
      'page': chunk['pageSpan']['pageStart'],
      'pageEnd': chunk['pageSpan']['pageEnd'],
      'score': chunk['relevanceScore'],
      # md5 of the text gives a short fingerprint for spotting duplicates.
      'hash': hashlib.md5(chunk['content'].encode()).hexdigest(),
      'content': chunk['content'],
  }


def _segment_rows(idx, result):
  """Flatten one document result into one row per extractive segment."""
  data = result['document']['derivedStructData']
  return [
      {
          'doc_index': idx,
          'title': data['title'],
          'link': data['link'],
          'segment_index': segment_idx,
          'page': segment.get('pageNumber', "none"),
          'score': segment.get('relevanceScore', "none"),
          'hash': hashlib.md5(segment['content'].encode()).hexdigest(),
          'content': segment['content'],
      }
      for segment_idx, segment in enumerate(data['extractive_segments'])
  ]


def retrieval_to_df(retrieval):
  """Convert a search response dict into a flat pandas DataFrame.

  The layout depends on the notebook global DATA_STORE_MODE:
  "chunked" yields one row per result chunk; any other value yields one row
  per extractive segment of each result document.

  Args:
    retrieval: Decoded JSON search response (a dict).

  Returns:
    A DataFrame with the columns of the active mode. When the response has
    no 'results' key (for example an error body), the frame is empty but
    still has those columns.
  """
  # A missing 'results' key is treated as "no hits" instead of a KeyError.
  results = retrieval.get('results', [])

  if DATA_STORE_MODE == "chunked":
    rows = [_chunk_row(idx, result) for idx, result in enumerate(results)]
    return pd.DataFrame(rows, columns=_CHUNK_COLUMNS)

  rows = [
      row
      for idx, result in enumerate(results)
      for row in _segment_rows(idx, result)
  ]
  return pd.DataFrame(rows, columns=_SEGMENT_COLUMNS)

In [None]:
# @title Transform retrieval_as_df to only the most relevant facts.

def only_top_retrieval(retrieval_as_df, max_rows=5, min_score=0.7):
    """Keep only the highest-scoring rows of a retrieval dataframe.

    Args:
        retrieval_as_df: DataFrame with a 'score' column.
        max_rows: Maximum number of rows to return.
        min_score: Rows whose score is below this value are dropped.

    Returns:
        A new DataFrame, sorted by 'score' in descending order, with at most
        `max_rows` rows. The input frame is not modified.
    """
    candidates = retrieval_as_df.copy()
    is_relevant = candidates['score'] >= min_score
    return (
        candidates[is_relevant]
        .sort_values(by='score', ascending=False)
        .head(max_rows)
    )

In [None]:
# @title Transform retrieval into "context" for a prompt.

from urllib.parse import quote


# TODO convert to working public URLs from gs:// urls.

def make_context(retrieval_as_df):
    """Render retrieval rows as the document-context text for the prompt.

    Each row becomes one block:

        DOCUMENT_TITLE: <title>
        DOCUMENT_LINK: <link>
        RELEVANCE_SCORE: <score>
        _START_OF_DOCUMENT_CONTENT:
        Page <page>[ - <pageEnd>]
        <content>
        _END_OF_DOCUMENT_CONTENT

    Args:
        retrieval_as_df: DataFrame with 'title', 'link', 'score' and
            'content' columns, plus optionally 'page' and 'pageEnd'.

    Returns:
        The concatenated blocks as one string ("" for an empty frame).
    """
    context_parts = []
    for _, row in retrieval_as_df.iterrows():
        # Replace gs:// with http:// so storage links look like real URLs.
        doc_link = row['link'].strip().replace("gs://", "http://")
        # Percent-encode spaces and special characters. ':' must stay safe,
        # otherwise the scheme separator is mangled into "http%3A//".
        doc_link = quote(doc_link, safe="/:")

        # A row with 'pageEnd' carries a page range; otherwise it has at most
        # a single page. Exactly one page line is written.
        if 'pageEnd' in row:
            page_line = f"Page {row['page']} - {row['pageEnd']}\n"
        elif 'page' in row:
            page_line = f"Page {row['page']}\n"
        else:
            page_line = ""

        context_parts.append(f"DOCUMENT_TITLE: {row['title'].strip()}\n")
        context_parts.append(f"DOCUMENT_LINK: {doc_link}\n")
        context_parts.append(f"RELEVANCE_SCORE: {row['score']}\n")
        context_parts.append("_START_OF_DOCUMENT_CONTENT:\n")
        context_parts.append(page_line)
        context_parts.append(row['content'].strip())
        context_parts.append("\n_END_OF_DOCUMENT_CONTENT\n\n")

    return "".join(context_parts)

In [None]:
# @title Transform "query" and "content" into a prompt.

def make_prompt(query, context):
    """Build the grounded-answer prompt sent to Gemini.

    The prompt is a fixed instruction block (role, answering rules, markdown
    rules, the document-chunk layout and one worked example) followed by a
    per-call block that embeds `query` twice, before and after `context`.

    Args:
        query: The user's question.
        context: Document context text to ground the answer on.

    Returns:
        The complete prompt string.
    """
    # Fixed part: identical on every call, so it needs no interpolation.
    instructions = """Answer the user's question grounded with the document context.

## Instructions for Gemini Model

You are a helpful assistant that answers employee questions about Cymbal Bank benefits.

**Answering Questions**

* Answer the user's question using **only** the provided document context.
* All questions will be from Cymbal Bank employees.
* Document context is provided in chunks. Use all chunks to answer.
* Answers should be concise and easy to read.
* Use bullet points to list information whenever possible.
* If the documents don't contain enough information to answer confidently, respond: "The materials do not appear to be sufficient to provide a good answer."
* **Always** recommend users contact Cymbal Bank HR if they have further questions.

**Using Markdown**

* Format your answers using markdown for clarity.
* When referencing a document, **always** use markdown link syntax: `[Document Title](Document Link)`

**Document Structure**

Document chunks follow this structure:

```
DOCUMENT_TITLE: Document Name
DOCUMENT_LINK: https://document.url
RELEVANCE_SCORE: 0.8 (Example)
_START_OF_DOCUMENT_CONTENT:
... Document content goes here ...
_END_OF_DOCUMENT_CONTENT
```

**Example**

```
# QUESTION

Where can I find information about my 401k plan?

# DOCUMENT CONTEXT

DOCUMENT_TITLE: Cymbal Bank Benefits Overview
DOCUMENT_LINK: https://benefits.cymbalbank.com/overview.pdf
RELEVANCE_SCORE: 0.95
_START_OF_DOCUMENT_CONTENT:
Welcome to Cymbal Bank! You can find detailed information about your 401(k) plan on page 12.
_END_OF_DOCUMENT_CONTENT

# QUESTION REPEATED

Where can I find information about my 401k plan?

# ANSWER GROUNDED ON CONTEXT

Information about your 401(k) plan is located on page 12 of the [Cymbal Bank Benefits Overview](https://benefits.cymbalbank.com/overview.pdf). For further assistance, please contact Cymbal Bank HR.
```
"""
    # Per-call part: the question, the retrieved context, the question again.
    request = f"""
# QUESTION

{query}

# DOCUMENT CONTEXT

{context}

# QUESTION REPEATED

{query}

# ANSWER GROUNDED ON CONTEXT

(Your answer goes here)

"""
    return instructions + request

In [None]:
# @title Gemini generation based on prompt.
import pandas as pd
import bigframes.pandas as bpd
import vertexai
import vertexai.preview.generative_models as generative_models
from vertexai.preview.generative_models import GenerativeModel, Part

# Region used for the Gemini calls; the project comes from PROJECT_ID.
vertex_location = "us-central1"

# Initialise the Vertex AI SDK, then create the model handle used by get_answer().
vertexai.init(location=vertex_location, project=PROJECT_ID)
model = GenerativeModel("gemini-1.5-pro-001")

def get_answer(prompt):
  """Send `prompt` to the module-level Gemini `model` and return its text.

  Args:
    prompt: The full prompt to pass to `model.generate_content`.

  Returns:
    The `.text` attribute of the generation response.
  """
  harm = generative_models.HarmCategory
  # Every listed harm category is given the BLOCK_NONE threshold.
  safety_settings = {
      category: generative_models.HarmBlockThreshold.BLOCK_NONE
      for category in (
          harm.HARM_CATEGORY_HATE_SPEECH,
          harm.HARM_CATEGORY_DANGEROUS_CONTENT,
          harm.HARM_CATEGORY_SEXUALLY_EXPLICIT,
          harm.HARM_CATEGORY_HARASSMENT,
      )
  }
  generation_config = dict(max_output_tokens=8192, temperature=1, top_p=0.95)

  return model.generate_content(
      prompt,
      generation_config=generation_config,
      safety_settings=safety_settings,
  ).text


## Demo All in One Shot


In [None]:
# End-to-end run: echo the query, then chain every helper in order.
print(f"Query: [{QUERY}]\n")
# Retrieval: call the search API with the query.
retrieval = do_retrieval(QUERY)
# Flatten the response into a dataframe, then keep only the top rows
# (only_top_retrieval defaults: max_rows=5, min_score=0.7).
df = retrieval_to_df(retrieval)
df = only_top_retrieval(df)
# Generation: render the rows as context, build the prompt, ask the model.
context = make_context(df)
prompt = make_prompt(QUERY, context)
response = get_answer(prompt)
print(response)

Query: [What types of heath insurance are offered by cymbal bank?]

Cymbal Bank offers the following health insurance plans to employees:

* **HMO (Health Maintenance Organization):** This plan features a network of providers, requires you to select a primary care physician (PCP), and requires referrals to see specialists. 
* **High Deductible HMO:** This plan is similar to the HMO but has higher deductibles and lower premiums. This plan makes you eligible for a Health Savings Account (HSA). 
* **PPO (Preferred Provider Organization):** This plan offers more flexibility. You can choose from a larger network of providers, including out-of-network options, without needing a PCP or referrals. 

You can review a detailed comparison of Cymbal Bank's health insurance plans in the  [Cymbal Bank Employee Benefits Summary- US](http%3A//alanblount-cymbal-docs/Cymbal%20Bank%20Employee%20Benefits%20Summary-%20US.pdf). For further assistance, please contact Cymbal Bank HR. 



## Demo Gemini Generation (Showing Steps)

In [None]:
# @title [Retrieval]

from IPython.display import display, HTML

# Echo the query being demonstrated.
print(f"Query: [{QUERY}]\n")
# IPython's %time magic reports how long the retrieval call takes.
%time retrieval = do_retrieval(QUERY, 5)

# Transform to dataframe for easy viewing.
retrieval_as_df = retrieval_to_df(retrieval)
display(retrieval_as_df)

Query: [What types of heath insurance are offered by cymbal bank?]

CPU times: user 81.6 ms, sys: 3.1 ms, total: 84.7 ms
Wall time: 281 ms


Unnamed: 0,doc_index,id,title,link,page,pageEnd,score,hash,content
0,0,c1,Cymbal Bank Employee Benefits Policy- US,gs://alanblount-cymbal-docs/Cymbal Bank Employ...,1,3,0.778671,7aab28d536b4052ad4708cb18244c801,Cymbal Bank Employee Benefits Policy \n# 1. In...
1,1,c2,Cymbal Bank Employee Benefits Summary- US,gs://alanblount-cymbal-docs/Cymbal Bank Employ...,2,5,0.841151,61e646be215db0c2a893c9b677741c57,\n# Summary of Features\n\n_START_OF_TABLE_\nT...
2,2,c1,Cymbal Bank Employee Benefits Summary- US,gs://alanblount-cymbal-docs/Cymbal Bank Employ...,1,2,0.847561,8d5c61d29416f8d0dd2ebb2fc53687e6,\n# Cymbal Bank Employee Benefits Summary\nWel...
3,3,c4,Cymbal Bank Employee Benefits Summary- US,gs://alanblount-cymbal-docs/Cymbal Bank Employ...,5,7,0.819692,0ef828796b6b21948337ccbecc54b5b7,\n# Dental Plan Options Comparison\n\n_START_O...
4,4,c5,Cymbal Bank Employee Benefits Summary- US,gs://alanblount-cymbal-docs/Cymbal Bank Employ...,7,9,0.82144,9a892121e586c33361b869f8d976fcfa,\n# Covered Services\n\n_START_OF_TABLE_\nTABL...


In [None]:
# @title [Limit retrieval to top 3, require high relevancy]

# Keep at most 3 rows, and only those scoring at least 0.8.
top_3_df = only_top_retrieval(retrieval_as_df, 3, 0.8)
display(top_3_df)

Unnamed: 0,doc_index,id,title,link,page,pageEnd,score,hash,content
2,2,c1,Cymbal Bank Employee Benefits Summary- US,gs://alanblount-cymbal-docs/Cymbal Bank Employ...,1,2,0.847561,8d5c61d29416f8d0dd2ebb2fc53687e6,\n# Cymbal Bank Employee Benefits Summary\nWel...
1,1,c2,Cymbal Bank Employee Benefits Summary- US,gs://alanblount-cymbal-docs/Cymbal Bank Employ...,2,5,0.841151,61e646be215db0c2a893c9b677741c57,\n# Summary of Features\n\n_START_OF_TABLE_\nT...
4,4,c5,Cymbal Bank Employee Benefits Summary- US,gs://alanblount-cymbal-docs/Cymbal Bank Employ...,7,9,0.82144,9a892121e586c33361b869f8d976fcfa,\n# Covered Services\n\n_START_OF_TABLE_\nTABL...


In [None]:
# @title [Transform to Prompt with Context]
# Render the top rows as context text and embed it, with the query, in the prompt.
context = make_context(top_3_df)
prompt = make_prompt(QUERY, context)
# Print the full prompt so the text passed to get_answer() can be inspected.
print(prompt)

Answer the user's question grounded with the document context.

## Instructions for Gemini Model

You are a helpful assistant that answers employee questions about Cymbal Bank benefits.

**Answering Questions**

* Answer the user's question using **only** the provided document context.
* All questions will be from Cymbal Bank employees.
* Document context is provided in chunks. Use all chunks to answer.
* Answers should be concise and easy to read.
* Use bullet points to list information whenever possible.
* If the documents don't contain enough information to answer confidently, respond: "The materials do not appear to be sufficient to provide a good answer."
* **Always** recommend users contact Cymbal Bank HR if they have further questions.

**Using Markdown**

* Format your answers using markdown for clarity.
* When referencing a document, **always** use markdown link syntax: `[Document Title](Document Link)`

**Document Structure**

Document chunks follow this structure:

```
DOCUM

In [None]:
# @title [Generation]
# IPython's %time magic reports how long the generation call takes.
%time response = get_answer(prompt)
# Blank line to separate the timing output from the answer.
print()
print(response)

CPU times: user 61.3 ms, sys: 11 ms, total: 72.3 ms
Wall time: 5.46 s

Cymbal Bank offers three types of health insurance:

* **HMO (Health Maintenance Organization):** Offers a network of providers for comprehensive coverage with lower out-of-pocket costs. Requires primary care physician (PCP) selection and referrals for specialists.
* **High Deductible HMO:** Similar to the HMO but with higher deductibles and lower premiums, making you eligible for a Health Savings Account (HSA).
* **PPO (Preferred Provider Organization):** Provides more flexibility in choosing providers, including out-of-network options, without the need for a PCP or referrals. 

For a detailed comparison of these plans, please refer to pages 1-2 of the [Cymbal Bank Employee Benefits Summary- US](http%3A//alanblount-cymbal-docs/Cymbal%20Bank%20Employee%20Benefits%20Summary-%20US.pdf).

For further questions, contact Cymbal Bank HR. 



## [WIP] Demo Grounded Generation

> TODO(alanblount) this is WIP today, [docs](https://cloud.google.com/generative-ai-app-builder/docs/grounded-gen)

In [None]:
# Scratch pad until I get it working.


# Re-use the "retrieval_as_df" dataframe from the prior step, converting it into inline grounding facts.


def convert_retrieval_as_df_to_inline_source_grounding_facts(retrieval_as_df):
    """Turn retrieval rows into a list of inline grounding-fact dicts.

    Args:
        retrieval_as_df: DataFrame with 'content', 'title' and 'link' columns.

    Returns:
        One dict per row, shaped as
        {'factText': content, 'attributes': {'title': title, 'uri': name}},
        where `name` is the text after the last '/' of the row's link.
    """
    grounding_facts = []
    for _, record in retrieval_as_df.iterrows():
        # The last path component of the link stands in for the file name.
        link_tail = record['link'].split('/')[-1]
        grounding_facts.append({
            'factText': record['content'],
            'attributes': {
                'title': record['title'],
                'uri': link_tail,
            },
        })
    return grounding_facts


def grounded_generation(prompt, retrieval_as_df, timeout=60):
  """Call the Discovery Engine generateGroundedContent endpoint (WIP).

  The rows of `retrieval_as_df` are sent as inline grounding facts and
  `prompt` is sent as the single user message. Reads the notebook globals
  PROJECT_ID, LOCATION and access_token.

  Args:
    prompt: Text of the user turn.
    retrieval_as_df: DataFrame with 'content', 'title' and 'link' columns.
    timeout: Seconds `requests` waits before giving up on the call.

  Returns:
    The decoded JSON body of the HTTP response. On a non-200 status the
    status code and body text are printed and the decoded body is still
    returned.

  Raises:
    requests.exceptions.Timeout: If the service does not answer in time.
  """
  location_path = f"projects/{PROJECT_ID}/locations/{LOCATION}"
  url = f"https://discoveryengine.googleapis.com/v1/{location_path}:generateGroundedContent"
  headers = {
      "Authorization": f"Bearer {access_token}",
      "Content-Type": "application/json",
  }
  facts = convert_retrieval_as_df_to_inline_source_grounding_facts(retrieval_as_df)
  post_data = {
      "contents": [
          {
              "role": "user",
              "parts": [
                  {
                      # Send the caller's prompt; the previous version sent
                      # the literal placeholder "PROMPT_TEXT".
                      "text": prompt
                  }
              ]
          }
      ],
      "groundingSpec": {
          "groundingSources": [
              # Convert data frame to a list of inline facts.
              {
                  "inlineSource": {
                      # {"factText": ..., "attributes": {"title": ..., "uri": ...}}, ...
                      "groundingFacts": facts
                  }
              }
              # Optionally can list other grounding data sources.
          ]
      },
      "generationSpec": {
          "modelId": "gemini-1.5-flash",
          "temperature": 0.0,
          "topP": 0.7,
          "topK": 40,
      }
  }

  # Temp debug: show the request that is about to be sent.
  debug = pd.DataFrame([
      {"key": "url", "value": url},
      {"key": "prompt", "value": prompt},
      {"key": "post_data", "value": post_data},
  ])
  display(debug)

  response = requests.post(url, headers=headers, json=post_data, timeout=timeout)

  if response.status_code != 200:
    print(
        f"Error generating grounded content: {response.status_code} -"
        f" {response.text}"
    )

  return response.json()




# Run the WIP grounded-generation call with the query and the top-3 rows.
output = grounded_generation(QUERY, top_3_df)


# Show the value returned by grounded_generation (the decoded JSON response).
display(output)

Unnamed: 0,key,value
0,url,https://discoveryengine.googleapis.com/v1/proj...
1,prompt,What types of heath insurance are offered by c...
2,post_data,"{'contents': [{'role': 'user', 'parts': [{'tex..."


Error retrieving search results: 404 - {
  "error": {
    "code": 404,
    "message": "Method not found.",
    "status": "NOT_FOUND"
  }
}



{'error': {'code': 404, 'message': 'Method not found.', 'status': 'NOT_FOUND'}}