In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Unlock FHIR with RAG on Vertex AI - Part-02 (RAG)

## Run the Notebook

**_NOTE_**: This notebook has been tested in the following environment:

* Python version = 3.10.13

<table align="center">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/adethyaa/unlock-fhir-with-rag-on-vertexai/blob/main/02_FHIR_RAG.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/adethyaa/unlock-fhir-with-rag-on-vertexai/blob/main/02_FHIR_RAG.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/adethyaa/unlock-fhir-with-rag-on-vertexai/blob/main/02_FHIR_RAG.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>

## 1.Environment Setup

### 1.1. Authenticate

In [None]:
# Authenticate Notebook

import sys

if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

### 1.2. Global Variables

In [None]:
# GCP Parameters
PROJECT_ID = "propane-crawler-363311"  # @param {type:"string"}
REGION = "us-central1"  # @param {type: "string"}

# Neo4J Connection Parameters
NEO4J_URL="bolt://localhost:7687" # @param {type:"string"}
NEO4J_USER="neo4j" # @param {type:"string"}
NEO4J_PASSWORD="password" # @param {type:"string"}

# Dimension Vertex PaLM Text Embedding
ME_DIMENSIONS = 768 # @param {type:"integer"} 
ME_DISTANCE_MEASURE_TYPE = "DOT_PRODUCT_DISTANCE" # @param {type:"string"} 

# Update to bigger SHARDS for larger data volumes & performance
# Doc - https://cloud.google.com/vertex-ai/docs/vector-search/create-manage-index
ME_SHARD_SIZE = "SHARD_SIZE_SMALL" # @param ["SHARD_SIZE_SMALL", "SHARD_SIZE_MEDIUM", "SHARD_SIZE_LARGE"] 

# Vertex AI Vector Search (MatchingEngine) Endpoint Parameters
# Doc - https://cloud.google.com/vertex-ai/docs/vector-search/create-manage-index

# The machine types that you can use to deploy your index
ME_ENDPOINT_MACHINE_TYPE = "e2-standard-2" # @param ["n1-standard-16", "n1-standard-32", "e2-standard-2", "e2-standard-16", "e2-highmem-16", "n2d-standard-32"] 

ME_ENDPOINT_MIN_REPLICA_COUNT = 2 # @param {type:"integer"} 
ME_ENDPOINT_MAX_REPLICA_COUNT = 10 # @param {type:"integer"} 

# Vertex AI Vector Search (MatchingEngine) Index Parameters
ME_INDEX_NAME = 'fhir_me_index'  # @param {type: "string"}
ME_EMBEDDING_GCS_DIR = f'{PROJECT_ID}-me-bucket' # @param {type:"string"} 
ME_DESCRIPTION = "Index for FHIR Resources" # @param {type:"string"} 

# Set the LLM to use
VERTEX_AI_MODEL_NAME = 'gemini-1.0-pro-001'
# VERTEX_AI_MODEL_NAME = 'gemini-1.5-pro-preview-0409'
TEXT_EMBEDDING_MODEL_NAME = "textembedding-gecko@003"

### 1.3. Import Libraries

In [None]:
# Utils
from pprint import pprint
import json
import csv
import os
import re
from datetime import datetime, timezone, timedelta

from rich.console import Console
from rich.markdown import Markdown

from typing import Dict, Optional, Any, List

# Google Libs
import vertexai
vertexai.init(project=PROJECT_ID, location=REGION)

from google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint import (
    Namespace,
    NumericNamespace,
)

from pydantic import BaseModel, Field

# Langchain
import langchain
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.chains import RetrievalQA, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.globals import set_debug, set_verbose
from langchain.text_splitter import RecursiveCharacterTextSplitter
print(f"LangChain version: {langchain.__version__}")

# LangChain Google Libs
from langchain_google_vertexai import VertexAI
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_google_vertexai import VectorSearchVectorStore

# Custom Utils
## Custom Matching Engine
from utils.matching_engine import MatchingEngine
from utils.matching_engine_utils import MatchingEngineUtils

## Neo4J
from utils.NEO4J_Graph import Graph



### 1.4. Neo4J Connection

In [None]:
%env NEO4J_USER={NEO4J_USER}
%env NEO4J_PASSWORD={NEO4J_PASSWORD}

In [None]:
# Check if Docker Container is running
! docker ps -a

In [None]:
# Start the Container if it is not running
! docker start testneo4j

In [None]:
# Instantiate & Connect to Neo4J graph
graph = Graph(NEO4J_URL, NEO4J_USER, NEO4J_PASSWORD)

In [None]:
# Test Neo4J Connection
# Get type and number of each FHIR resource in the database
resource_metrics = graph.resource_metrics()
resource_metrics.sort()
pprint(resource_metrics)

In [None]:
node_count, relationship_count = graph.database_metrics()
print('Database Metrics:')
print(f'    - Node Count = {node_count}')
print(f'    - Relationship Count = {relationship_count}')

### 1.5. VertexAI VectorSearch Connection

In [None]:
# Create Text Embedding
text_embedding_model = VertexAIEmbeddings(
    model_name=TEXT_EMBEDDING_MODEL_NAME,
    project=PROJECT_ID,
    location=REGION,
    max_retries=6
)

text_embedding_model

In [None]:
# Get Matching Engine Index id and Endpoint id
me_utils = MatchingEngineUtils(PROJECT_ID, REGION, ME_INDEX_NAME)
ME_INDEX_ID, ME_INDEX_ENDPOINT_ID = me_utils.get_index_and_endpoint()

print(f'- ME_INDEX_ID:{ME_INDEX_ID}\n- ME_INDEX_ENDPOINT_ID:{ME_INDEX_ENDPOINT_ID}')

In [None]:
vector_store = VectorSearchVectorStore.from_components(
    project_id=PROJECT_ID,
    region=REGION,
    gcs_bucket_name=f"gs://{ME_EMBEDDING_GCS_DIR}".split("/")[2],
    index_id=ME_INDEX_ID,
    endpoint_id=ME_INDEX_ENDPOINT_ID,
    stream_update=True,
    embedding=text_embedding_model
)
vector_store

In [None]:
# Test VectorSearch is Connected
query_text = 'sample Resource'
response = vector_store.similarity_search_with_score(query=query_text, k=1)
response

### 1.6. Google Vertex AI LLM Setup

In [None]:
llm = VertexAI(model_name=VERTEX_AI_MODEL_NAME)
llm.model_name

### 1.7. QA Without RAG

Asking LLM a question without context.

In [None]:
# Ask LLM a question
question = "What is the Body Height of Benjamin360 Hintz995 and when was it measured?"

no_rag_response = llm.invoke(question)

print(f'Question: {question}')
print(f'LLM Answer: {no_rag_response}')

## 2. Retrieval with RAG

In [None]:
from rich.console import Console
from rich.markdown import Markdown

console = Console()

In [None]:
sample_query = """For the patient Akiko835 Larkin917,  please identify all active medications within the provided text.
The FHIR Resource Type is Medication Request
Extract the medication name, dosage instructions, and duration if available. 
Only include medication requests where the status is explicitly mentioned as 'active'.  
Present the extracted information in a markdown table with columns 'Medication Name', and 'Status'. 
If no active medication requests are found, please indicate 'No active medications found'.
"""

In [None]:
# Setting Langchain Global Variables
from langchain.globals import set_verbose, set_debug

# Change to False if you do not want debug and execution information
langchain_debug = False
set_debug(langchain_debug)
set_verbose(langchain_debug)

### 2.1. Step-01: Get Patient Name
***Tip:*** 
- Minimize Cost & Latency - by first trying to extract patient name locally.
- If regex does not help, then use LLM.
- Fallback - Prompt user for Input
- You can use less powerful LLMs for this to save cost. E.g. Gemma(offline) or Smaller LLMs


<br>***Using Regex to extract patient name***

In [None]:
# Get Patient Name Local Function using Python regex
def extract_patient_name_with_custom_function(query: str) -> Optional[Dict[str, str]]:
    """
    Extracts patient's first and last name from the query using a regular expression.

    Args:
        query: The user's question or statement.

    Returns:
        A dictionary containing the extracted first and last names, or None if not found.
    """
    # name_pattern = re.compile(r"(?:Dr\.|Mr\.|Ms\.|Mrs\.)?\s*([A-Z][a-zA-Z0-9']+)\s+([A-Z][a-zA-Z0-9']*)")  
    name_pattern = re.compile(r"(?:Dr\.|Mr\.|Ms\.|Mrs\.)?\s*(\b(?!What\b)[A-Z][a-zA-Z0-9']*\b)(?:\s+([A-Z][a-zA-Z0-9']*)\b)?")
    match = name_pattern.search(query)
    if match:
        first_name = match.group(1)
        last_name = match.group(2) if match.group(2) else None
        patient_name = {"first_name": first_name, "last_name": last_name}
        return patient_name
        # return None
    else:
        return None

In [None]:
print(f'User Query: {sample_query}')
name = extract_patient_name_with_custom_function(sample_query)
print(f'Patient Name = {name}')

<br>***Manulaly get Patient name from user using Input Prompt***

In [None]:
def get_patient_name_from_user():
    while True:
        user_input_name = input("Please enter the patient's full name: ")
        confirmed = input(f"Is '{user_input_name}' correct? (y/n): ").lower()
        if confirmed == 'y':
            name_parts = user_input_name.split()
            patient_name = {"first_name": name_parts[0], "last_name": name_parts[-1] if len(name_parts) > 1 else None}
            # print(type(patient_name))
            return patient_name
        elif confirmed == 'n':
            continue
        else:
            print("Invalid input. Please enter '[y]es' or '[n]o'.")

In [None]:
print(f'User Query: {sample_query}')
name = get_patient_name_from_user()
print(f'Patient Name = {name}')

<br> ***Use LLM to extract Patient Name***

In [None]:
patient_name_prompt_template = """You are a medical assistant tasked with extracting patient names from text.
The text may contain:
1. The patient's full name (first and last)
2. Only the patient's first name
3. A partial name (e.g., a nickname, a last name with a prefix)
4. Some Names will contain numbers and they are part of the name
5. Names will contain special characters (e.g., apostrophes, hyphens)
5. Names from diverse cultures and regions
6. If you detect middle names, combine them into the last_name: last_name = '{{All identified middle names}} {{last name}}' (with a space between middle and last names)

Identify and extract the patient's name information from the text. If you can identify both the first and last name, provide them. If you can only identify the first name or a partial name, provide that information and leave the missing part blank.

Always provide the output in the following JSON format:
{{"first_name": "[extracted first name or partial name]", "last_name": "[extracted last name or null]"}}

Here's the text to analyze:
{query}
"""


# # print('Output Parser Format Instructions:')
# # pprint(patient_name_prompt.partial_variables)

# print('\nFormatted Prompt:')
# patient_name_prompt.pretty_print()

In [None]:
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

In [None]:
# Create Patient name Output Parser

# Define your desired data structure.
class PatientName(BaseModel):
    first_name: str = Field(description="extracted first name or partial name of the patient")
    last_name: str = Field(description="extracted patient's last name of the patient or null")

In [None]:
def get_patient_name_llm(user_query: str) -> dict:
    
    # Set up a parser + inject instructions into the prompt template.
    patient_name_parser = JsonOutputParser(pydantic_object=PatientName)
    
    # Create Prompt
    patient_name_prompt = ChatPromptTemplate.from_template(patient_name_prompt_template)
    patient_name_prompt.partial_variables = {"format_instructions": patient_name_parser.get_format_instructions()}

    
    patient_name_chain = LLMChain(llm=llm,
                                  prompt= patient_name_prompt)
    
    llm_response = patient_name_chain.invoke({'query': user_query})
    
    try: 
        patient_name = patient_name_parser.parse(llm_response['text'])
    
    except OutputParserException as e:
        print("I couldn't find the Patient name.")
        patient_name = get_patient_name_from_user()
    
    return patient_name


In [None]:
print(f'Query: {sample_query}')
patient_name_response = get_patient_name_llm(sample_query)
print(f'Response: {patient_name_response}')

### 2.2. Step-02: Get Patient ID

To find the relevant FHIR Patient Resource, even when dealing with potentially incomplete patient names from the user's query or LLM response, we:

- **Construct the Query:** We strategically build our query using the same template as our pre-processed resource text representation. This ensures higher accuracy even with partial names.
- **Perform Similarity Search:** This carefully crafted query is then used to search our VectorSearch Index, with the expectation that the top result is the matching FHIR Patient Resource.
- **Extract ID:** Finally, we retrieve the fhir_patient_id directly from the metadata of the identified document."
    


In [None]:
def get_patient_id(patient_name :dict) -> str:
    patient_vs_query_text = f"""The type of information in this entry is patient. The name use for this patient is official. The name family for this patient is {patient_name["last_name"]}. The name given 0 for this patient is {patient_name["first_name"]}"""
    # Create Retriever
    vs_retirever = vector_store.as_retriever(search_type="similarity")

    # Filter by resource_type = Patient
    vs_filter = [Namespace(name="fhir_resource_type", allow_tokens=["Patient"])]

    # k = 1 - We only want the top 1 result
    vs_retirever.search_kwargs = {"filter": vs_filter, "k":1}
    docs = vs_retirever.invoke(patient_vs_query_text)
    
    # print(f'Vector Search Results:\n{docs}\n')
    
    # Get patient id from Document Metadata
    patient_id = docs[0].metadata['fhir_patient_id'][0]
    return patient_id

In [None]:
patient_id_response = get_patient_id(patient_name_response)

print(f'Query: {sample_query}')
print(f'Response: {patient_id_response}')

### 2.3. Step-03: Identify FHIR Resource Type

In [None]:
FHIR_RESOURCE_LIST_URL = "https://build.fhir.org/resourceguide.html"

resource_type_prompt_template = f"""You are a healthcare specialist with deep knowledge of the FHIR standard.
Your task is to identify the most appropriate FHIR resource type for the given query.
Refer to the official FHIR resource guide at {FHIR_RESOURCE_LIST_URL}. 
If needed, consult the detailed documentation linked from that guide.

Return ONLY the resource type name if there is a clear match. If unsure, return "Unknown".

Always provide the output in the following JSON format:
{{{{"resource_type": "[extracted resource type name]"}}}}

Here's the query to analyze:
{{query}}
"""



# print('Output Parser Format Instructions:')
# pprint(resource_type_prompt.partial_variables)
# resource_type_prompt = ChatPromptTemplate.from_template(resource_type_prompt_template)
# resource_type_prompt.partial_variables = {"format_instructions": resource_type_parser.get_format_instructions()}
# print('\nFormatted Prompt:')
# resource_type_prompt.pretty_print()

In [None]:
# Create Resource Type Output Parser

# Define your desired data structure.
class ResourceType(BaseModel):
    resource_type: str = Field(description="extracted resource type name")

In [None]:
def get_resource_type_llm(user_query: str) -> dict:
    
    # Set up a parser + inject instructions into the prompt template.
    resource_type_parser = JsonOutputParser(pydantic_object=ResourceType)
    
    # Create Prompt
    resource_type_prompt = ChatPromptTemplate.from_template(resource_type_prompt_template)
    resource_type_prompt.partial_variables = {"format_instructions": resource_type_parser.get_format_instructions()}
    
    # Create Chain
    resource_type_chain = LLMChain(llm=llm,
                                   prompt=resource_type_prompt)
    
    llm_response = resource_type_chain.invoke({'query': user_query})
    
    try:
        resource_type = resource_type_parser.parse(llm_response['text'])
    
    except OutputParserException as e:
        print("I couldn't infer the FHIR Resource Type.")
        resource_type = input("Please enter the FHIR Resource Type: ")
        resource_type = {'resource_type': resource_type}
        
    return resource_type

In [None]:
# LangChain Debug
debug_on = False
set_debug(debug_on)
set_verbose(debug_on)


test_query = """For the patient Benjamin360 Hintz995, summarize all active medication requests.  
If no active medication requests are found, please indicate 'No active medications found'.
"""

resource_type_response = get_resource_type_llm(test_query)


print(f'Query: {test_query}')
print(f'Response: {resource_type_response}')

### 2.4. Step-04: Vector Search

In this step we perform a Similarity search on VertexAI VectoreSearch Index to retrieve FHIR Reources that match the user query.

**Steps:**
- Perform a Vector Search with Filters based on the retrieved patient_id and resource_type
- Since FHIR Resources reference other resources, we also need to provide the referenced Resources to provide the full context to the LLM to imporve the accuracy of the respone. We do this by querying the Neo4J database to get immediate Neigbour resources for each resource returned by the 


In [None]:
def retrieve_relevant_resources(query: str, k: int, 
                                resource_type_text: str,
                               patient_id: str) -> list[str]:
    
    # Create Retriever
    vs_retirever = vector_store.as_retriever(search_type="similarity")
    
    # Filter by fhir_resource_type and fhir_patient_id to retrieve only relevant FHIR Resources
    vs_filter = [
        Namespace(name="fhir_resource_type", allow_tokens=[resource_type_text]),
        Namespace(name="fhir_patient_id", allow_tokens=[patient_id])
    ]
    
    # print(f'retrieve_relevant_resources resource_type: {resource_type_text}')
    if debug_on:
        print(f'vs_filter:')
        pprint(vs_filter)
        print('\n')
        
    # Retrieve all Resources based on above fitler
    vs_retirever.search_kwargs = {"filter": vs_filter, 'k':k}
    docs = vs_retirever.invoke(query)
    
    
    # print(f'Retrieved Resource Documents:')
    # pprint(docs)

    # retrieved_resource_ids = [doc.metadata["fhir_resource_id"][0] for doc in docs]
    # return retrieved_resource_ids
    return(docs)

In [None]:
debug_on = False


docs = retrieve_relevant_resources(sample_query,
                                   k=25,
                                   patient_id=patient_id_response,
                                   resource_type_text=resource_type_response['resource_type'])

retrieved_resource_ids = [doc.metadata['fhir_resource_id'][0] for doc in docs]

# for doc in docs:
#     resource_metadata = doc.metadata
#     patient_id = resource_metadata['fhir_patient_id']
#     resource_id = resource_metadata['fhir_resource_id']
#     resource_type = resource_metadata['fhir_resource_type']    
#     print(f'patient_id: {patient_id}\t resource_id:{resource_id}\t resource_type:{resource_type}')
#     # print(doc.page_content)

print(f'Query: {sample_query}')
print(f'Total Resources Retrieved: {len(retrieved_resource_ids)}')
# print(f'ResourcesIds List: {retrieved_resource_ids}')

### 2.5. RAG with normal context

*With the above retrieved resources as llm context, let us try to ask the LLM user query and check its response.*

In [None]:
sample_user_query_prompt_template='''
System: The context below contains entries about the patient's healthcare. 
Please limit your answer to the information provided in the context. Do not make up facts.
Please limit your answers only about the patient in the user question. If you do not find the patient name in the context.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
If you are asked about the patient's name and one the entries is of type patient, you should look for the first given name and family name and answer with: [given] [family]
----------------
{context}

Here's the text to analyze:
{query}
'''

# sample_user_query_prompt = PromptTemplate(
#     template=sample_user_query_prompt_template,
#     input_variables=['query']
# )


# sample_user_query_prompt.pretty_print()

In [None]:
# get all page_content of docs
docs_page_contents_list = [doc.page_content for doc in docs]
docs_page_contents = '\n\n'.join(docs_page_contents_list)

In [None]:
def rag_normal_context_llm(user_query: str):
    
    prompt_inputs = {'query': user_query, 'context': docs_page_contents}
    prompt_inputs.update(patient_name_response)
    
    # Create Prompt
    user_query_prompt = PromptTemplate(
        template=sample_user_query_prompt_template,
        input_variables=['query']
    )
    
    # Chain
    user_query_chain = LLMChain(llm=llm,
                                prompt=user_query_prompt)
    
    llm_response = user_query_chain.invoke({'query': user_query, 'context': docs_page_contents})
    
    return llm_response['text']

In [None]:
# LangChain Debug
langchain_debug = False
set_debug(langchain_debug)
set_verbose(langchain_debug)

rag_normal_context_response = rag_normal_context_llm(sample_query)
print(f'User Query: {sample_query}')
print(f'Response: {rag_normal_context_response}')
# Response will be empty, because the context does not contain Patient Resource Info.

<br>***The LLM couldn't answer the query because the necessary Patient FHIR Resource wasn't retrieved, leaving it without the required context.***

### 2.6. Step-05: Enhanced Context 

<br>***Fetch referenced resources for additional context for LLM***

To enhance the LLM's accuracy in answering user questions, it's crucial to fetch the text representation of all referenced FHIR resources. For instance, an Observation resource might reference Specimen, Device, Procedure, etc. This provides complete context to the LLM, enabling it to accurately answer queries involving these linked resources.

Additionally, this ensures the inclusion of key information like patient names from the referenced Patient resource, preventing incorrect responses stating that the context lacks information about the patient.

In [None]:
def fetch_enhanced_context(in_resource_ids: list) -> str:
    
    # Fetch relevant text from the graph database
    cipher = f"""
    MATCH (node: resource)
    WHERE node.id IN {in_resource_ids}

    OPTIONAL MATCH (node)-[r]-(neighbor :resource)
    WITH COLLECT(DISTINCT node) + COLLECT(DISTINCT neighbor) AS allNodes
    UNWIND allNodes as uniqueNode
    RETURN uniqueNode.text
    """

    try:
        response = graph.query(cipher)[0]
    except Exception as e:
        raise ValueError(f"Error in Graph Query: {e}")

    relevant_resource_text_list = [resource_id[0] for resource_id in response]
    
    # print(f'Number of resources matching query: {len(relevant_resource_text_list)}')
    # print(f'Enhanced Context Text:')
    # pprint(relevant_resource_text_list)
    
    return relevant_resource_text_list
    

In [None]:
enhanced_context_text_list = fetch_enhanced_context(retrieved_resource_ids)

print(f'Query: {sample_query}')
print(f'Total Resources Retrieved: {len(enhanced_context_text_list)}')

### 2.7. RAG with Enhanced Context

In [None]:
def doc_summary(docs):
    print (f'You have {len(docs)} document(s)')
    
    num_words = 0
    for doc in docs:
        try:
            num_words = sum(len(doc.page_content.split(' ')))
        except Exception as e:
            print(doc)
            break
    
    print (f'You have roughly {num_words} words in your docs')
    print ()
    print (f'Preview: \n{docs[0].page_content.split(". ")[0]}')

In [None]:
def get_document_prompt(patient_first_name,
                        patient_last_name,
                        is_refine_prompt: bool
                       ) -> str:

    # Get Current Date and Time in format MM/DD/YYYY
    current_datetime = datetime.now(timezone.utc).astimezone(timezone(offset=timedelta(hours=5, minutes=30)))
    current_datetime_str = current_datetime.strftime("%m/%d/%Y")
    
    document_prompt_template = f"""
     The context below contains entries about the patient's healthcare. 
     Please limit your answer to the information provided in the context. Do not make up facts.
     Please limit your answers only about the patient in the user question. If you do not find the patient name in the context.
     If you don't know the answer, just say that you don't know, don't try to make up an answer.
     If you are asked about the patient's name and one the entries is of type patient, you should look for the first given name and family name and answer with: [given] [family]

     The context below belongs to the patient {patient_first_name} {patient_last_name}:
     The current Date & Time is {current_datetime_str}

     Context information is below.
     {{context_str}}
     
     Given the context information and not prior knowledge, answer the question: {{question}}
     """
        
    if is_refine_prompt:
        refine_append_str = """
        We have provided an existing answer: 
        {existing_answer}
        
        We have the opportunity to refine the existing answer (only if needed) as follows:        
        """
        
        document_prompt_template = refine_append_str + document_prompt_template

    # document_prompt = PromptTemplate(
    #     input_variables=["page_content"],
    #      template=document_prompt_template
    # )
    
    return document_prompt_template

In [None]:
from langchain_core.documents import Document

In [None]:
def get_split_docs(enhanced_context_text_list):

    enhanced_context_text_concat = '\n\n'.join([res_text for res_text in enhanced_context_text_list if res_text is not None])
    
    text_splitter = RecursiveCharacterTextSplitter(
        # Set a really small chunk size, just to show.
        chunk_size = 30000, # < Context Window of Gemini-Pro-1.0
        chunk_overlap = 1000
    )
    
    enhanced_context_split_text = text_splitter.split_text(enhanced_context_text_concat)
    enhanced_context_docs = text_splitter.create_documents(enhanced_context_split_text)
    
    return enhanced_context_docs
    

In [None]:
def rag_enhanced_context_llm(user_query: str, 
                           patient_name_dict: dict,
                          enhanced_context_text_list: list[str]):
    
    enhanced_context_resources_docs = get_split_docs(enhanced_context_text_list)
    
    user_query_chain = load_qa_chain(llm, chain_type="refine",
                                 verbose=False,
                                 return_intermediate_steps=True)
    
    # user_query_chain.document_prompt = document_prompt
    user_query_chain.initial_llm_chain.prompt.template = get_document_prompt(patient_name_dict['first_name'], patient_name_dict['last_name'], False)
    user_query_chain.refine_llm_chain.prompt.template = get_document_prompt(patient_name_dict['first_name'], patient_name_dict['last_name'], True)
    
    if len(enhanced_context_resources_docs)>0:
        prompt_inputs = {
        "question": user_query,
        "input_documents": enhanced_context_resources_docs
        }
        
        llm_response = user_query_chain.invoke(
            prompt_inputs, 
            return_only_outputs=False,
        )
        
        return llm_response['output_text']
    
    else:
        return 'No Information available'

In [None]:
# LangChain Debug
langchain_debug = False
set_debug(langchain_debug)
set_verbose(langchain_debug)

llm_response = rag_enhanced_context_llm(user_query=sample_query,
                                      patient_name_dict=patient_name_response,
                                     enhanced_context_text_list=enhanced_context_text_list)
print(f'Query: {sample_query}')
console.print(Markdown(llm_response))
# print(f'Response: {llm_response}')

## 3. Prompt Engineering

Prompt Engineering is the most critical component of designing LLM powered applications. The below prompt shows an example of all the components that needs to be considered while designing your prompts. Few Components are:

***Prompt Engineering Best Practices for Maximizing LLM Performance:***
- **Be Clear and Specific:** Use unambiguous language and explicit instructions.
- **Provide Context:** Give the LLM relevant background information for accurate responses.
- **Iterate and Refine:** Experiment with different prompts and improve them over time.
- **Handle Dates/Time:** Include current time, guide relative time calculation, and consider time zones.
- **Specify Output Format:** Clearly define the desired output format and provide examples.

In [222]:
def get_enhanced_prompt(patient_first_name: str, 
                        patient_last_name: str,
                       is_refine_prompt: bool) ->str : 

    FHIR_RESOURCE_LIST_URL = "https://build.fhir.org/resourceguide.html"
    # Get Current Date and Time in format MM/DD/YYYY
    current_datetime = datetime.now(timezone.utc).astimezone(timezone(offset=timedelta(hours=5, minutes=30)))
    current_datetime_str = current_datetime.strftime("%m/%d/%Y")
    
    user_query_prompt_text=f"""

You are a friendly Healthcare AI assistant and a healthcare specialist with deep knowledge of the FHIR standard.
Your task is to respond to Doctors queries based on patient information from FHIR (Fast Healthcare Interoperability Resources) data.
Refer to the official FHIR resource guide at {FHIR_RESOURCE_LIST_URL}. 
If needed, consult the detailed documentation linked from that guide.

**Context Handling:**

1. The context below contains entries about the patient's healthcare in FHIR format.
2. Identify and parse relevant FHIR resources within the context (e.g., Patient, Observation, Encounter).
3. Utilize the standard FHIR terminology and codes (e.g., LOINC for observations) to extract specific information.
4. Limit your answer to the information provided in the context. Do not make up facts.
5. Focus your answers on the patient specified in the user question. 
6. If you don't know the answer, simply state that you don't have enough information.
7. Ensure to look for the correct FHIR resource type in context to answer the query. For Example, to answer questions about claims history, search for FHIR resource of type 'Claim'
8. Utilize the 'CurrentDateTime' value in the context to calculate relative time periods (e.g., "last week") for queries referencing them.

**Date Handling:**

1. Pay very close attention to the dates in the context and user query.
2. Prioritize information from the most recent dates when responding to queries without a specified date.
3. Compare dates in the context and user query to determine the temporal relationship between events.
4. Use the 'CurrentDateTime' value, which is in MM/DD/YYYY format, to calculate relative time periods and filter relevant FHIR resources based on those periods.
5. Note that dates in the context are formatted as MM/DD/YYYY (e.g., 10/22/2015)

**Output Formatting:**

Before printing verify that you have considered the date and time in your response meets date time criteria in the user query (if mentioned).  

1. Respond to the user question with the above in mind.
2. Include the patient's name (given name and family name) in your response.
3. Format your output in markdown for clarity.
4. Make the patients name Bold.
5. Format the data into Markdown table with clear headers for information you think can be better represented in a table.
6. Recheck the Markdown syntax. Ensure proper spacing.

**Example Output:**
   - For vital signs: "[Patient Name]'s [Observation Name] was [Value] [Unit] on [Date]."
   - For encounters: "[Patient Name] had a [Encounter Type] on [Date] (reason: [Reason if available])."

----------------
Today's date - CurrentDateTime = {current_datetime_str}

**Context about the Patient:***
The context below belongs to the patient {patient_first_name} {patient_last_name}

**The context below contains entries about the patient's healthcare in FHIR format.**
{{page_content}}

"""
    
    
    if is_refine_prompt:
        refine_append_str = """
        We have provided an existing answer:
        
        {existing_answer}
        
        We have the opportunity to refine the existing answer (only if needed) as follows:
        
        """
        
        user_query_prompt_text = refine_append_str + user_query_prompt_text
        
    # user_query_prompt = PromptTemplate(
    #     input_variables=["page_content"],
    #     template=user_query_prompt_text
    # )
    return user_query_prompt_text
    # user_query_prompt.pretty_print()

## 4. Bringing it all together

In [247]:
debug_on = False
def answer_fhir_query(user_query: str):
    k=25 # Number of VectorSearch Results
    
    # print(f'User Query passed to answer_fhir_query answer_fhir_query method:\n{user_query}')
    
    patient_name_dict = get_patient_name_llm(user_query)
    resource_type_text = get_resource_type_llm(user_query)['resource_type']
    patient_id = get_patient_id(patient_name_dict)
    
    if debug_on:
        print(f'patient_name = {patient_name_dict}, {type(patient_name_dict)}')
        print(f'resource_type = {resource_type_text}, {type(resource_type_text)}')
        print(f'patient_id = {patient_id}, {type(patient_id)}')
        
    
    # Perform Similarity Search on Vector Search
    if resource_type_text in ['Encounter', 'Observation', 'MedicationRequest']:
        k = 100 # Exapand the Search Result
        print(f'Exapnding VectorSearch Result to {k} results')
    
    vs_search_resource_docs = retrieve_relevant_resources(user_query, 
                                                          k,
                                                          patient_id=patient_id,
                                                          resource_type_text=resource_type_text)
            
    vs_search_resource_ids = [doc.metadata['fhir_resource_id'][0] for doc in vs_search_resource_docs]
    if debug_on:
        print('\n')
        print(f'Resource Ids retrieved: {len(vs_search_resource_ids)}')
        # print(f'vs_search_resource_ids: {vs_search_resource_ids}')
        print('\n')
    
    if resource_type_text == 'Patient':
        enhanced_context_docs = vs_search_resource_docs
        
    else:
        # Neo4J query - for getting enhanced context
        enhanced_context_text_list = fetch_enhanced_context(in_resource_ids=vs_search_resource_ids)
        enhanced_context_docs = get_split_docs(enhanced_context_text_list)
        
    if debug_on:
            print('\n')
            print(f'# of Enhanced Context Resources: {len(enhanced_context_text_list)}')
            # print(f'Enhanced Context Text:')
            # pprint(enhanced_context_text_list)
            print('\n')
    
    # Create Chain
    user_query_chain = load_qa_chain(llm, chain_type="refine",
                                 verbose=True,
                                 return_intermediate_steps=True)
    
    user_query_chain.initial_llm_chain.prompt.template = get_document_prompt(patient_name_dict['first_name'], patient_name_dict['last_name'], False)
    user_query_chain.refine_llm_chain.prompt.template = get_document_prompt(patient_name_dict['first_name'], patient_name_dict['last_name'], True)
    
    if len(enhanced_context_docs)>0:
        prompt_inputs = {
            "question": user_query,
            "input_documents": enhanced_context_docs
        }
        
        llm_response = user_query_chain.invoke(
            prompt_inputs, 
            return_only_outputs=True,
        )
        
        return llm_response['output_text']
    
    else:
        return 'No Information available'    
    

In [None]:
# query = "What is the Body Height of Benjamin360 Hintz995 and when was it measured?"
# query = "What is the Body Weight of Benjamin360 Hintz995 and when was it measured?"
# query = "Tell me about the last 5 Benjamin360's Procedures?"
# query = "Tell me about observations performed by Benjamin360 in the last 2 years?"
# query = "What allergies does Akiko835 Larkin917 have?"
# query = "For the patient Annice210 McClure239, please identify all active medication requests within the provided text. Extract the medication name, dosage instructions, and duration if available. Only include medication requests where the status is explicitly mentioned as 'active'.  Present the extracted information in a markdown table with columns 'Medication Name', and 'Status'. If no active medication requests are found, please indicate 'No active medications found'."
# query = "What can you tell me about the claim made by Anneliese170 Berge125 on 2013-10-10."

# query = """Please provide a summary of Benjamin360 Hintz995's 5 most recent encounters. 
# Include the date (MM/DD/YYYY), type, class, reason (if available), and provider for each encounter. 
# If there are fewer than 5 encounters in the record, list all available encounters. 
# Present the information in a markdown table with the columns 'Date', 'Type', 'Class', 'Reason', and 'Provider'."""

In [None]:
langchain_debug = False
set_debug(langchain_debug)
set_verbose(langchain_debug)

# Vital Signs Query
vital_signs_query = """For the patient Akiko835 Jacelyn576, please provide a summary of latest vital signs. 
Include the latest value, unit of measurement, and date taken (MM/DD/YYYY) for each of the following vital signs:
If a particular vital sign is not found in the records, please indicate so with 'N/A'. 
Present the information in a markdown table with the columns 'Vital Sign', 'Value', 'Unit', and 'Date Taken'."""

# Medication Summary Query
medications_query = """For the patient Akiko835 Jacelyn576,  please identify all active medications within the provided text.
The FHIR Resource Type is Medication Request
Extract the medication name, dosage instructions, and duration if available. 
Only include medication requests where the status is explicitly mentioned as 'active'.  
Present the extracted information in a markdown table with columns 'Medication Name', and 'Status'. 
If no active medication requests are found, please indicate 'No active medications found'.
"""

query = medications_query


console.print(Markdown('# User Query'))
print(query)

console.print(Markdown('# LLM Response'))
llm_user_query_response = (answer_fhir_query(query))
console.print(Markdown(llm_user_query_response))


## 5. Generate a Patient Summary

Using our RAG application, we'll create a Patient Summary with the following sections:
- Demographics
- Medical History
- Medications
- Allergies
- Immunizations
- Vital Signs

Each section will be populated using ***carefully designed queries and well designed prompt***. This approach not only ***improves response accuracy but also minimizes LLM calls, reducing overall costs***.

The results will be dynamically combined into a Markdown report. 

<br>***Below is Markdown Formater LLM Chain***

In [251]:
def markdown_formatter_llm(patient_summary_report: str)-> str:
    markdown_prompt_template = """You are a Markdown expert. Your role is to validate and correct Markdown syntax, ensuring the output is well-formatted and adheres to standard Markdown conventions. 
Additionally, you are capable of handling basic HTML elements embedded within the Markdown text.

Your task is to analyze the input text and apply the following steps:

1. **Basic Markdown Validation:**
   * Verify the correct usage of headings (`#`, `##`, etc.), lists (`-`, `*`, `1.`), emphasis (`**`, `*`, `_`), links (`[text](link)`), images (`![alt text](image url)`), code blocks (```), and quotes (`>`).
   * Ensure proper indentation for nested lists and code blocks.
   * Check for any unbalanced or mismatched Markdown elements (e.g., missing closing tags for emphasis or links).

2. **Table Validation:**
   * Verify the correct structure of tables with pipes (`|`) and dashes (`-`) for headers and cell separators.
   * Check for any misaligned columns or inconsistent table formatting.
   * Ensure proper escaping of pipes (`\|`) within table cells if used as content.
   * Correct any empty header cells

3. **HTML Handling:**
   * Identify and preserve basic HTML elements (e.g., `<p>`, `<b>`, `<i>`, `<br>`, `<table>`, `<ul>`, `<ol>`, `<li>`) that are commonly used within Markdown.
   * Ensure that HTML tags are properly opened and closed.

4. **Correction:**
   * Fix any identified errors or inconsistencies in Markdown and HTML syntax.
   * Apply formatting improvements where possible to enhance readability.
   * If multiple corrections are possible, prioritize maintaining the original intent and structure of the input.

Example Output:

Provide only the corrected Markdown text.

Input Markdown Text:
{markdown_text}
    """
    
    markdown_prompt = PromptTemplate(
        template=markdown_prompt_template,
        input_variables=['markdown_text']
    )
    
    markdown_chain = LLMChain(
        llm=llm,
        prompt = markdown_prompt
    )
    
    llm_response = markdown_chain.invoke({'markdown_text': patient_summary_report})
    
    return llm_response['text']
    

<br>***Questions for each section of Patient Summary report***

These questions will be passed iteratively to the LLM to get the related section information.

In [252]:
# The FHIR Resource Type is 'MedicationRequest'
demographics_query = """please provide the following demographic information, if available in the context: full name, date of birth, gender, primary phone number, and home address. 
Present the information in a markdown table.
"""

medical_history_query = """please provide a summary of all Conditions where the clinical status for this condition is active.
Only include Conditions where the verification status is confirmed.
Only include disorder Conditions.
Extract the Condition, Clinical Status, Verification Status , onset date and recorded date. 
Format the output in a Markdown table format with columns 'Condition', 'Clinical Status', 'Verification Status', 'Onset Date' and 'Recorded date'
If no active conditions are found, please state 'No active medical conditions found'.
"""


# """please provide a summary of all active Conditions. 
# List the onset date, status and verification. 
# List the ouput in a Markdown table format. 
# If no active conditions are found, please state 'No active medical conditions found'. 
# """

medications_query = """please list all active Medication Request.
Extract the medication name, dosage, author-date, status and reason if available. 
Present the extracted information in a markdown table with column names 'Medication Name', 'Dosage', 'Authored-On', 'Status', 'Reason'. 
"""

# allergies_query = """please list all Allergy Intolerances.
# If no allergies are found, please state 'No known allergies found'.
# """
allergies_query = """please list all allergy intolerance, where the clinical status is active. 
Only include Allergy Intolerance where the verification status is confirmed.
Extract the Allergy Code, Criticality, Category, Clinical Status, Verification Status, Recorded date, Reaction Manifestations
Present this information in a markdown table with the columns 'Allergy Code', 'Criticality', 'Category', 'Clinical Status', 'Verification Status', 'Recorded Date', 'Reaction Manifestations'. 
If no allergies are found, please state 'No known allergies found'.
"""

# immunization_query = """please list all immunizations for, including the vaccine code, date administered, and status. 
# Present this information in a markdown table with the columns 'Vaccine Code', 'Date Administered', and 'Status'. 
# If no immunizations are found, please state 'No immunizations found'."""

immunization_query = """please list all immunization. 
Extract the Vaccine code, Occurrence date, Status
Present this information in a markdown table with the columns 'Vaccine Code', 'Date Administered', and 'Status'. 
If no immunizations are found, please state 'No immunizations found'."""

# vital_signs_query = """please provide a summary of latest vital signs.
# The FHIR Resource Type is Observation
# Include the latest value, unit of measurement, and date taken (MM/DD/YYYY) for each of the vital signs.
# If a particular vital sign is not found in the records, please indicate so with 'N/A'. 
# Present the information in a markdown table with the columns 'Vital Sign', 'Value', 'Unit', and 'Date Taken'."""

vital_signs_query = """please list of all observations where the observation category is Vital signs.

Only extract the Vital Sign with below Codes:
- Body Height (LOINC code 8302-2)
- Body Weight (LOINC code 29463-7)
- Body Mass Index (BMI) [Ratio] (LOINC code 39156-5)
- Body temperature (LOINC code 8310-5)
- Systolic blood pressure (LOINC code 8480-6)
- Diastolic blood pressure (LOINC code 8462-4)
- Heart rate (LOINC code 8867-4)
- Respiratory rate (LOINC code 9279-1)
- Oxygen saturation in Arterial blood by Pulse oximetry (LOINC code 59408-5)

For each of the above Vital Signs extract the most recent measurement.
Present this information in a markdown table with the columns 'Vital Sign Code', 'Value', 'Unit of Measurement',  'Date taken (MM/DD/YYYY)'.
If a particular vital sign is not found in the records, please indicate so with 'N/A'. 
"""

# encounter_query = """please provide a summary of 5 most recent encounters. 
# Include the date (MM/DD/YYYY), type, class, reason (if available), and provider for each encounter. 
# If there are fewer than 5 encounters in the record, list all available encounters. 
# Present the information in a markdown table with the columns 'Date', 'Type', 'Class', 'Reason', and 'Provider'."""

# encounter_query = """please list the 5 most recent encounters .
# If there are fewer than 5 encounters in the record, list all available encounters.  
# Include the date (MM/DD/YYYY), type, class, status, reason (if available), and provider for each encounter.
# Present the information in a markdown table with the columns 'Date', 'Type', 'Class', 'Status', 'Reason', and 'Provider'."""

patient_summary_query = [
    {'Demographics': demographics_query},
    {'Medical History': medical_history_query},
    {'Medications': medications_query},
    {'Allergies': allergies_query},
    {'Immunizations': immunization_query},
    {'Vital Signs': vital_signs_query},
    # {'Encounters': encounter_query}
    
]

In [253]:
def generate_patient_summary(patient_name: str):
    
    report_text = '# Patient Summary\n'
    
    # console.print(Markdown('# Patient Summary'))
    
    # patient_context = f'For the given patient: {patient_name}, '
    patient_context = f"For the patient {patient_name}, "
    
    for section in patient_summary_query:
        section_title = list(section.keys())[0]
        
        # if section_title == 'Encounters':
        print(f'Processing Section: {section_title}')

        # report_text = report_text + '___\n'
        section_header = f'___\n## {section_title}\n___\n\n'

        report_text = report_text + section_header
        # report_text = report_text + '___\n'
        # console.print(Markdown(section_header))

        section_question = list(section.values())[0]
        # llm_user_query_response = patient_context + section_question + output_formatting_instructions
        # llm_user_query_response = answer_fhir_query(patient_context + section_question + output_formatting_instructions)
        summary_user_query = patient_context + section_question
        print(summary_user_query)
        llm_user_query_response = answer_fhir_query(summary_user_query)
        report_text = report_text + llm_user_query_response + '\n'

        # console.print(Markdown(llm_user_query_response))
    
    # console.print(Markdown(report_text))
    report_end_text = '___\n***<p style="text-align: center;">END OF REPORT</p>***\n___\n'
    report_text = report_text + report_end_text
    
    
    # Format Markdown and correct syntax errors
    report_text = markdown_formatter_llm(report_text)
    return report_text
        
#generate_patient_summary('Akiko835 Larkin917')  

In [254]:
langchain_debug = False
set_debug(langchain_debug)
set_verbose(langchain_debug)

# patient_summary_name = 'Benjamin360 Hintz995'
patient_summary_name  = 'Carey440 Stroman228' # Carey440_Stroman228 has Allergies
# patient_summary_name = 'Annice210 McClure239'
# patient_summary_name = 'Akiko835 Jacelyn576'


patient_summary_response = generate_patient_summary(patient_summary_name) 

patient_summary_md_file = f'{patient_summary_name}.md'
with open (patient_summary_md_file, 'w') as f:
    f.write(patient_summary_response)
    
print(f'The patient summary file "{patient_summary_name}.md" has been generated')

Processing Section: Demographics
For the patient Carey440 Stroman228, please provide the following demographic information, if available in the context: full name, date of birth, gender, primary phone number, and home address. 
Present the information in a markdown table.



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
     The context below contains entries about the patient's healthcare. 
     Please limit your answer to the information provided in the context. Do not make up facts.
     Please limit your answers only about the patient in the user question. If you do not find the patient name in the context.
     If you don't know the answer, just say that you don't know, don't try to make up an answer.
     If you are asked about the patient's name and one the entries is of type patient, you should look for the first given name and family name and answer with: [given] [family]

     The context

Please enter the patient's full name:  Carey440 Stroman228
Is 'Carey440 Stroman228' correct? (y/n):  y


Exapnding VectorSearch Result to 100 results


[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
     The context below contains entries about the patient's healthcare. 
     Please limit your answer to the information provided in the context. Do not make up facts.
     Please limit your answers only about the patient in the user question. If you do not find the patient name in the context.
     If you don't know the answer, just say that you don't know, don't try to make up an answer.
     If you are asked about the patient's name and one the entries is of type patient, you should look for the first given name and family name and answer with: [given] [family]

     The context below belongs to the patient Carey440 Stroman228:
     The current Date & Time is 05/07/2024

     Context information is below.
     The type of information in this entry is medication request. The status for this medication re

<br> **Note**: 

Few Patient Summary sections might not get popolated e.g. Vital Signs - Systolic blood pressure.
For these scenarios you can:
- Prompt the LLM for each field of the section.
- Create dedicated LLMChains that can populate each section,

---
## 6. Cleaning Up

<div class="alert alert-block alert-warning">
<b>⚠️ Important: To avoid incurring charges, please delete the Google Cloud resources used in this tutorial. ⚠️</b>
</div>



In [None]:
CLEANUP_RESOURCES = True

### Delete Neo4J Docker

In [None]:
# Wipe Neo4J Database
graph = Graph(NEO4J_URL, NEO4J_USER, NEO4J_PASSWORD)
if CLEANUP_RESOURCES:
    graph.wipe_database()

In [None]:
# DELETE NEO4J CONTAINER
if CLEANUP_RESOURCES:
    ! docker stop testneo4j
    ! docker rm -fv testneo4j
    ! sudo rm -rf $HOME/neo4j

### Delete Vector Search Indexes & Index-Endpoints

- Delete ME Vector Search Index and Endpoints

In [None]:
me_utils = MatchingEngineUtils(PROJECT_ID, REGION, ME_INDEX_NAME)
ME_INDEX_ID, ME_INDEX_ENDPOINT_ID = me_utils.get_index_and_endpoint()

# Delete Endpoint
if CLEANUP_RESOURCES and "me_utils" in globals():
    print(
        f"Undeploying all deployed indexes and deleting the index endpoint {ME_INDEX_ENDPOINT_ID}"
    )
    me_utils.delete_index_endpoint()

# Delete Index     
if CLEANUP_RESOURCES and "me_utils" in globals():
    print(f"Deleting the index {ME_INDEX_ID}")
    me_utils.delete_index()    

# Delete Bucket    
if CLEANUP_RESOURCES:
    # Delete contents of the bucket 
    ! gsutil -m rm -r gs://{ME_EMBEDDING_GCS_DIR}
    ! gsutil rb gs://{ME_EMBEDDING_GCS_DIR}

print('Vector Search and GCS Bucket Cleaning complete!')

- Delete ME_ENHANCED Vector Search Index and Endpoints

In [None]:
me_utils_enhanced = MatchingEngineUtils(PROJECT_ID, REGION, ME_ENHANCED_CONTEXT_INDEX_NAME)
ME_ENHANCED_INDEX_ID, ME_ENHANCED_INDEX_ENDPOINT_ID = me_utils_enhanced.get_index_and_endpoint()

# Delete Endpoint
if CLEANUP_RESOURCES and "me_utils_enhanced" in globals():
    print(
        f"Undeploying all deployed indexes and deleting the index endpoint {ME_ENHANCED_INDEX_ENDPOINT_ID}"
    )
    me_utils_enhanced.delete_index_endpoint()

# Delete Index    
if CLEANUP_RESOURCES and "me_utils_enhanced" in globals():
    print(f"Deleting the index {ME_ENHANCED_INDEX_ID}")
    me_utils_enhanced.delete_index()

# Delete Bucket
if CLEANUP_RESOURCES:
    ! gsutil -m rm -r gs://{ME_ENHANCED_EMBEDDING_GCS_DIR}
    ! gsutil rb gs://{ME_ENHANCED_EMBEDDING_GCS_DIR}