# Retrieval part of Retrieval-Augmented Generation (RAG)

In [2]:
def store_chunk_in_firestore(chunk_id, chunk_data):
    """
    Store a chunk document in Firestore unless a document with the same ID exists.

    Args:
        chunk_id: Document ID to write under COLLECTION_NAME.
        chunk_data: Data passed to the document reference's ``set()``.

    Returns:
        None. Any error is printed rather than raised, so callers cannot tell
        a failed write from a successful one by the return value.
    """
    try:
        db = firestore.Client(PROJECT_ID)
        doc_ref = db.collection(COLLECTION_NAME).document(chunk_id)
        if doc_ref.get().exists:
            print(f"Document with ID: {chunk_id} already exists. Skipping write.")
            return
        doc_ref.set(chunk_data)
        # Report the collection actually written to. The previous message used an
        # undefined lowercase name, so a successful write was logged as an error.
        print(f"Successfully stored document with ID: {chunk_id} in collection '{COLLECTION_NAME}'")

    except Exception as e:
        print(f"Error storing document {chunk_id}: {e}")

def get_chunk_in_firestore(chunk_id):
    """
    Look up a chunk document in Firestore by its ID.

    Args:
        chunk_id: Document ID to read from COLLECTION_NAME.

    Returns:
        The result of the snapshot's ``to_dict()`` when the document exists;
        otherwise None (also None when any error occurs, which is printed).
    """
    try:
        snapshot = (
            firestore.Client(PROJECT_ID)
            .collection(COLLECTION_NAME)
            .document(chunk_id)
            .get()
        )

        # Guard clause: nothing to return when the document is missing.
        if not snapshot.exists:
            print(f"No document found with ID: {chunk_id}")
            return None

        print(f"Successfully retrieved document: {chunk_id}")
        return snapshot.to_dict()
    except Exception as err:
        print(f"Error retrieving document {chunk_id}: {err}")
        return None

def create_and_deploy_vector_index():
    """
    Creates a Vertex AI Vector Search Index, an Index Endpoint, and deploys the index.
    This is a one-time setup process.

    Each step tolerates a resource that is already present: when the error text
    contains "already exists" (index / endpoint) the existing resource is looked
    up by display name and reused, and when it contains "has been deployed" the
    deployment step is treated as done.

    Returns:
        The index ID, i.e. the last path segment of the index resource name.

    Raises:
        Exception: Any error from the Vertex AI SDK that is not one of the
            tolerated cases above is re-raised unchanged. The original error is
            also re-raised when a resource is reported as existing but cannot
            be found by display name.
    """

    # Create the index
    try:
        vector_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
            display_name = INDEX_NAME,
            dimensions = GEMINI_EMBEDDING_MODEL_DIMENSION,
            approximate_neighbors_count = 150,
            distance_measure_type = "DOT_PRODUCT_DISTANCE",
            index_update_method = "STREAM_UPDATE"
        )
    except Exception as e:
        if "already exists" not in str(e):
            raise
        print(f"Index '{INDEX_NAME}' already exists. Reusing it.")
        # Bind the existing index so the deploy step and the return value below
        # have something to work with (previously both names were left unbound).
        existing_indexes = aiplatform.MatchingEngineIndex.list(
            filter = f'display_name="{INDEX_NAME}"'
        )
        if not existing_indexes:
            raise
        vector_index = existing_indexes[0]

    index_id = vector_index.resource_name.split("/")[-1]

    # Create an Index Endpoint
    try:
        index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
            display_name = INDEX_ENDPOINT_NAME, public_endpoint_enabled = True
        )
    except Exception as e:
        if "already exists" not in str(e):
            raise
        print(f"Endpoint '{INDEX_ENDPOINT_NAME}' already exists. Reusing it.")
        existing_endpoints = aiplatform.MatchingEngineIndexEndpoint.list(
            filter = f'display_name="{INDEX_ENDPOINT_NAME}"'
        )
        if not existing_endpoints:
            raise
        index_endpoint = existing_endpoints[0]

    # Deploy the Index to the Endpoint
    try:
        # A unique ID for this deployment
        deployed_index_id = f"gemini_deployed_{int(time.time())}"
        index_endpoint.deploy_index(
            index = vector_index, deployed_index_id = deployed_index_id
        )
    except Exception as e:
        if "has been deployed" not in str(e):
            raise
        print("Index is already deployed to this endpoint.")

    return index_id

def get_index_id():
    """
    Return the ID of the Vertex AI index whose display name equals INDEX_NAME.

    The ID is the last path segment of the first matching index's resource name.

    Raises:
        Exception: If listing fails or no index matches. The message is always
            prefixed with "An error occurred: " and the underlying error is
            attached as ``__cause__``.
    """

    try:
        indexes = aiplatform.MatchingEngineIndex.list(
            filter = f'display_name="{INDEX_NAME}"'
        )
        # Fail with an explicit message instead of the opaque
        # "list index out of range" that indexes[0] would produce.
        if not indexes:
            raise LookupError(f"no index found with display name '{INDEX_NAME}'")

        return indexes[0].resource_name.split("/")[-1]

    except Exception as e:
        raise Exception(f"An error occurred: {e}") from e

def generate_embeddings_and_prepare_datapoints(chunks):
    """
    Generates a Gemini embedding for every chunk and formats the results as
    datapoints for uploading to Vertex AI Vector Search.

    Args:
        chunks: Mapping from a key to a sequence of chunk dictionaries. Each
            chunk dictionary must provide a "content" entry, which is the value
            sent to the embedding model.

    Returns:
        A list of IndexDatapoint objects, one per successfully embedded chunk.
        Each datapoint ID is "<key>-<position>", where position is the chunk's
        index within its key's sequence. Chunks whose embedding fails are
        reported with print and skipped, so the list may be shorter than the
        number of input chunks.
    """

    datapoints = []
    for key, values in chunks.items():
        for i, chunk in enumerate(values):
            # Built up front so the error message can name the exact datapoint;
            # the bare position alone is ambiguous across keys.
            datapoint_id = f"{key}-{i}"
            try:
                embeddings = client.models.embed_content(
                    model = GEMINI_EMBEDDING_MODEL,
                    contents = chunk["content"],
                    config = types.EmbedContentConfig(task_type = "RETRIEVAL_DOCUMENT"),
                ).embeddings

                # Create the datapoint structure required by Vertex AI
                # store unique id and embedding only
                datapoints.append(
                    IndexDatapoint(
                        datapoint_id = datapoint_id,
                        feature_vector = embeddings[0].values
                    )
                )
            except Exception as e:
                # Best effort: report the failure and move on to the next chunk.
                print(f"Error generating embedding for chunk {datapoint_id}: {e}")

    return datapoints

def upload_datapoints_to_vertex_ai(index_resource_name, datapoints):
    """
    Upsert datapoints into an existing Vertex AI Vector Search index.

    Args:
        index_resource_name: Passed as ``index_name`` to
            ``aiplatform.MatchingEngineIndex`` to select the target index.
        datapoints: Datapoints handed to the index's ``upsert_datapoints``.

    Raises:
        Exception: Any error raised by the Vertex AI SDK propagates unchanged,
            keeping its original type and traceback (it was previously
            re-wrapped in a bare ``Exception``).
    """

    index = aiplatform.MatchingEngineIndex(index_name = index_resource_name)
    index.upsert_datapoints(datapoints = datapoints)