# New index deployment 
This notebook creates a new index for Vector Search. Use it whenever the index needs to be altered, for example to add more metadata or to change the embedding or chunking strategy.


In [None]:
# flake8: noqa
import time
import os
from src.vectordb.gcp_vector_search.transform_and_load import single_text_embedding
from google.cloud import aiplatform
import json

In [None]:
# Environment configuration and resource names for this deployment run.
# Everything is exported through os.environ so that code reading these
# variables sees the same values as this notebook.
os.environ.update(
    {
        "DOC_AI_LOCATION": "us",
        "DOC_AI_PROCESSOR_ID": "e977fdd46ee23308",
        "PROJECT_ID": "602280418311",
        "LOCATION": "us-west1",
        "GCS_BUCKET": "chatbot_docs",
    }
)

# Vector size handed to the index as its `dimensions` setting.
DIMENSIONS = 768

# The bucket root serves both as the staging bucket and as the folder the
# index reads its embeddings from.
BUCKET_URI = "gs://" + os.environ["GCS_BUCKET"]
EMBEDDING_DIR = BUCKET_URI

# Timestamp of this run (YYYYmmdd-HHMMSS); not referenced by the cells shown here.
version = time.strftime("%Y%m%d-%H%M%S")

# Display names for the index and its endpoint share a common suffix.
suffix = "_langchain_retriever"
index_display_name = "chatbot_docs_working" + suffix
endpoint_display_name = "chatbot_docs_endpoint_working" + suffix

In [None]:
# Seed record that fixes the fields stored in the index (id, text, filename,
# embedding). It is written to a local data.json so it can be copied to GCS.
initial_config = dict(
    id="test_id",
    text="test text",
    filename="test_filename.pdf",
    embedding=single_text_embedding("test text"),
)

# Serialise first, then write the record out in a single call.
serialized_record = json.dumps(initial_config)
with open("data.json", "w") as out_file:
    out_file.write(serialized_record)

# Uncomment the next line to upload the seed record to the embeddings folder:
# !gsutil cp data.json {EMBEDDING_DIR}/file.json

In [None]:
# Initialise the Vertex AI SDK with the project, region and staging bucket
# configured earlier in this notebook.
aiplatform.init(
    project=os.environ["PROJECT_ID"],
    location=os.environ["LOCATION"],
    staging_bucket=BUCKET_URI,
)

In [None]:
# Settings for the new Tree-AH index, gathered in one place so they are easy
# to review before the index is created.
index_settings = {
    "display_name": index_display_name,
    "contents_delta_uri": EMBEDDING_DIR,
    "dimensions": DIMENSIONS,
    "approximate_neighbors_count": 10,
    "distance_measure_type": "DOT_PRODUCT_DISTANCE",
}

# Create the index from the embeddings found under EMBEDDING_DIR.
my_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(**index_settings)

In [None]:
# Step 1: create an index endpoint with its public endpoint enabled.
my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
    display_name=endpoint_display_name,
    public_endpoint_enabled=True,
)

# Step 2: deploy the freshly created index to that endpoint. The endpoint's
# display name doubles as the deployed index id, and the name is rebound to
# whatever deploy_index returns.
my_index_endpoint = my_index_endpoint.deploy_index(
    index=my_index,
    deployed_index_id=endpoint_display_name,
)