### Indexing
This section was developed following the official Vertex AI sample notebook:
https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/feature_store/online_feature_serving_and_vector_retrieval_bigquery_data_with_feature_store.ipynb

We use Feature Store because it is the best-suited approach for production-ready development and online serving.

In [None]:
import vertexai
from google.cloud import bigquery
from google.cloud.aiplatform_v1.types import NearestNeighborQuery
from vertexai.resources.preview import (FeatureOnlineStore, FeatureView,
                                        FeatureViewBigQuerySource)
from vertexai.resources.preview.feature_store import utils

#set project info
PROJECT = !gcloud config get-value project
PROJECT_ID = PROJECT[0]
REGION = "us-central1" 

bq_client = bigquery.Client(project=PROJECT_ID)


### 1- Create Online Feature Store

In [None]:
# Feature online store IDs may only contain lowercase letters, digits and
# underscores (and must not start with a digit), so the ID is all-lowercase.
FEATURE_ONLINE_STORE_ID = "nine_quality_test_mm_featurestore"  # @param {type: "string"}

# Create the online store through the SDK's optimized-store constructor and
# keep the handle; later cells create the feature view on it.
ofs = FeatureOnlineStore.create_optimized_store(FEATURE_ONLINE_STORE_ID)

# Display the full resource description of the store that was just created.
ofs.gca_resource

In [None]:
# Look the store up again by its ID and display the resource, which confirms
# that the creation step succeeded.
(
    FeatureOnlineStore(FEATURE_ONLINE_STORE_ID)
    .gca_resource
)

### 2- Create Feature View Instance

In [None]:
# Sync schedule for the feature view, written as a cron expression with a
# time-zone prefix (fields: minute, hour, day of month, month, day of week).
CRON_SCHEDULE = "TZ=America/Los_Angeles 00 13 11 8 *"  # @param {type: "string"}

# ID under which the feature view is created in the online store.
FEATURE_VIEW_ID = "feature_view_nine_quality_test"  # @param {type: "string"}

In [2]:
# --- Vector index settings ---------------------------------------------------
# Column holding the embedding vectors, and the length of each vector.
EMBEDDING_COLUMN = "multimodal_embedding"  # @param {type: "string"}
DIMENSIONS = 1408  # @param {type: "number"}

# Optional tuning value for the index.
LEAF_NODE_EMBEDDING_COUNT = 10000  # @param {type: "number"}

# Optional crowding column.
# TODO: set this later
# CROWDING_COLUMN = "cited_by_filing_date"  # @param {type: "string"}

# Optional columns that may be used for filtering.
FILTER_COLUMNS = ["id", "media_type", "path", "end_offset_sec_chapter", "start_offset_sec_chapter"]  # @param

In [None]:
# BigQuery table that holds the embeddings to be served.
BQ_DATASET_ID = "Nine_Quality_Test"
BQ_TABLE_ID = "multimodal_embeddings"
BQ_TABLE_ID_FQN = f"{PROJECT_ID}.{BQ_DATASET_ID}.{BQ_TABLE_ID}"
DATA_SOURCE = f"bq://{BQ_TABLE_ID_FQN}"

# Vector-search index settings, built from the constants defined earlier.
index_config = utils.IndexConfig(
    embedding_column=EMBEDDING_COLUMN,
    dimensions=DIMENSIONS,
    algorithm_config=utils.TreeAhConfig(),
    crowding_column=None,  # TODO: define CROWDING_COLUMN
    # filter_columns=FILTER_COLUMNS,  # for multimodal embeddings this can be set to None
)

# Source description: rows are identified by the "id" column of the table.
big_query_source = FeatureViewBigQuerySource(
    entity_id_columns=["id"],
    uri=DATA_SOURCE,
)

print(f"index_config: {index_config}")

# Create the feature view on the online store. The sync schedule is optional
# (None is allowed); here the cron schedule defined earlier is passed.
nine_fv = ofs.create_feature_view(
    FEATURE_VIEW_ID,
    source=big_query_source,
    index_config=index_config,
    sync_config=CRON_SCHEDULE,
)

In [None]:
# Fetch the feature view by ID from its online store and display the resource,
# which confirms that the feature view was created.
(
    FeatureView(FEATURE_VIEW_ID, feature_online_store_id=FEATURE_ONLINE_STORE_ID)
    .gca_resource
)
     

In [None]:
# For multimodal embeddings this filter can be skipped, unless there is a description column you want to search through.
# country_filter = NearestNeighborQuery.StringFilter(
#     name="country",
#     allow_tokens=["WIPO (PCT)"],  # try different allow tokens
#     deny_tokens=["United States"],  # try different deny tokens
# )

In [None]:
# Placeholder query vector: one component per embedding dimension, all ones.
EMBEDDINGS = [1 for _ in range(DIMENSIONS)]

# Nearest-neighbour lookup against the feature view; the result is displayed
# as the cell output.
nine_fv.search(
    neighbor_count=10,
    embedding_value=EMBEDDINGS,
    return_full_entity=True,  # return the entities together with their metadata
    # string_filters=[country_filter],  # for multimodal embedding this can be set to None, unless having a description column
)