#### Build the vector search index

In [None]:
import openai
import os
import json
from tenacity import retry, wait_random_exponential, stop_after_attempt  
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient  
from azure.search.documents.models import Vector  
from azure.search.documents.indexes.models import (  
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,  
    PrioritizedFields,  
    SemanticField,  
    SearchField,  
    SemanticSettings,  
    VectorSearch,  
    VectorSearchAlgorithmConfiguration,  
)  
  
# Azure Cognitive Search connection settings, all taken from environment
# variables. Indexing os.environ directly (rather than .get) means a missing
# variable raises KeyError here instead of failing later with a None value.
az_search_credential = AzureKeyCredential(os.environ["AZURE_SEARCH_ADMIN_KEY"])
az_search_index_name = os.environ["AZURE_SEARCH_INDEX_NAME"]
az_search_service_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]

In [None]:
# Point the module-level openai client at the embeddings endpoint.
# Every value is required: a missing variable raises KeyError immediately.
openai.api_base = os.environ["OPENAI_API_EMBEDDINGS_BASE"]
openai.api_key = os.environ["OPENAI_API_EMBEDDINGS_KEY"]
openai.api_type = os.environ["OPENAI_API_TYPE"]
openai.api_version = os.environ["OPENAI_API_EMBEDDINGS_VERSION"]

# Deployment name passed as `engine` when requesting embeddings.
embeddings_deploy_name = os.environ["OPENAI_API_EMBEDDINGS_DEPLOY"]


In [None]:
# Sanity check: echo the loaded configuration, one value per line.
print(
    az_search_index_name,
    az_search_service_endpoint,
    az_search_credential,
    embeddings_deploy_name,
    sep="\n",
)

In [None]:

def _vector_field(field_name):
    """Return a searchable float-collection field tied to "kql-vector-config".

    Both embedding fields share the same shape: 1536 dimensions (presumably the
    output size of the embeddings deployment -- confirm if the model changes).
    """
    return SearchField(
        name=field_name,
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=1536,
        vector_search_configuration="kql-vector-config",
    )


# Index schema: a string key, the two text fields, and one vector per text field.
fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SearchableField(name="NLQuery", type=SearchFieldDataType.String),
    SearchableField(name="KQLQuery", type=SearchFieldDataType.String),
    _vector_field("NLQueryVector"),
    _vector_field("KQLQueryVector"),
]

# HNSW algorithm configuration referenced by name from the vector fields above.
vector_search = VectorSearch(
    algorithm_configurations=[
        VectorSearchAlgorithmConfiguration(
            name="kql-vector-config",
            kind="hnsw",
            hnsw_parameters=dict(
                m=4,
                efConstruction=400,
                efSearch=500,
                metric="cosine",
            ),
        ),
    ],
)

# Semantic configuration: NLQuery is the title field, KQLQuery the content field.
semantic_config = SemanticConfiguration(
    name="kql-semantic-config",
    prioritized_fields=PrioritizedFields(
        title_field=SemanticField(field_name="NLQuery"),
        prioritized_content_fields=[SemanticField(field_name="KQLQuery")],
    ),
)
semantic_settings = SemanticSettings(configurations=[semantic_config])

# Assemble the index definition and create it (or update it if it already exists).
index = SearchIndex(
    name=az_search_index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_settings=semantic_settings,
)
index_client = SearchIndexClient(
    endpoint=az_search_service_endpoint,
    credential=az_search_credential,
)
result = index_client.create_or_update_index(index)
print(f' {result.name} created')

In [None]:
# Read the example file and parse it as JSON; the result is kept in
# `input_data` for the embedding step.
with open('./data/kql_examples.json', mode='r', encoding='utf-8') as file:
    input_data = json.loads(file.read())

@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def generate_embeddings(text):
    """Return the embedding vector for ``text`` from the embeddings deployment.

    The call is retried with randomized exponential backoff (waits between
    1 and 20 seconds) and gives up after 6 attempts.

    Args:
        text: The string to embed.

    Returns:
        The ``embedding`` value of the first item in the response's ``data``.
    """
    return openai.Embedding.create(
        input=text,
        engine=embeddings_deploy_name,
    )['data'][0]['embedding']


# Make sure the output directory exists before spending any embedding calls,
# so a missing ./output does not fail only at the final write.
os.makedirs("./output", exist_ok=True)

# Attach one embedding per text field to every example document.
for item in input_data:
    input_query = item['NLQuery']
    output_query = item['KQLQuery']
    input_query_embeddings = generate_embeddings(input_query)
    output_query_embeddings = generate_embeddings(output_query)
    item['NLQueryVector'] = input_query_embeddings
    # Fix: the KQL vector must hold the KQL text's embedding. Previously the
    # NL embedding was stored here, so both vector fields were identical.
    item['KQLQueryVector'] = output_query_embeddings

# Persist the enriched documents so the upload step can run independently.
with open("./output/docVectors.json", "w") as f:
    json.dump(input_data, f)

In [None]:
# Upload the embedded documents to the index.
with open('./output/docVectors.json', 'r', encoding='utf-8') as file:
    documents = json.load(file)
search_client = SearchClient(endpoint=az_search_service_endpoint,
                             index_name=az_search_index_name,
                             credential=az_search_credential)
result = search_client.upload_documents(documents)

# upload_documents returns one IndexingResult per document; a document can be
# rejected without the call raising, so report what actually succeeded.
failed_keys = [r.key for r in result if not r.succeeded]
print(f"Uploaded {len(documents) - len(failed_keys)} documents")
if failed_keys:
    print(f"Failed to upload {len(failed_keys)} documents: {failed_keys}")