In [1]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchField,
    SearchFieldDataType,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SearchIndex,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
    ScoringProfile,
    TagScoringFunction,
    TagScoringParameters
)
from dotenv import load_dotenv

import os

In [2]:
# Load settings from the local .env file. override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Azure AI Search connection settings (consumed by the index client below).
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_KEY = os.getenv("AZURE_SEARCH_KEY")

# Azure OpenAI / storage settings. They are not used in this cell and are
# deliberately left unvalidated here; any of them may be None if unset.
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_VERSION = os.getenv("AZURE_OPENAI_VERSION")
AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
AZURE_STORAGE_CONNECTION = os.getenv("AZURE_STORAGE_CONNECTION")
# NOTE: despite its name, this constant is read from the embedding
# *deployment* name variable, not from a model-name variable.
AZURE_OPENAI_EMBEDDING_MODEL = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")

# Fail fast with an actionable message. Without this check a missing search
# setting is passed on as None and only surfaces later as an SDK error that
# does not say which variable was absent.
_missing_search_settings = [
    env_name
    for env_name, env_value in (
        ("AZURE_SEARCH_SERVICE", AZURE_SEARCH_SERVICE),
        ("AZURE_SEARCH_KEY", AZURE_SEARCH_KEY),
    )
    if not env_value
]
if _missing_search_settings:
    raise ValueError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_search_settings)
        + ". Define them in the .env file or the process environment."
    )

# Key-based credential shared by the search clients in this notebook.
credential = AzureKeyCredential(AZURE_SEARCH_KEY)

In [4]:
# Re-declare the existing index (name, fields, vector search) and add a
# semantic configuration plus a tag-based scoring profile, then push the
# definition to the service with create_or_update_index.

# Existing index name, client and fields
index_name = "py-rag-training-idx"
index_client = SearchIndexClient(
    endpoint=AZURE_SEARCH_SERVICE, credential=credential)
fields = [
    SearchField(name="parent_id", type=SearchFieldDataType.String),
    SearchField(name="title", type=SearchFieldDataType.String),
    # Filterable string collection; also the target of the scoring profile below.
    SearchField(name="locations", type=SearchFieldDataType.Collection(
        SearchFieldDataType.String), filterable=True),
    # Document key.
    SearchField(name="chunk_id", type=SearchFieldDataType.String, key=True,
                sortable=True, filterable=True, facetable=True, analyzer_name="keyword"),
    SearchField(name="chunk", type=SearchFieldDataType.String,
                sortable=False, filterable=False, facetable=False),
    # Vector field. NOTE(review): 1024 presumably has to match the output size
    # of the embedding deployment and the already-existing index - confirm.
    SearchField(name="text_vector", type=SearchFieldDataType.Collection(
        SearchFieldDataType.Single), vector_search_dimensions=1024, vector_search_profile_name="myHnswProfile")
]

# Read the embedding deployment name once instead of looking it up twice.
embedding_deployment_name = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
# model_name identifies the underlying embedding model, which is not
# necessarily what the deployment was called. AZURE_OPENAI_EMBEDDING_MODEL_NAME
# is an optional override; when it is unset the deployment name is used, which
# is exactly what this cell did before.
embedding_model_name = (
    os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_NAME") or embedding_deployment_name
)

# Existing vector search configuration
vector_search = VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(name="myHnsw"),
    ],
    profiles=[
        # The profile ties the vector field to its algorithm and vectorizer.
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
            vectorizer_name="myOpenAI",
        )
    ],
    vectorizers=[
        AzureOpenAIVectorizer(
            vectorizer_name="myOpenAI",
            kind="azureOpenAI",
            parameters=AzureOpenAIVectorizerParameters(
                resource_url=os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT"),
                # May be None when unset; left unvalidated on purpose.
                api_key=os.getenv("AZURE_OPENAI_EMBEDDING_API_KEY"),
                deployment_name=embedding_deployment_name,
                model_name=embedding_model_name,
            ),
        ),
    ],
)

# New semantic configuration
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="title"),
        keywords_fields=[SemanticField(field_name="locations")],
        content_fields=[SemanticField(field_name="chunk")]
    )
)

# Create the semantic settings with the configuration
semantic_search = SemanticSearch(configurations=[semantic_config])

# New scoring profile: a tag function on "locations" with boost 5.0. The tag
# values are supplied at query time through the parameter named "tags".
scoring_profiles = [
    ScoringProfile(
        name="my-scoring-profile",
        functions=[
            TagScoringFunction(
                field_name="locations",
                boost=5.0,
                parameters=TagScoringParameters(
                    tags_parameter="tags",
                ),
            )
        ]
    )
]


# Assemble the full definition and send it to the service.
index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search,
                    semantic_search=semantic_search, scoring_profiles=scoring_profiles)
result = index_client.create_or_update_index(index)

print(f"{result.name} updated")

py-rag-training-idx updated
