### Step_0: Load Libraries

In [13]:
# Import required libraries  
import os  
import json  
import openai  
import pandas as pd
from dotenv import load_dotenv  
from tenacity import retry, wait_random_exponential, stop_after_attempt  
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient  
from azure.search.documents.models import Vector  
from azure.search.documents.indexes.models import (  
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,  
    PrioritizedFields,  
    SemanticField,  
    SearchField,  
    SemanticSettings,  
    VectorSearch,  
    HnswVectorSearchAlgorithmConfiguration,  
)  
  

# Configure environment variables
# load_dotenv() reads key/value pairs from a local .env file (when one exists)
# into the process environment, so secrets stay out of the notebook itself.
load_dotenv()

# The Azure Cognitive Search settings are consumed immediately (the credential
# below, and the index client / index name in the next step). Fail fast with a
# message that names every missing variable instead of letting a None value
# surface later as a less helpful SDK error.
_required_search_vars = (
    "AZURE_SEARCH_SERVICE_ENDPOINT_2",
    "AZURE_SEARCH_INDEX_NAME_2",
    "AZURE_SEARCH_ADMIN_KEY_2",
)
_missing_search_vars = [
    var_name for var_name in _required_search_vars if not os.getenv(var_name)
]
if _missing_search_vars:
    raise ValueError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_search_vars)
        + ". Define them in the environment or in a .env file."
    )

service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT_2")
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME_2")
key = os.getenv("AZURE_SEARCH_ADMIN_KEY_2")

# Azure OpenAI settings are applied as module-level attributes of the openai
# package. They are not validated here because nothing in this cell uses them;
# an unset variable simply leaves the attribute as None, exactly as before.
openai.api_type = "azure"
openai.api_key = os.getenv("AZURE_OPENAI_API_KEY")
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_version = os.getenv("AZURE_OPENAI_API_VERSION")

# NOTE(review): presumably the embedding model/deployment name used by later
# cells; it is not referenced in the cells visible here - confirm.
model: str = "text-embedding-ada-002"

# Admin-key credential shared by the Azure Cognitive Search clients.
credential = AzureKeyCredential(key)


### Step_1: Create Search Index

In [14]:
# Create a search index
# Builds the index definition (schema, vector search, semantic ranking) and
# sends it to the service with create_or_update_index.
index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)

# Values shared by several definitions below, named once to keep them in sync.
_vector_config_name = "modified-vector-config"
# NOTE(review): presumably the output size of the embedding model configured
# earlier in the notebook - confirm if the model ever changes.
_embedding_dimensions = 1536


def _metadata_field(field_name):
    """Return a searchable string field that is also sortable, filterable and facetable."""
    return SearchableField(
        name=field_name,
        type=SearchFieldDataType.String,
        key=False,
        sortable=True,
        filterable=True,
        facetable=True,
    )


def _vector_field(field_name):
    """Return a float-collection field bound to the shared vector search configuration."""
    return SearchField(
        name=field_name,
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=_embedding_dimensions,
        vector_search_configuration=_vector_config_name,
    )


# Index schema. The order of the entries is kept exactly as originally declared.
fields = [
    # Document key.
    SimpleField(
        name="id",
        type="Edm.String",
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SearchableField(name="content", type=SearchFieldDataType.String),
    SimpleField(name="category", type="Edm.String"),
    SimpleField(name="sourcepage", type="Edm.String", filterable=True, facetable=True),
    SimpleField(name="sourcefile", type="Edm.String", filterable=True, facetable=True),
    _metadata_field("companyname"),
    _metadata_field("formtype"),
    _metadata_field("fiscalyear"),
    _metadata_field("quarter"),
    _vector_field("embedding"),
    SearchableField(name="description", type=SearchFieldDataType.String),
    _vector_field("description_embedding"),
]

# HNSW settings used by both vector fields (referenced through the config name).
_hnsw_parameters = {
    "m": 4,
    "efConstruction": 400,
    "efSearch": 500,
    "metric": "cosine",
}
_hnsw_config = HnswVectorSearchAlgorithmConfiguration(
    name=_vector_config_name,
    kind="hnsw",
    parameters=_hnsw_parameters,
)
vector_search = VectorSearch(algorithm_configurations=[_hnsw_config])

# Semantic ranking: no title field, three keyword fields and two content fields.
# NOTE(review): "category" is declared above as a SimpleField yet is listed as a
# semantic keyword field - confirm this is intended.
_keyword_field_names = ["category", "companyname", "formtype"]
_content_field_names = ["content", "description"]
_prioritized_fields = PrioritizedFields(
    title_field=None,
    prioritized_keywords_fields=[
        SemanticField(field_name=field_name) for field_name in _keyword_field_names
    ],
    prioritized_content_fields=[
        SemanticField(field_name=field_name) for field_name in _content_field_names
    ],
)
semantic_config = SemanticConfiguration(
    name="boosted-semantic-config",
    prioritized_fields=_prioritized_fields,
)

# Create the semantic settings with the configuration
semantic_settings = SemanticSettings(configurations=[semantic_config])

# Create the search index with the semantic settings
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_settings=semantic_settings,
)
# create_or_update_index makes this cell safe to re-run against an existing index.
result = index_client.create_or_update_index(index)
print(f' {result.name} created')

 longcontextindex created
