### This notebook creates the Azure AI Search index that the Llama set of libraries uses for vector embedding, uploading the claims data chunks, and search retrieval.

#### Step 1: Import required packages.

In [0]:
from langchain_openai import AzureOpenAIEmbeddings
from langchain_openai import AzureChatOpenAI
from azure.core.credentials import AzureKeyCredential
from azure.storage.blob import BlobServiceClient
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult, AnalyzeDocumentRequest, ContentFormat
import time
import azure.identity
from azure.identity import DefaultAzureCredential
from openai import AzureOpenAI
from azure.identity import get_bearer_token_provider
from autogen import AssistantAgent, UserProxyAgent, register_function
from typing_extensions import List, Annotated
import autogen
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import SearchField, SearchFieldDataType, VectorSearch, SimpleField, SearchableField, HnswAlgorithmConfiguration, HnswParameters, VectorSearchAlgorithmMetric, ExhaustiveKnnAlgorithmConfiguration, ExhaustiveKnnParameters, VectorSearchProfile, AzureOpenAIVectorizer, AzureOpenAIParameters, SemanticConfiguration, SemanticSearch, SemanticPrioritizedFields, SemanticField, SearchIndex
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizableTextQuery
from azure.search.documents.models import (
    QueryType,
    QueryCaptionType,
    QueryAnswerType
)
from llama_parse import LlamaParse

#### Step 2: Set credential variables.

In [0]:
"""
This code loads and sets the necessary variables for Azure services.
The variables are loaded from Azure Key Vault.
"""

# --- Azure OpenAI -----------------------------------------------------------
# Every secret below is read from the "myscope" secret scope via dbutils.secrets.
azure_openai_endpoint = dbutils.secrets.get(
    scope="myscope", key="aoai-endpoint"
)
azure_openai_api_key = dbutils.secrets.get(
    scope="myscope", key="aoai-api-key"
)
azure_openai_api_version = "2024-02-15-preview"
azure_openai_embedding_deployment = dbutils.secrets.get(
    scope="myscope", key="aoai-embedding-deployment"
)

# --- Azure AI Document Intelligence -----------------------------------------
doc_intelligence_endpoint = dbutils.secrets.get(
    scope="myscope", key="docintelligence-endpoint"
)
doc_intelligence_key = dbutils.secrets.get(
    scope="myscope", key="docintelligence-key"
)

# --- Azure AI Search --------------------------------------------------------
# The admin key is wrapped in AzureKeyCredential so it can be handed directly
# to the search clients created further down.
search_credential = AzureKeyCredential(
    dbutils.secrets.get(scope="myscope", key="aisearch-adminkey")
)
search_endpoint = dbutils.secrets.get(
    scope="myscope", key="aisearch-endpoint"
)

#### Step 3: Connect to blob storage.

In [0]:
# Open the "insurance-rag" container using the storage connection string
# held in the "myscope" secret scope.
blob_container_name = "insurance-rag"
blob_connection_string = dbutils.secrets.get(
    scope="myscope", key="blobstore-connstr"
)

blob_service_client = BlobServiceClient.from_connection_string(
    blob_connection_string
)
container_client = blob_service_client.get_container_client(
    blob_container_name
)

# Result of list_blobs(); not consumed by the cells visible in this section.
blobs = container_client.list_blobs()

# Print the container URL as a quick check of which container was opened.
container_url = container_client.url
print(container_url)

In [0]:
# LlamaParse client: markdown output, with the "openai-gpt-4o" vendor
# multimodal model enabled and a parsing hint describing the documents.
parser = LlamaParse(
    result_type="markdown",
    parsing_instruction="This is an auto insurance claim document.",
    use_vendor_multimodal_model=True,
    vendor_multimodal_model_name="openai-gpt-4o",
    # Key passed to the vendor multimodal model (read from "aoai-api-key").
    vendor_multimodal_api_key=dbutils.secrets.get(
        scope="myscope", key="aoai-api-key"
    ),
    # Key for the LlamaParse service itself (read from "llamacloudkey").
    api_key=dbutils.secrets.get(scope="myscope", key="llamacloudkey"),
)

In [0]:
# Exploratory check: lists the attribute names available on the LlamaParse
# instance. Nothing below reads this output, so the cell can be dropped from a
# finished notebook.
dir(parser)

In [0]:
from llama_index.readers.azstorage_blob import AzStorageBlobReader

# Build a reader over the blob container configured above and load its
# contents into `documents`.
loader = AzStorageBlobReader(
    connection_string=blob_connection_string,
    container_name=blob_container_name,
)
documents = loader.load_data()

In [0]:
# Exploratory check: prints the built-in help text for the LlamaParse instance.
# Nothing below reads this output, so the cell can be dropped from a finished
# notebook.
help(parser)

In [0]:
# Exploratory check: lists the parser's attribute names again (the same call
# already appears in an earlier cell).
dir(parser)

In [0]:
# Report how many documents were loaded and preview only the first one;
# echoing the whole list floods the cell output with every document's text.
print(f"Loaded {len(documents)} document(s)")
documents[:1]

In [0]:
# Azure OpenAI connection settings consumed by the AutoGen agents.
llm_config = {
    "config_list": [
        {
            "model": dbutils.secrets.get(scope="myscope", key="aoai-deploymentname"),
            "api_key": dbutils.secrets.get(scope="myscope", key="aoai-api-key"),
            "base_url": dbutils.secrets.get(scope="myscope", key="aoai-endpoint"),
            "api_type": "azure",
            "api_version": "2024-02-15-preview",
        },
    ]
}

# Per-agent LLM settings layered on top of the shared connection list.
gpt4_config = {
    "cache_seed": 42,
    "temperature": 0,
    "config_list": llm_config["config_list"],
    "timeout": 120
}


def _is_termination_msg(message: dict) -> bool:
    """Return True when the message text contains "terminate" (any case).

    Content that is missing, None, or not a string is treated as
    "not a termination message" instead of raising on `.lower()`.
    """
    content = message.get("content", "")
    return isinstance(content, str) and "terminate" in content.lower()


ai_search_agent = AssistantAgent(
    name="AISearchAssistant",
    # Adjacent string literals are concatenated verbatim, so each sentence
    # carries its own trailing space; without it the prompt read
    # "...AI agent.You can help...service.Return TERMINATE...".
    system_message=(
        "You are a helpful AI agent. "
        "You can help with Azure AI Search service. "
        "Return TERMINATE when the task is done"
    ),
    llm_config=gpt4_config,
)

user_proxy = UserProxyAgent(
    name="User",
    is_termination_msg=_is_termination_msg,
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    code_execution_config=False,
)

#### Step 5: Define the search retrieval function and register it as a tool for the agents.

In [0]:
@user_proxy.register_for_execution()
@ai_search_agent.register_for_llm(
    description="A tool or function for search retrieval from Azure AI Search"
)
def search_retrieval(user_input: str) -> str:
    """
    Query Azure AI Search and return the text of the matching chunk(s).

    The input is sent both as the keyword `search_text` and as a vectorizable
    text query against the `embedding` field, with semantic ranking enabled.
    Only the top result is requested (`top=1`).

    Args:
        user_input: The question or search phrase to look up.

    Returns:
        str: The `chunk` text of each returned document with newline and
        carriage-return characters removed, joined by "\n". An empty string
        when the search returns no documents.
    """
    # `index_name` is a notebook-level variable looked up at call time; the
    # cell that assigns it must have run before the agents invoke this tool.
    search_client = SearchClient(
        endpoint=search_endpoint,
        index_name=index_name,
        credential=search_credential,
    )
    vector_query = VectorizableTextQuery(
        text=user_input,
        k_nearest_neighbors=5,
        fields="embedding",
        exhaustive=True,
    )

    hits = search_client.search(
        search_text=user_input,
        vector_queries=[vector_query],
        select=["id", "chunk"],
        query_type=QueryType.SEMANTIC,
        semantic_configuration_name='mySemanticConfig',
        query_caption=QueryCaptionType.EXTRACTIVE,
        query_answer=QueryAnswerType.EXTRACTIVE,
        top=1,
    )

    # Only "id" and "chunk" are selected above, so the document text lives
    # under "chunk"; reading "content" raised KeyError on the first hit.
    chunks = [
        (hit["chunk"] or "").replace("\n", "").replace("\r", "")
        for hit in hits
    ]
    return "\n".join(chunks)

#### Step 6: Create the Azure AI Search index and vector configuration that store the Llama-generated vectors and claims data chunks and that the agents use for retrieval.

In [0]:
# Define the index schema, vector search and semantic search settings, then
# create or update the index on the search service.

# Name of the index to create or update.
index_name = "llama-insurance-index"

# Client for index management operations on the search service.
index_client = SearchIndexClient(endpoint=search_endpoint, credential=search_credential)

# Schema: a string key, three searchable text fields and one vector field.
fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        filterable=True,
        sortable=True,
        facetable=True,
    ),
    SearchableField(
        name="metadata",
        type=SearchFieldDataType.String,
        filterable=True,
        searchable=True,
        retrievable=True,
    ),
    SearchableField(
        name="chunk",
        type=SearchFieldDataType.String,
        searchable=True,
        sortable=True,
        facetable=True,
        retrievable=True,
    ),
    SearchableField(
        name="doc_id",
        type=SearchFieldDataType.String,
        searchable=True,
        filterable=True,
        retrievable=True,
    ),
    # Vector field: 1536-dimensional float collection bound to the HNSW profile.
    SearchField(
        name="embedding",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        retrievable=True,
        hidden=False,
        vector_search_dimensions=1536,
        vector_search_profile_name="myHnswProfile",
    ),
]

# Vector search: one HNSW algorithm (cosine metric), one profile that pairs it
# with the Azure OpenAI vectorizer, and the vectorizer definition itself.
vector_search = VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(
            name="myHnsw",
            parameters=HnswParameters(
                m=4,
                ef_construction=400,
                ef_search=500,
                metric=VectorSearchAlgorithmMetric.COSINE,
            ),
        ),
    ],
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
            vectorizer="myOpenAI",
        ),
    ],
    vectorizers=[
        AzureOpenAIVectorizer(
            name="myOpenAI",
            kind="azureOpenAI",
            azure_open_ai_parameters=AzureOpenAIParameters(
                resource_uri=azure_openai_endpoint,
                deployment_id=azure_openai_embedding_deployment,
                api_key=azure_openai_api_key,
            ),
        ),
    ],
)

# Semantic search: a single configuration that uses "chunk" as its content field.
semantic_config = SemanticConfiguration(
    name="mySemanticConfig",
    prioritized_fields=SemanticPrioritizedFields(
        content_fields=[SemanticField(field_name="chunk")],
    ),
)
semantic_search = SemanticSearch(configurations=[semantic_config])

# Assemble the index definition and push it to the service.
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
)
result = index_client.create_or_update_index(index=index)
print(f"{result.name} created")