In [None]:
!pip install azure-identity
!pip install azure-search-documents==11.4.0
!pip install llama-index
!pip install llama-index-embeddings-azure-openai
!pip install llama-index-llms-azure-openai
!pip install llama-index-vector-stores-azureaisearch
!pip install nest-asyncio
!pip install python-dotenv
!pip install wandb
!pip install weave
!pip install llama-index-callbacks-wandb

## Initial Setup
Load environment variables and initialize the necessary clients and models.

In [2]:
import os
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.vector_stores.azureaisearch import AzureAISearchVectorStore, IndexManagement
from llama_index.core.callbacks import LlamaDebugHandler

# Load environment variables from a local .env file (if one exists) into os.environ.
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        EnvironmentError: if the variable is unset or empty, so a missing setting is
            reported here by name instead of surfacing later as an opaque client error.
    """
    value = os.getenv(name)
    if not value:
        raise EnvironmentError(
            f"Missing required environment variable {name!r}; "
            "set it in your shell or in the .env file next to this notebook."
        )
    return value


# Environment Variables
AZURE_OPENAI_ENDPOINT = _require_env("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = _require_env("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME = _require_env("AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME") # I'm using GPT-3.5-turbo
AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME = _require_env("AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME") # I'm using text-embedding-ada-002
SEARCH_SERVICE_ENDPOINT = _require_env("AZURE_SEARCH_SERVICE_ENDPOINT")
SEARCH_SERVICE_API_KEY = _require_env("AZURE_SEARCH_ADMIN_KEY")
INDEX_NAME = "contoso-hr-docs"
WANDB_PROJECT="llamaindex-contos-hr-docs"
# WANDB_API_KEY = os.getenv("WANDB_API_KEY")
# os.environ['WANDB_API_KEY'] = WANDB_API_KEY

# Single source of truth for the Azure OpenAI REST API version used by both models.
AZURE_OPENAI_API_VERSION = "2024-02-01"

# Initialize Azure OpenAI and embedding models.
# The same env value is passed as both `model` and `deployment_name`.
llm = AzureOpenAI(
    model=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
    deployment_name=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=AZURE_OPENAI_API_VERSION,
)

embed_model = AzureOpenAIEmbedding(
    model=AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
    deployment_name=AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=AZURE_OPENAI_API_VERSION,
)

# Initialize search clients: one scoped to the service (index management) and one
# scoped to the INDEX_NAME index (document-level operations). Both share the key credential.
credential = AzureKeyCredential(SEARCH_SERVICE_API_KEY)
index_client = SearchIndexClient(endpoint=SEARCH_SERVICE_ENDPOINT, credential=credential)
search_client = SearchClient(endpoint=SEARCH_SERVICE_ENDPOINT, index_name=INDEX_NAME, credential=credential)


ModuleNotFoundError: No module named 'dotenv'

# Weights & Biases Callback Manager Setup

In [2]:
# Log this session in to Weights & Biases.
import wandb

wandb.login()

AttributeError: `np.float_` was removed in the NumPy 2.0 release. Use `np.float64` instead.

## Instrument LlamaIndex

In [3]:
from llama_index.core import set_global_handler

# Install the "wandb" global handler, passing the W&B project name through `run_args`.
wandb_run_args = {"project": WANDB_PROJECT}
set_global_handler("wandb", run_args=wandb_run_args)

AttributeError: `np.float_` was removed in the NumPy 2.0 release. Use `np.float64` instead.

In [11]:
! pip install --upgrade weave
! pip install numpy==1.21.6

Collecting numpy>=1.21 (from weave)
  Using cached numpy-1.26.4-cp311-cp311-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-win_amd64.whl (15.8 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.1
    Uninstalling numpy-2.0.1:
      Successfully uninstalled numpy-2.0.1
Successfully installed numpy-1.26.4



[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: Ignored the following versions that require a different python version: 1.21.2 Requires-Python >=3.7,<3.11; 1.21.3 Requires-Python >=3.7,<3.11; 1.21.4 Requires-Python >=3.7,<3.11; 1.21.5 Requires-Python >=3.7,<3.11; 1.21.6 Requires-Python >=3.7,<3.11
ERROR: Could not find a version that satisfies the requirement numpy==1.21.6 (from versions: 1.3.0, 1.4.1, 1.5.0, 1.5.1, 1.6.0, 1.6.1, 1.6.2, 1.7.0, 1.7.1, 1.7.2, 1.8.0, 1.8.1, 1.8.2, 1.9.0, 1.9.1, 1.9.2, 1.9.3, 1.10.0.post2, 1.10.1, 1.10.2, 1.10.4, 1.11.0, 1.11.1, 1.11.2, 1.11.3, 1.12.0, 1.12.1, 1.13.0, 1.13.1, 1.13.3, 1.14.0, 1.14.1, 1.14.2, 1.14.3, 1.14.4, 1.14.5, 1.14.6, 1.15.0, 1.15.1, 1.15.2, 1.15.3, 1.15.4, 1.16.0, 1.16.1, 1.16.2, 1.16.3, 1.16.4, 1.16.5, 1.16.6, 1.17.0, 1.17.1, 1.17.2, 1.17.3, 1.17.4, 1.17.5, 1.18.0, 1.18.1, 1.18.2, 1.18.3, 1.18.4, 1.18.5, 1.19.0, 1.19.1, 1.19.2, 1.19.3, 1.19.4, 1.19.5, 1

In [12]:
import weave

# Initialize Weave with the same project name used for the W&B handler.
weave.init(WANDB_PROJECT)

AttributeError: `np.float_` was removed in the NumPy 2.0 release. Use `np.float64` instead.

SyntaxError: unexpected character after line continuation character (903547445.py, line 1)

## Vector Store Initialization
Set up the vector store using Azure AI Search.

In [5]:
from llama_index.core.settings import Settings

# Make the Azure OpenAI models the process-wide LlamaIndex defaults.
Settings.embed_model = embed_model
Settings.llm = llm

# Initialize the vector store on top of the existing Azure AI Search index.
# NOTE(review): VALIDATE_INDEX presumably checks an existing index instead of creating
# one; confirm against the integration docs.
vector_store = AzureAISearchVectorStore(
    # Which service / index to talk to, and how to manage it.
    search_or_index_client=index_client,
    index_name=INDEX_NAME,
    index_management=IndexManagement.VALIDATE_INDEX,
    # Mapping from LlamaIndex node attributes to the index's field names.
    id_field_key="id",
    doc_id_field_key="doc_id",
    chunk_field_key="text",
    metadata_string_field_key="metadata",
    embedding_field_key="embedding",
    # Vector configuration; 1536 must match the embedding model's output size (assumed
    # to be text-embedding-ada-002, per the setup cell's comment).
    embedding_dimensionality=1536,
    vector_algorithm_type="exhaustiveKnn",
    language_analyzer="en.lucene",
)


## Use Existing Index
I'm going to use my existing "contoso-hr-docs" index that I created. For how to create an index and load documents from scratch, see [here](https://github.com/farzad528/azure-ai-search-python-playground/blob/addb1a29e70ee9dbf1bb9a39bbe367aa15e4cf5f/azure-ai-search-rag-eval-trulens.ipynb#L145).

In [19]:
# Wrap the Azure AI Search vector store in a storage context, then build the index
# object from an empty document list so that no new documents are passed in here.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents=[], storage_context=storage_context)

[34m[1mwandb[0m: Logged trace tree to W&B.


In [11]:
# Query execution
from llama_index.core.response.notebook_utils import display_response
from llama_index.core.schema import MetadataMode

query = "Does my health plan cover scuba diving?"

# Retrieve the 3 most similar chunks and let the LLM synthesize an answer from them.
query_engine = index.as_query_engine(llm=llm, similarity_top_k=3)
response = query_engine.query(query)

# Render the synthesized answer, followed by a blank separator.
display_response(response)
print("\n")

# Show each retrieved source node exactly as it is presented to the LLM
# (content plus the metadata selected by MetadataMode.LLM).
for source_node in response.source_nodes:
    llm_view = source_node.get_content(metadata_mode=MetadataMode.LLM)
    print(llm_view)

[34m[1mwandb[0m: Logged trace tree to W&B.


**`Final Response:`** It is important to review the plan's evidence of coverage to determine if scuba diving is covered under the health plan. Additionally, discussing this with the healthcare provider and reviewing the list of excluded services and prescriptions is recommended to ensure coverage for scuba diving. If scuba diving is not covered under the plan, exploring alternative coverage options or discussing payment options with the healthcare provider may be necessary.



page_label: 90
file_path: c:\Dev\azure-ai-search-python-playground\data\pdf\Northwind_Health_Plus_Benefits_Details.pdf

benefits for mental health and 
substance abuse services as it does for medical and surgical benefits. This includes covering 
services that are medically necessary, suc h as inpatient and outpatient services, medication 
management, and psychological and psychosocial therapies.  
It is important to note that the plan may not provide coverage or impose any limits or 
exclusions that are not in compliance with applicable laws a nd regulations. Additionally, the 
plan may not discriminate against individuals based on their medical condition or health 
status. Individuals who feel they have been discriminated against should contact the 
Department of Labor, who can investigate the iss ue. 
Finally, it is important to note that the plan may not provide coverage or impose any limits 
or exclusions that are not in compliance with applicable laws and regulations. Additiona

Above, you can now see the logged traces in action.

# Evaluate RAG with Weights & Biases

Using `DatasetGenerator`, let's run some evaluations on a synthetic test dataset!

In [None]:
# Build a synthetic question dataset from a random sample of the source documents.
#
# NOTE(review): this cell relies on names that are not imported or defined in any cell
# visible above it: `ServiceContext`, `callback_manager`, `copy`, `random`, `documents`
# and `DatasetGenerator`. It will raise NameError on a fresh kernel unless they are
# provided elsewhere - TODO confirm where they come from and import/define them.
#
# Reuse the notebook's `llm` (described in the setup cell as GPT-3.5-turbo) for generation.
service_context = ServiceContext.from_defaults(llm=llm, callback_manager=callback_manager)


# Work on a deep copy so the in-place shuffle does not reorder `documents` itself,
# then keep only the first 10 shuffled documents.
# NOTE(review): no random seed is set here, so the sample differs between runs.
random_documents = copy.deepcopy(documents)
random.shuffle(random_documents)
random_documents = random_documents[:10]


# Generate 2 questions per chunk from the sampled documents.
data_generator = DatasetGenerator.from_documents(
    random_documents, service_context=service_context, num_questions_per_chunk=2
)
# The original note says this lowers questions per chunk from 10 to 2; the value 10 is
# presumably the library default - TODO confirm.
