# RAG with Azure AI Search

In [2]:
# Import required libraries
import os
import json
from dotenv import load_dotenv

from tenacity import retry, wait_random_exponential, stop_after_attempt
from openai import AzureOpenAI
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SimpleField,
    SearchFieldDataType,
    SearchableField,
    SearchField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
    SearchIndex,
    AzureOpenAIVectorizer,
    AzureOpenAIParameters
)


from azure.identity import DefaultAzureCredential, get_bearer_token_provider
import json

load_dotenv()

True

In [3]:
# Azure AI Search connection settings, read from the environment
# (load_dotenv() in the imports cell has already loaded the .env file).
service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
key = os.getenv("AZURE_SEARCH_ADMIN_KEY")

# Index queried by this notebook. The name is pinned here on purpose of
# reproducing the recorded outputs; AZURE_SEARCH_INDEX_NAME is NOT consulted.
# NOTE(review): switch to os.getenv("AZURE_SEARCH_INDEX_NAME", "product_data_csv")
# if the index should become configurable.
index_name = "product_data_csv"

# Azure OpenAI settings, read from the environment.
OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.getenv("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_DEPLOYMENT_ENDPOINT = os.getenv("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Not referenced by the query cells visible in this notebook.
azure_openai_embedding_dimensions = 1536

# Key-based credential shared by the Azure AI Search clients.
credential = AzureKeyCredential(key)

In [4]:
# Azure OpenAI client; calc_embeddings uses it to embed query text.
# The search `credential` is already created in the configuration cell,
# so it is not rebuilt here.
aoai_client = AzureOpenAI(
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    api_key=OPENAI_API_KEY,
    api_version="2023-05-15",
)

In [5]:
from azure.search.documents.models import VectorizedQuery

# Client bound to `index_name`; the query cells below all search through it.
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=credential,
)

# Embedding helper backed by the Azure OpenAI embedding deployment
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def calc_embeddings(text):
    """Return the embedding vector for ``text``.

    ``text`` is sent as a single-item batch to the deployment named by
    ``OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME`` and the ``embedding`` of the
    first response item is returned. Intended for title/content fields and
    for query text alike.

    The call is wrapped in tenacity's ``retry`` with randomized exponential
    waits (min=1, max=20) and at most 6 attempts.
    """
    response = aoai_client.embeddings.create(
        input=[text],
        # `model` takes the Azure deployment name
        model=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    )
    first_item = response.data[0]
    return first_item.embedding

def do_search(query, fields, k_nearest_neighbors=3):
    """Run a pure vector search for ``query`` and print each hit.

    Args:
        query: Text to search for. It is embedded with ``calc_embeddings``
            and the resulting vector is what gets sent to the service.
        fields: Vector field to search, or several field names in a single
            comma-separated string.
        k_nearest_neighbors: Number of nearest neighbours requested for the
            vector query. Defaults to 3.

    Returns:
        None. The score, name and description of every hit are printed, so
        calling this as the last line of a cell produces no extra ``Out[]``.
    """
    embedding = calc_embeddings(query)
    vector_query = VectorizedQuery(
        vector=embedding,
        k_nearest_neighbors=k_nearest_neighbors,
        fields=fields,
    )

    results = search_client.search(
        search_text=None,  # no keyword component: vector-only search
        vector_queries=[vector_query],
        select=["name", "description"],
    )

    for result in results:
        print(f"Score: {result['@search.score']}")
        print(f"Name: {result['name']}")
        print(f"Description: {result['description']}\n")

In [6]:
# Pure vector search against the description embeddings only
do_search(query="nonstick grills", fields="description_embedding")

# The nonstick grill ranks first; the two cast iron griddles come second and third


Score: 0.88662547
Name: Cuisinart Countertop Griddler - GR4
Description: Cuisinart Countertop Griddler - GR4/ Nonstick Grill/ Knob Selector/ Light Indicator/ Temperature Controls/ Cleaning/Scraping Tool Included

Score: 0.87544817
Name: Weber Cast Iron Griddle - 7531
Description: Weber Cast Iron Griddle - 7531/ Heavy-Duty Cast Iron Griddle/ Fits Weber Genesis Silver A & Spirit 500 Gas Grills

Score: 0.8752122
Name: Weber Cast Iron Griddle - 7542
Description: Weber Cast Iron Griddle - 7542/ Heavy-Duty Cast Iron Griddle/ Two-Sided For Cooking A Variety Of Foods/ Fits Several Weber Grills



In [14]:
# Cross-field pure vector search: the same query vector is matched against both embedding fields
do_search(query="nonstick grills", fields="name_embedding, description_embedding")


Score: 0.03333333507180214
Name: Cuisinart Countertop Griddler - GR4
Description: Cuisinart Countertop Griddler - GR4/ Nonstick Grill/ Knob Selector/ Light Indicator/ Temperature Controls/ Cleaning/Scraping Tool Included

Score: 0.032786883413791656
Name: Weber Cast Iron Griddle - 7531
Description: Weber Cast Iron Griddle - 7531/ Heavy-Duty Cast Iron Griddle/ Fits Weber Genesis Silver A & Spirit 500 Gas Grills

Score: 0.032258063554763794
Name: Weber Cast Iron Griddle - 7542
Description: Weber Cast Iron Griddle - 7542/ Heavy-Duty Cast Iron Griddle/ Two-Sided For Cooking A Variety Of Foods/ Fits Several Weber Grills



In [16]:
# Pure multi-vector weighted search: one text query per embedding field,
# each with its own weight (name: 2, description: 0.5).
from azure.search.documents.models import VectorizableTextQuery

query = "nonstick grills"

# VectorizableTextQuery is given the raw text, so no embedding is computed in this cell.
vector_query_1 = VectorizableTextQuery(
    text=query,
    k_nearest_neighbors=3,
    fields="name_embedding",
    weight=2,
)
vector_query_2 = VectorizableTextQuery(
    text=query,
    k_nearest_neighbors=3,
    fields="description_embedding",
    weight=0.5,
)

results = search_client.search(
    search_text=None,  # vector-only: no keyword component
    vector_queries=[vector_query_1, vector_query_2],
    select=["name", "description"],
)

for result in results:
    print(f"Score: {result['@search.score']}")
    print(f"Name: {result['name']}")
    print(f"Description: {result['description']}\n")

Score: 0.0416666679084301
Name: Cuisinart Countertop Griddler - GR4
Description: Cuisinart Countertop Griddler - GR4/ Nonstick Grill/ Knob Selector/ Light Indicator/ Temperature Controls/ Cleaning/Scraping Tool Included

Score: 0.04098360240459442
Name: Weber Cast Iron Griddle - 7531
Description: Weber Cast Iron Griddle - 7531/ Heavy-Duty Cast Iron Griddle/ Fits Weber Genesis Silver A & Spirit 500 Gas Grills

Score: 0.04032257944345474
Name: Weber Cast Iron Griddle - 7542
Description: Weber Cast Iron Griddle - 7542/ Heavy-Duty Cast Iron Griddle/ Two-Sided For Cooking A Variety Of Foods/ Fits Several Weber Grills



#### Perform a Pure Vector Search with a filter
The index used in this notebook has no `category` field, so the code sample below is included for reference only.   
#### THE CODE BELOW WILL NOT WORK UNLESS YOUR INDEX HAS A FIELD TO FILTER BY!

In [None]:
# Pure vector search combined with a filter.
# Reference only: the index used in this notebook has no `category` field,
# so this cell fails unless your index has a field to filter by.

from azure.search.documents.models import VectorFilterMode

query = "nonstick grills"

# Relies on VectorizableTextQuery having been imported by the multi-vector cell above.
vector_query = VectorizableTextQuery(
    text=query,
    k_nearest_neighbors=3,
    fields="description_embedding",
)

results = search_client.search(
    search_text=None,  # vector-only: no keyword component
    vector_queries=[vector_query],
    vector_filter_mode=VectorFilterMode.PRE_FILTER,
    filter="category eq 'grills'",
    select=["name", "description"],
)

for result in results:
    print(f"Score: {result['@search.score']}")
    print(f"Name: {result['name']}")
    print(f"Description: {result['description']}\n")