In [1]:
import os
from dotenv import load_dotenv

# Load environment variables
if load_dotenv():
    print("Found Azure OpenAI API Base Endpoint: " + os.getenv("AZURE_OPENAI_ENDPOINT"))
else: 
    print("Azure OpenAI API Base Endpoint not found. Have you configured the .env file?")

Found Azure OpenAI API Base Endpoint: https://dzgpt4n.openai.azure.com/


In [7]:
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential

from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SimpleField,
    SearchFieldDataType,
    SearchableField,
    SearchField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
    SearchIndex

)

credential = AzureKeyCredential(os.environ["AZURE_AI_SEARCH_KEY"]) if len(os.environ["AZURE_AI_SEARCH_KEY"]) > 0 else DefaultAzureCredential()

index_name = "products-semantic-index"

index_client = SearchIndexClient(
    endpoint=os.environ["AZURE_AI_SEARCH_ENDPOINT"], 
    credential=credential
)

# Create a search index with the fields and a vector field which we will fill with a vector based on the overview field
fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True),
    SearchableField(name="name", type=SearchFieldDataType.String),
    SearchableField(name="category", type=SearchFieldDataType.String),
    SearchableField(name="price", type=SearchFieldDataType.String, sortable=True, filterable=True),
    SearchableField(name="tags", collection=True, type=SearchFieldDataType.String, facetable=True, filterable=True),
    SearchableField(name="brand", type=SearchFieldDataType.String),
    SearchableField(name="description", type=SearchFieldDataType.String, analyzer_name="en.lucene"),
    SearchField(name="vector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
]

# Configure the vector search configuration  
vector_search = VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(
            name="myHnsw"
        )
    ],
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
        )
    ]
)

# Configure the semantic search configuration to prefer title and tagline fields over overview
semantic_config = SemanticConfiguration(
    name="products-semantic-config",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="name"),
        keywords_fields=[SemanticField(field_name="category")],
        content_fields=[SemanticField(field_name="description")]
    )
)

# Create the semantic settings with the configuration
semantic_search = SemanticSearch(configurations=[semantic_config])

# Create the search index with the semantic settings
index = SearchIndex(name=index_name, fields=fields,
                    vector_search=vector_search, semantic_search=semantic_search)
result = index_client.create_or_update_index(index)
print(f' {result.name} created')

 products-semantic-index created


In [3]:
from enum import Enum
from pydantic import BaseModel

class QuestionType(str, Enum):
    multiple_choice = "multiple_choice"
    true_or_false = "true_or_false"
    estimation = "estimation"

class Ask(BaseModel):
    question: str | None = None
    type: QuestionType
    correlationToken: str | None = None

class Answer(BaseModel):
    answer: str
    correlationToken: str | None = None
    promptTokensUsed: int | None = None
    completionTokensUsed: int | None = None

In [8]:
import os
import json
from openai import AzureOpenAI
from azure.search.documents import SearchClient

client = AzureOpenAI(
        api_key = os.getenv("AZURE_OPENAI_API_KEY"),  
        api_version = os.getenv("AZURE_OPENAI_VERSION"),
        azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    )

embedding_model = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL")

# use an embeddingsmodel to create embeddings
def get_embedding(text, model=embedding_model):
    return client.embeddings.create(input = [text], model=model).data[0].embedding

# 1. define function to parse csv row and create embedding for overview text
def parseProduct(product):
    print(product)
    return dict([
        ("id", str(product["id"])),
        ("category", product["category"]),
        ("name", str(product["name"])),
        ("tags", product["tags"]),
        ("price", str(product["price"])),
        ("brand", str(product["brand"])),
        ("description", str(product["description"])),
        ("vector", get_embedding(product["description"]))
    ])

# 2. load products from json
products = []
with open('./products.json') as json_data:
    productsJson = json.load(json_data)
    line_count = 0
    for productJson in productsJson:
        productEmbedding = parseProduct(productJson)
        products.append(productEmbedding)
        line_count += 1
    print(f'Processed {line_count} lines.')
print('Loaded %s products.' % len(products))


# 3. upload documents to vector store
search_client = SearchClient(
    endpoint=os.environ["AZURE_AI_SEARCH_ENDPOINT"], 
    index_name=index_name,
    credential=credential
)

result = search_client.upload_documents(products)
print(f"Successfully loaded {len(products)} products into Azure AI Search index.")

{'id': 2, 'category': 'Computer equipment', 'name': 'UltraSlim Wireless Keyboard', 'brand': 'TechSavvy', 'price': 89.99, 'description': 'The UltraSlim Wireless Keyboard by TechSavvy combines sleek design with superior functionality. Perfect for both office and home use, this keyboard features a low-profile, ergonomic build that ensures comfortable typing for extended periods. Its wireless connectivity and long battery life make it a convenient choice for users who value both style and efficiency. The keyboard also includes customizable hotkeys and an integrated touchpad for added convenience. Upgrade your typing experience with the UltraSlim Wireless Keyboard.', 'tags': ['keyboard', 'wireless', 'tech', 'ergonomic', 'slim'], 'svg': "<svg width='100' height='100' viewBox='0 0 100 100' xmlns='http://www.w3.org/2000/svg'><rect x='10' y='20' width='80' height='40' fill='#d3d3d3'/><rect x='15' y='25' width='8' height='8' fill='#ffffff'/><rect x='25' y='25' width='8' height='8' fill='#ffffff'

In [9]:
from openai import AzureOpenAI
from azure.search.documents.models import (
    VectorizedQuery
)

client = AzureOpenAI(
        api_key = os.getenv("AZURE_OPENAI_API_KEY"),  
        api_version = os.getenv("AZURE_OPENAI_VERSION"),
        azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    )

deployment_name = os.getenv("AZURE_OPENAI_COMPLETION_DEPLOYMENT_NAME")
model_name = os.getenv("AZURE_OPENAI_COMPLETION_MODEL")

index_client = SearchClient(
    endpoint=os.environ["AZURE_AI_SEARCH_ENDPOINT"], 
    index_name=index_name,
    credential=credential
)

question = "Tell me about the latest Product. When was it released?"

# create a vectorized query based on the question
vector = VectorizedQuery(vector=get_embedding(question), k_nearest_neighbors=5, fields="vector")


# create search client to retrieve products from the vector store
found_docs = list(search_client.search(
    search_text=None,
    query_type="semantic",
    semantic_configuration_name="products-semantic-config",
    vector_queries=[vector],
    select=["name", "category", "description"],
    top=5
))

# print the found documents and the field that were selected
found_docs_as_text = " "
for doc in enumerate(found_docs, start=1):    
    print("Name: {}".format(doc["name"]))
    print("Category: {}".format(doc["category"]))
    print("----------")

    found_docs_as_text += " "+ "Name: {}".format(doc["name"]) +" "+ "Description: {}".format(doc["description"])

# augment the question with the found documents and ask the LLM to generate a response
system_prompt = "You are an assistant to the user, you are given some context below, please answer the query of the user with as detail as possible"

parameters = [system_prompt, ' Context:', found_docs_as_text , ' Question:', question]
joined_parameters = ''.join(parameters)

response = client.chat.completions.create(
        model = deployment_name,
        messages = [{"role" : "assistant", "content" : joined_parameters}],
    )

print (response.choices[0].message.content)

Name: TechView X500
Category: Tablets
----------
Name: GigaTab Pro 11
Category: Tablets
----------
Name: Galaxy Tab S8
Category: Tablets
----------
Name: UltraSmart Tablet Pro
Category: Electronics
----------
Name: ProTab X10
Category: Tablets
----------
The latest product is the **ProTab X10 by TechMaster**. This cutting-edge tablet is designed for both productivity and entertainment, featuring a 10.1-inch high-definition display, a powerful octa-core processor, 4GB of RAM, and 128GB of internal storage, which is expandable via microSD. It also boasts a battery life of up to 12 hours on a single charge. However, the specific release date for the ProTab X10 is not provided in the given context. For the exact release date, it would be advisable to check TechMaster's official announcements or contact their customer support.
