In [1]:
from dotenv import load_dotenv,dotenv_values,find_dotenv
import json
import os
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient, SearchIndexingBufferedSender  
from azure.search.documents.indexes import SearchIndexClient  
from azure.search.documents.models import (
    QueryAnswerType,
    QueryCaptionType,
    QueryCaptionResult,
    QueryAnswerResult,
    SemanticErrorMode,
    SemanticErrorReason,
    SemanticSearchResultsType,
    QueryType,
    VectorizedQuery,
    VectorQuery,
    VectorFilterMode,    
)
from azure.search.documents.indexes.models import (  
    ExhaustiveKnnAlgorithmConfiguration,
    ExhaustiveKnnParameters,
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,  
    SemanticPrioritizedFields,
    SemanticField,  
    SearchField,  
    SemanticSearch,
    VectorSearch,  
    HnswAlgorithmConfiguration,
    HnswParameters,  
    VectorSearch,
    VectorSearchAlgorithmConfiguration,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    SearchableField,
    VectorSearch,
    ExhaustiveKnnParameters,
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,  
    SemanticField,  
    SearchField,  
    VectorSearch,  
    HnswParameters,  
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchAlgorithmMetric,
    VectorSearchProfile,
)  
  
# Configure environment variables
# Settings are read from the process environment after loading ../.env.sample;
# override=True is passed so values from that file take precedence over
# variables that are already set (per python-dotenv).
load_dotenv(find_dotenv("../.env.sample"), override=True)
service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")
key = os.getenv("AZURE_SEARCH_ADMIN_KEY")
# NOTE(review): `model` holds the MODEL_NAME string here, but later cells rebind
# the same name (SentenceTransformer instance, then the Azure OpenAI deployment id).
model = os.getenv("MODEL_NAME")

# Fail fast with a readable message: os.getenv returns None for anything unset,
# which would otherwise only surface later as an opaque client/credential error.
_missing = [
    name
    for name, value in (
        ("AZURE_SEARCH_SERVICE_ENDPOINT", service_endpoint),
        ("AZURE_SEARCH_INDEX_NAME", index_name),
        ("AZURE_SEARCH_ADMIN_KEY", key),
    )
    if not value
]
if _missing:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing)
    )

# NOTE(review): the search cells visible in this notebook build their own
# AzureKeyCredential(key) instead of reusing `credential`.
credential = AzureKeyCredential(key)

In [3]:
# Load the sentence-transformers model whose name is given by the MODEL_NAME
# environment variable.
# NOTE(review): this rebinds `model` (previously the MODEL_NAME string), and the
# Azure OpenAI cell below rebinds it again to a deployment id, so the object
# created here is not what the later `generate_embeddings` default refers to.
model = SentenceTransformer(os.getenv("MODEL_NAME"))

  return self.fget.__get__(instance, owner)()


In [4]:
from openai import AzureOpenAI
import os

# Embedding deployment id, passed as the `model` argument of embedding requests.
# NOTE(review): rebinds the notebook-level name `model` used by earlier cells.
model: str = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_ID")
# MODEL_NAME: str = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_ID")

# Azure OpenAI client; the key and endpoint are taken from the environment,
# the API version is pinned.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version="2023-05-15",
    api_key=os.getenv("AZURE_OPENAI_KEY"),
)

# Embedding helper shared by document fields (title/content) and search queries.
# Retrying is currently disabled; the decorator is kept here for reference:
# @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def generate_embeddings(text, model=model):
    """Return the embedding of ``text`` produced by the Azure OpenAI client.

    Args:
        text: The text to embed; it is sent as a single-element input list.
        model: Value forwarded as the ``model`` argument of the embeddings
            request. Defaults to the notebook-level ``model`` as it was bound
            when this function was defined.

    Returns:
        The ``embedding`` attribute of the first (and only requested) item in
        the response's ``data``.
    """
    response = client.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    return first_item.embedding

# Embed one sample question; `query` and `query_vector` are reused by the
# search cells further down. The printed value is the embedding length.
query = "How many incidents distinct state do we have?"
query_vector = generate_embeddings(text=query, model=model)
print(len(query_vector))

1536


In [5]:
# # query = "Segmentation"  
# query = "FOR-IT Logistics & Harvesting"
# query_vector = model.encode([query])[0]
# print(query_vector)

### Vector Search

In [6]:
 
# Vector-only query: search_text is None, so only the vector query against the
# "embedding" field is sent.
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(key),
)
vector_query = VectorizedQuery(
    vector=query_vector,
    k_nearest_neighbors=3,
    fields="embedding",
)

# NOTE(review): top=5 is larger than k_nearest_neighbors=3 — presumably at most
# three hits come back; confirm whether top should match k.
results = search_client.search(
    search_text=None,
    vector_queries=[vector_query],
    select=["line", "filename"],
    top=5,
)

for result in results:
    score = result["@search.score"]
    print(f"Score: {score}")
    # The recorded output shows None for captions and highlights on this query.
    captions = result["@search.captions"]
    print(f"Captions: {captions}")
    highlights = result["@search.highlights"]
    print(f"Highlights: {highlights}")
    line = result["line"]
    print(f"Content: {line}\n")
    filename = result["filename"]
    print(f"Filename: {filename}\n")
    print("###############################")

Score: 0.794375
Captions: None
Highlights: None
Content: FOR-IT Logistics & Harvesting, FOREST, 2023-06-27 12:49:38, 2023-07-12 10:00:01, INC2586873, Trade 101499609 has wrong state, Trade 101499609 block 1 has wrong state in harvesting schedule. If trade is searched from trade search blocks status is shown as ended. Still there is logging residue forwarding block in harvest schedule with state of "ohjelmassa" Block seems to be stuck in schedule and user cannot do anything to that block. Please investigate issue. One possible solution is to change that logging residue forwarding work orders state to Ended., Data Issue, 4 - Low, Closed, Received closure confirmation from user., Solved (Permanently)

Filename: incidents_2023_forest.xlsx

###############################
Score: 0.7938253
Captions: None
Highlights: None
Content: WSS, FOREST, 2023-03-20 08:10:29, 2023-03-23 09:57:21, INC2567887, WSS, package is in stock, should be out of stock, Delivery note 23176010035 package 375035, this 

### Hybrid Search

In [7]:
# Hybrid query: the same request carries both the raw text `query` and the
# vector query against the "embedding" field.
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(key),
)
vector_query = VectorizedQuery(
    vector=query_vector,
    k_nearest_neighbors=3,
    fields="embedding",
)

results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["line", "filename"],
    top=3,
)

for result in results:
    score = result["@search.score"]
    print(f"Score: {score}")
    line = result["line"]
    print(f"Content: {line}\n")
    filename = result["filename"]
    print(f"Filename: {filename}\n")
    print("###############################")

Score: 0.030180182307958603
Content: FOR-IT Logistics & Harvesting, FOREST, 2023-06-27 12:49:38, 2023-07-12 10:00:01, INC2586873, Trade 101499609 has wrong state, Trade 101499609 block 1 has wrong state in harvesting schedule. If trade is searched from trade search blocks status is shown as ended. Still there is logging residue forwarding block in harvest schedule with state of "ohjelmassa" Block seems to be stuck in schedule and user cannot do anything to that block. Please investigate issue. One possible solution is to change that logging residue forwarding work orders state to Ended., Data Issue, 4 - Low, Closed, Received closure confirmation from user., Solved (Permanently)

Filename: incidents_2023_forest.xlsx

###############################
Score: 0.023060109466314316
Content: WSS, FOREST, 2023-03-20 08:10:29, 2023-03-23 09:57:21, INC2567887, WSS, package is in stock, should be out of stock, Delivery note 23176010035 package 375035, this should be out of stock, this package have

### Exhaustive KNN exact nearest neighbor search

In [8]:
# Vector-only query with exhaustive=True set on the vector query (see the
# section heading: exact nearest-neighbor search); no text is sent.
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(key),
)
vector_query = VectorizedQuery(
    vector=query_vector,
    k_nearest_neighbors=3,
    fields="embedding",
    exhaustive=True,
)

results = search_client.search(
    search_text=None,
    vector_queries=[vector_query],
    select=["line", "filename"],
    top=3,
)

for result in results:
    score = result["@search.score"]
    print(f"Score: {score}")
    line = result["line"]
    print(f"Content: {line}\n")
    filename = result["filename"]
    print(f"Filename: {filename}\n")
    print("###############################")

Score: 0.794375
Content: FOR-IT Logistics & Harvesting, FOREST, 2023-06-27 12:49:38, 2023-07-12 10:00:01, INC2586873, Trade 101499609 has wrong state, Trade 101499609 block 1 has wrong state in harvesting schedule. If trade is searched from trade search blocks status is shown as ended. Still there is logging residue forwarding block in harvest schedule with state of "ohjelmassa" Block seems to be stuck in schedule and user cannot do anything to that block. Please investigate issue. One possible solution is to change that logging residue forwarding work orders state to Ended., Data Issue, 4 - Low, Closed, Received closure confirmation from user., Solved (Permanently)

Filename: incidents_2023_forest.xlsx

###############################
Score: 0.79382503
Content: WSS, FOREST, 2023-03-20 08:10:29, 2023-03-23 09:57:21, INC2567887, WSS, package is in stock, should be out of stock, Delivery note 23176010035 package 375035, this should be out of stock, this package have invoiced, invoice num

### Semantic Hybrid Search

In [9]:
# Semantic hybrid query: text + vector, with query_type=SEMANTIC and extractive
# captions/answers requested through the "my-semantic-config" configuration.
# NOTE(review): the recorded HttpResponseError appears under the next cell, not
# this one, which suggests the request is only issued once `results` is
# consumed — confirm against the SDK.
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(key),
)
vector_query = VectorizedQuery(
    vector=query_vector,
    k_nearest_neighbors=3,
    fields="embedding",
)

results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["line", "filename"],
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name="my-semantic-config",
    query_caption=QueryCaptionType.EXTRACTIVE,
    query_answer=QueryAnswerType.EXTRACTIVE,
    top=3,
)

In [10]:
# Print the semantic answers attached to the semantic hybrid query's results.
# NOTE(review): in the recorded run this cell raised HttpResponseError
# ("Semantic search is not enabled for this service"); the service must have
# semantic search enabled for this cell to produce answers.
semantic_answers = results.get_answers()

# get_answers() is typed Optional in the SDK: guard against None so that a
# response without answers prints nothing instead of raising TypeError.
for answer in semantic_answers or []:
    # Prefer the highlighted form of the answer when it is present.
    answer_text = answer.highlights if answer.highlights else answer.text
    print(f"Semantic Answer: {answer_text}")
    print(f"Semantic Answer Score: {answer.score}\n")

HttpResponseError: (FeatureNotSupportedInService) Semantic search is not enabled for this service.
Parameter name: queryType
Code: FeatureNotSupportedInService
Message: Semantic search is not enabled for this service.
Parameter name: queryType
Exception Details:	(SemanticQueriesNotAvailable) Semantic search is not enabled for this service.
	Code: SemanticQueriesNotAvailable
	Message: Semantic search is not enabled for this service.

In [None]:
# Bare expression: shows the value of `semantic_answers` (assigned in the cell
# above) as this cell's output. This cell has no recorded execution count.
semantic_answers

In [None]:
# Walk the semantic query results: reranker score and content for every hit,
# plus the first caption when the hit has any.
for result in results:
    reranker_score = result["@search.reranker_score"]
    print(f"Reranker Score: {reranker_score}")
    line = result["line"]
    print(f"Content: {line}\n")

    captions = result["@search.captions"]
    if not captions:
        # No captions on this hit: nothing more to print.
        continue

    caption = captions[0]
    # Highlighted caption text wins over the plain text when available.
    caption_text = caption.highlights if caption.highlights else caption.text
    print(f"Caption: {caption_text}\n")
            