In [20]:
import os
import asyncio
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [21]:
search_service_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
search_service_key = os.environ["AZURE_SEARCH_API_KEY"]
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_openai_key = os.getenv("AZURE_OPENAI_KEY", "")
azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large")
azure_openai_embedding_dimensions = int(os.getenv("AZURE_OPENAI_EMBEDDING_DIMENSIONS", 1024))
embedding_model_name = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large")
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-06-01")
index_name="scorp-index-demo"

## Index Creation

In [26]:
from azure.search.documents.indexes.aio import SearchIndexClient
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes.models import (
    SearchIndex,
    CorsOptions,
    ScoringProfile,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    SearchableField,
    VectorSearch,
    VectorSearchProfile,
    HnswAlgorithmConfiguration,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch
)
from typing import List

client = SearchIndexClient(search_service_endpoint, AzureKeyCredential(search_service_key))

index_fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True),
    SearchableField(name="title", type=SearchFieldDataType.String),
    SearchableField(name="category", type=SearchFieldDataType.String, filterable=True),
    SearchableField(name="content", type=SearchFieldDataType.String),
    SearchField(name="contentVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single), searchable=True, vector_search_dimensions=azure_openai_embedding_dimensions, vector_search_profile_name="HnswProfile")
]

vector_search = VectorSearch(
    profiles=[
        VectorSearchProfile(
            name="HnswProfile",
            algorithm_configuration_name="hnsw-algo",
            vectorizer_name="myVectorizer"
        )
    ],
    algorithms=[
        HnswAlgorithmConfiguration(
            name="hnsw-algo"
        )
    ],
    vectorizers=[
        AzureOpenAIVectorizer(
            vectorizer_name="myVectorizer",
            parameters=AzureOpenAIVectorizerParameters(
                resource_url=azure_openai_endpoint,
                deployment_name=azure_openai_embedding_deployment,
                model_name=embedding_model_name,
                api_key=azure_openai_key
            )
        )
    ]
)

semantic_configuration = SemanticConfiguration(
    name="semantic-config",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="title"),
        content_fields=[SemanticField(field_name="content")],
        keywords_fields=[SemanticField(field_name="category")]
    )
)

cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
scoring_profiles: List[ScoringProfile] = []

semantic_search = SemanticSearch(configurations=[semantic_configuration])

index = SearchIndex(
    name=index_name,
    fields=index_fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
    cors_options=cors_options,
    scoring_profiles=scoring_profiles
)

Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x00000218FFE53590>


In [27]:
result = asyncio.create_task(client.create_or_update_index(index=index))

## Generating embeddings

In [28]:
from openai import AzureOpenAI

openai_client = AzureOpenAI(
        azure_deployment=azure_openai_embedding_deployment,
        api_version=azure_openai_api_version,
        azure_endpoint=azure_openai_endpoint,
        api_key=azure_openai_key
    )

def get_embeddings(text):
    response = openai_client.embeddings.create(input=text, model=embedding_model_name, dimensions=azure_openai_embedding_dimensions)
    return response.data[0].embedding

In [12]:
# res = get_embeddings("Hey")

## PDF Reader

In [12]:
from pypdf import PdfReader

file_path = '../files/QML-DS.pdf'

pdf_content = PdfReader(file_path)

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


# Upload Documents

In [13]:
from azure.search.documents import SearchClient

search_client = SearchClient(search_service_endpoint, index_name, AzureKeyCredential(search_service_key))

In [29]:

documents = []

for i, page in enumerate(pdf_content.pages):
    text = page.extract_text()
    
    documents.append({
        "id": str(i + 1),
        "title": "QLM-DS",
        "category": "QML",
        "content": text,
        "contentVector": get_embeddings(text)
    })
    
search_client.upload_documents(documents=documents)

[<azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49ed50>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49f830>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49f080>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49f9e0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49cef0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49f320>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49eed0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49d940>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49f8c0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49c2c0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2188f49cdd0>,
 <azure.search.docume

## Retrieve Data

In [30]:
from azure.search.documents.models import VectorizableTextQuery, VectorizedQuery
from azure.search.documents.models import QueryType, QueryCaptionType, QueryAnswerType

query = "What is conjugate transpose of ket?"
embedding = openai_client.embeddings.create(input=query, model=embedding_model_name, dimensions=azure_openai_embedding_dimensions).data[0].embedding

vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector")

# vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=2, fields="contentVector", )

results = search_client.search(  
    search_text=None,  
    vector_queries= [vector_query],
    select=["title", "content", "category"],
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name='semantic-config',
    query_caption=QueryCaptionType.EXTRACTIVE,
    query_answer=QueryAnswerType.EXTRACTIVE
)

In [31]:
for result in results:
    print(f"Title: {result['title']}")  
    print(f"Score: {result['@search.score']}")  
    print(f"Content: {result['content']}")  
    print(f"Category: {result['category']}\n")

Title: QLM-DS
Score: 0.6472906
Content: 4
It is simple as it is: quantum states are denoted using ket notation |a⟩, which is a column vector in Cncomplex
space (quantum states are also deﬁned on a space with inﬁnite number of dimensions, but this is not relevant to the
quantum algorithms presented in the text). However, several question arise:
•What does it exactly mean that the state of a particle is a linear combination?
•Why we use complex numbers as weights?
We will try to address and answer the following questions, revealing the beauty of quantum mechanics. The quantum
state being in a linear combination is equivalent to say that a particle is in a quantum superposition of the basis states.
The state|ψ⟩means that a particle before measurement is a probability wave corresponding to the probabilities of
being measured at {x1,x2,...,xn}locations. Thus, a question where is a particle before measurement? according to
Copenhagen interpretation of quantum mechanics is meaningless. In mac