This example is based on:
- https://github.com/Azure/azure-search-vector-samples/blob/main/demo-python/code/basic-vector-workflow/azure-search-vector-python-sample.ipynb

In [10]:
import os
import socket
from openai import AzureOpenAI
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
load_dotenv()

# Azure OpenAI client; endpoint, key and API version all come from environment
# variables (load_dotenv() has been called just above).
openAIClient = AzureOpenAI(
    azure_endpoint=os.getenv("OPENAI_API_ENDPOINT"),
    api_key=os.getenv("OPENAI_API_KEY"),
    api_version=os.getenv("OPENAI_API_VERSION"),
)

# Model names, one environment variable each (None when the variable is unset).
(
    dalleModel,
    gpt3TurboModel,
    gpt4Model,
    visionModel,
    embeddingsModel,
) = map(
    os.getenv,
    ("DALLE_MODEL", "GPT3_MODEL", "GPT4_MODEL", "VISION_MODEL", "EMBEDDINGS_MODEL"),
)

# Azure AI Search connection settings.
aiSearchEndpoint = os.getenv("AI_SEARCH_ENDPOINT")
aiSearchApiKey = AzureKeyCredential(os.getenv("AI_SEARCH_ADMIN_KEY"))

# Index name: explicit setting first, then the Codespace name, and only as a
# last resort the local host name.
aiSearchIndexName = (
    os.getenv("AI_SEARCH_INDEX_NAME")
    or os.getenv("CODESPACE_NAME")
    or socket.gethostname()
)

In [14]:
import json

# Load the sample documents; every entry is expected to expose 'title' and
# 'content' keys (both are read below).
data_source_path = os.path.join(os.curdir, 'data', 'datasource.json')
with open(data_source_path, 'r', encoding='utf-8') as file:
    documents = json.load(file)

# Generate embeddings for title and content fields, one batched request per field.
titles = [document['title'] for document in documents]
content = [document['content'] for document in documents]
title_response = openAIClient.embeddings.create(input=titles, model=embeddingsModel)
title_embeddings = [item.embedding for item in title_response.data]
content_response = openAIClient.embeddings.create(input=content, model=embeddingsModel)
content_embeddings = [item.embedding for item in content_response.data]

# Vectors are paired with documents by position, so fail loudly if the service
# did not return exactly one embedding per input instead of silently attaching
# vectors to the wrong documents.
# NOTE(review): this assumes response.data keeps the input order - confirm.
if not (len(title_embeddings) == len(content_embeddings) == len(documents)):
    raise ValueError(
        f'Expected {len(documents)} embeddings per field, got '
        f'{len(title_embeddings)} title and {len(content_embeddings)} content embeddings'
    )

# Attach the vectors to their documents. The previous loop also re-assigned
# `title` and `content` on every iteration, which was unused and overwrote the
# `content` list built above with a single string.
for document, title_vector, content_vector in zip(documents, title_embeddings, content_embeddings):
    document['titleVector'] = title_vector
    document['contentVector'] = content_vector

print(f'Prepared {len(documents)} documents')

Prepared 108 documents


In [18]:
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SimpleField,
    SearchFieldDataType,
    SearchableField,
    SearchField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
    SearchIndex
)

# Client used to create or update the index definition.
index_client = SearchIndexClient(endpoint=aiSearchEndpoint, credential=aiSearchApiKey)

# Plain fields: a string key plus three full-text searchable fields.
fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SearchableField(name="title", type=SearchFieldDataType.String),
    SearchableField(name="content", type=SearchFieldDataType.String),
    SearchableField(name="category", type=SearchFieldDataType.String, filterable=True),
]

# Vector fields: both share the same type, dimension count and profile.
# NOTE(review): 3072 presumably matches the output size of the embeddings
# model configured for this notebook - confirm before switching models.
fields.extend(
    SearchField(
        name=vector_field_name,
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=3072,
        vector_search_profile_name="myHnswProfile",
    )
    for vector_field_name in ("titleVector", "contentVector")
)

# Vector search setup: one HNSW algorithm configuration and the profile the
# vector fields above refer to by name.
hnsw_algorithm = HnswAlgorithmConfiguration(name="myHnsw")
hnsw_profile = VectorSearchProfile(
    name="myHnswProfile",
    algorithm_configuration_name="myHnsw",
)
vector_search = VectorSearch(algorithms=[hnsw_algorithm], profiles=[hnsw_profile])

# Semantic configuration: which fields act as title, keywords and content.
prioritized_fields = SemanticPrioritizedFields(
    title_field=SemanticField(field_name="title"),
    keywords_fields=[SemanticField(field_name="category")],
    content_fields=[SemanticField(field_name="content")],
)
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=prioritized_fields,
)
semantic_search = SemanticSearch(configurations=[semantic_config])

# Assemble the index definition and send it to the service.
index = SearchIndex(
    name=aiSearchIndexName,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
)
result = index_client.create_or_update_index(index)
print(f'Index {result.name} created!')

Index ik1234 created!


In [20]:
from azure.search.documents import SearchClient

# Client bound to the index named by aiSearchIndexName; reused by the query cell.
search_client = SearchClient(endpoint=aiSearchEndpoint, index_name=aiSearchIndexName, credential=aiSearchApiKey)

# upload_documents returns one IndexingResult per document. A single document
# can be rejected without the call raising, so report what actually succeeded
# instead of assuming every document was indexed.
result = search_client.upload_documents(documents)
failed = [item for item in result if not item.succeeded]
print(f"Indexed {len(result) - len(failed)} documents")
for item in failed:
    print(f"Failed to index document {item.key}: {item.error_message}")

Indexed 108 documents


In [51]:
from azure.search.documents.models import VectorizedQuery, VectorFilterMode

# Free-text question to answer from the indexed documents.
searchQuery = "tools for software development"

# Embed the question with the same embeddings model used for the documents,
# then ask for the 3 nearest neighbours on the contentVector field.
embedding_response = openAIClient.embeddings.create(input=searchQuery, model=embeddingsModel)
embedding = embedding_response.data[0].embedding
vector_query = VectorizedQuery(
    vector=embedding,
    k_nearest_neighbors=3,
    fields="contentVector",
)

# Optional variations, kept as hints only:
#   search_text=searchQuery                          -> hybrid (text + vector) search
#   vector_filter_mode=VectorFilterMode.PRE_FILTER,
#   filter="category eq 'Developer Tools'"           -> restrict to one category
#   top=3                                            -> limit search results
searchResults = search_client.search(
    search_text=None,  # None = pure vector search
    vector_queries=[vector_query],
    select=["id", "title", "content", "category"],
)

# Print every hit and keep a trimmed copy to use as sources for the chat prompt.
sources = []
for hit in searchResults:
    print(
        f"Id: {hit['id']}\n"
        f"Title: {hit['title']}\n"
        f"Score: {hit['@search.score']}\n"
        f"Content: {hit['content']}\n"
        f"Category: {hit['category']}\n"
    )
    sources.append(
        {
            "Id": hit['id'],
            "Title": hit['title'],
            "Content": hit['content'],
        }
    )

Title: Azure DevOps
Score: 0.60645723
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.5913076
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such

In [52]:
# System prompt: restricts the model to the sources supplied in the user message.
systemMessage = """
You are a helpful senior software developer.
I will ask you software development related questions and provide you with sources.
I expect you to provide a concise response based on the sources which I provided you.
Only include my sources in your answers.
"""

# User prompt: the question followed by the search hits collected in `sources`.
userMessage = f"""
QUESTION:
{searchQuery}
------
SOURCES:
{sources}
"""

chat_messages = [
    {"role": "system", "content": systemMessage},
    {"role": "user", "content": userMessage},
]
response = openAIClient.chat.completions.create(model=gpt3TurboModel, messages=chat_messages)

# Only the first choice is shown.
answer = response.choices[0].message.content
print(answer)

There are several tools for software development that can be used in conjunction with Azure services. 

Azure DevOps is a comprehensive suite of services that includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. It supports a wide range of programming languages, frameworks, and platforms, and integrates with other Azure services like Azure App Service and Azure Functions. [Source: Azure DevOps]

Azure DevTest Labs is a fully managed service that allows you to create, manage, and share development and test environments on Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms such as Windows, Linux, and Kubernetes, and integrates with other Azure services like Azure Virtual Machines and Azure App Service. 

In [86]:
import requests

# HR question sent to the code.travail.gouv.fr search endpoint.
hrQuery="congé paternité"

# NOTE(review): the "IZ6D3hYYuhJr3QgZA94HM" path segment looks like a Next.js
# build id, which would change whenever the site is redeployed and make this
# URL stop working - confirm, and prefer a stable API if one exists.
codeTravailResponse = requests.get(
    "https://code.travail.gouv.fr/_next/data/IZ6D3hYYuhJr3QgZA94HM/recherche.json",
    params={'q': hrQuery},
    timeout=30,  # without a timeout a stalled connection blocks the kernel indefinitely
)
# Surface HTTP errors (e.g. a 404 page) as such, rather than as a JSON
# decoding error on the error page's body.
codeTravailResponse.raise_for_status()
codeTravailResults = codeTravailResponse.json()


In [87]:
# Reduce each returned article to the fields used in the prompt; a key that is
# absent from an article yields None.
codeTravailSources = [
    {
        "Id": article.get('id'),
        "Title": article.get('title'),
        "Content": article.get('description'),
        "Url": article.get('url'),
    }
    for article in codeTravailResults['pageProps']['items']['articles']
]

print(codeTravailSources)

[{'Id': 'LEGIARTI000006900917', 'Title': 'L1225-35', 'Content': "Après la naissance de l'enfant et dans un délai déterminé par décret, le père salarié ainsi que, le cas échéant, le conjoint salarié de la mère ou la personne salariée liée à elle par un pacte civil de solidarité ou vivant maritalement avec elle bénéficient d'un congé de paternité et d'accueil de l'enfant de onze jours consécutifs ou de dix-huit jours consécutifs en cas de naissances multiples. Le congé de paternité et d'accueil de l'enfant entraîne la suspension du contrat de travail. Le salarié qui souhaite bénéficier du congé de paternité et d'accueil de l'enfant avertit son employeur au moins un mois avant la date à laquelle il envisage de le prendre, en précisant la date à laquelle il entend y mettre fin. Par dérogation aux trois premiers alinéas, lorsque l'état de santé de l'enfant nécessite son hospitalisation immédiate après la naissance dans une unité de soins spécialisée définie par arrêté des ministres chargés 

In [96]:
# System prompt: asks for a French answer, restricted to the supplied sources,
# returned as JSON in the given format.
hrSystemMessage = """
You are a helpful human resources assistant.
I will ask you an HR related question and provide you with sources from "Code du travail" and "Convention collective".
"Convention collective" overrides "Code du travail".
I expect you to provide a concise response in French based on the sources which I provided you.
Only include my sources in your answers.
Provide sources used to generate the answer.
The response MUST be in JSON and MUST have the format: {"answer": "", "sources": [{"title": "", "url": ""}]}
"""

# User prompt: the question plus the sources. The "CONVENTION COLLECTIVE"
# section is left empty here; only the Code du travail articles are supplied.
hrUserMessage = f"""
QUESTION:
{hrQuery}
------
CONVENTION COLLECTIVE:

------
CODE DU TRAVAIL:
{codeTravailSources}
"""

hr_messages = [
    {"role": "system", "content": hrSystemMessage},
    {"role": "user", "content": hrUserMessage},
]
hrResponse = openAIClient.chat.completions.create(model=gpt3TurboModel, messages=hr_messages)

# Only the first choice is shown.
hr_answer = hrResponse.choices[0].message.content
print(hr_answer)

{"answer": "Le congé de paternité et d'accueil de l'enfant est de onze jours consécutifs, ou de dix-huit jours consécutifs en cas de naissances multiples, selon l'article L1225-35 du Code du travail. Le salarié doit avertir son employeur au moins un mois avant la date à laquelle il souhaite prendre le congé. Ce congé entraîne la suspension du contrat de travail. A l'issue du congé, le salarié retrouve son précédent emploi ou un emploi similaire assorti d'une rémunération au moins équivalente, conformément à l'article L1225-36 du Code du travail. De plus, la durée du congé de paternité est assimilée à une période de travail effectif pour la détermination des droits liés à l'ancienneté, et le salarié conserve tous les avantages acquis avant le début du congé, en vertu de l'article L1225-35-2 du Code du travail.", "sources": [{"title": "L1225-35", "url": "https://www.legifrance.gouv.fr/affichCodeArticle.do;?idArticle=LEGIARTI000042685498&cidTexte=LEGITEXT000006072050"}, {"title": "L1225-3