In [1]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from openai import AzureOpenAI
from azure.search.documents.models import VectorizableTextQuery

from dotenv import load_dotenv
import os

In [2]:
# Load settings through python-dotenv. With override=True, values from the
# .env file take precedence over variables already set in the environment.
load_dotenv(override=True)

# Azure AI Search: service endpoint and admin/query key.
AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_KEY = os.environ.get("AZURE_SEARCH_KEY")

# Azure OpenAI: endpoint, API version, key, and deployment names.
# Any variable that is not set resolves to None here.
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_VERSION = os.environ.get("AZURE_OPENAI_VERSION")
AZURE_OPENAI_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_DEPLOYMENT = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME")
AZURE_OPENAI_EMBEDDING_MODEL = os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")

# Azure Storage connection string (read here; not used in the cells shown).
AZURE_STORAGE_CONNECTION = os.environ.get("AZURE_STORAGE_CONNECTION")

# Key-based credential handed to the search client.
credential = AzureKeyCredential(AZURE_SEARCH_KEY)

In [3]:
# Azure OpenAI client, configured entirely from the values loaded above.
openai_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    azure_deployment=AZURE_OPENAI_DEPLOYMENT,
    api_version=AZURE_OPENAI_VERSION,
    api_key=AZURE_OPENAI_KEY,
)

# Name of the search index this notebook queries.
index_name = "py-rag-training-idx"

# Search client bound to that index, authenticated with the key credential.
search_client = SearchClient(
    credential=credential,
    index_name=index_name,
    endpoint=AZURE_SEARCH_SERVICE,
)

# Prompt template for the chat model (unchanged in this update).
# The `{query}` and `{sources}` placeholders are filled in with str.format()
# when the chat request is built.
GROUNDED_PROMPT = (
    "\n"
    "You are an AI assistant that helps users learn from the information found in the source material.\n"
    "Answer the query using only the sources provided below.\n"
    "Use bullets if the answer has multiple points.\n"
    "If the answer is longer than 3 sentences, provide a summary.\n"
    "Answer ONLY with the facts listed in the list of sources below. Cite your source when you answer the question\n"
    "If there isn't enough information below, say you don't know.\n"
    "Do not generate answers that don't use the sources below.\n"
    "Query: {query}\n"
    "Sources:\n"
    "{sources}\n"
)

# The user question (unchanged in this update). The same text is used as the
# keyword search text, as the vector query text, and later in the prompt.
query = "Are there any cloud formations specific to oceans and large bodies of water?"
vector_query = VectorizableTextQuery(
    text=query,
    fields="text_vector",
    k_nearest_neighbors=50,
)

# Hybrid request (keyword text + vector query). This update adds semantic
# ranking (query_type / semantic_configuration_name) and a scoring profile
# with its parameters; only the top 5 results are requested.
search_results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    query_type="semantic",
    semantic_configuration_name="my-semantic-config",
    scoring_profile="my-scoring-profile",
    scoring_parameters=["tags-ocean, 'sea surface', seas, surface"],
    select="title, chunk, locations",
    top=5,
)

# Flatten each hit into one "TITLE/CONTENT/LOCATIONS" entry and join the
# entries with a row of '=' characters followed by a newline.
sources_formatted = "=================\n".join(
    f'TITLE: {hit["title"]}, CONTENT: {hit["chunk"]}, LOCATIONS: {hit["locations"]}'
    for hit in search_results
)

# Single user turn: the grounded prompt with the question and the formatted
# search results substituted into its placeholders.
user_message = {
    "role": "user",
    "content": GROUNDED_PROMPT.format(query=query, sources=sources_formatted),
}

# Ask the chat deployment for a completion.
response = openai_client.chat.completions.create(
    model=AZURE_OPENAI_DEPLOYMENT,
    messages=[user_message],
)

# Show the text of the first returned choice.
answer = response.choices[0].message.content
print(answer)

Yes, there are several cloud formations specific to oceans and large bodies of water:

- Marine stratocumulus clouds are a persistent feature off the coasts of Peru and Chile over the Pacific Ocean. These low-level clouds are essentially fog and develop most often during the winter and early spring. They can be pushed inland by prevailing winds and are easily blocked by coastal mountains, often filling valleys that open to the ocean (page-15.pdf).
- Ship tracks are narrow clouds that form over the ocean (e.g., the Pacific Ocean) when water vapor condenses around particles from ship exhaust. These clouds are often brighter than typical marine clouds and can stretch for hundreds of kilometers (page-31.pdf).
- Cloud streets—parallel rows of clouds—form over regions like the Bering Strait in the Arctic Ocean when cold winds blow across warmer, moister ocean surfaces. These "streets" align with the wind direction and are a result of spinning cylinders of air (page-21.pdf).
- Undular bores o