In [2]:
from azure.core.credentials import AzureKeyCredential
import openai
from openai.embeddings_utils import cosine_similarity, get_embedding
from azure.search.documents import SearchClient
from azure.search.documents.models import RawVectorQuery
from dotenv import load_dotenv
import os
import json

# Load settings from the local .env file into the process environment.
load_dotenv("./.env")

# Azure AI Search connection settings and Azure OpenAI deployment names.
AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_KEY = os.environ.get("AZURE_SEARCH_KEY")
AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX")
EMBEDDING_MODEL = os.environ.get("AZURE_OPENAI_EMB_MODEL_NAME")
COMPLETIONS_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_MODEL")
AZURE_OPENAI_URL = os.environ.get("OPENAI_API_BASE_URL")

# Module-level configuration of the openai client, all taken from the environment.
# api_type is always stored as a string (an unset OPENAI_HOST becomes "None").
openai.api_base = AZURE_OPENAI_URL
openai.api_key = os.environ.get("OPENAI_API_KEY")
openai.api_type = str(os.environ.get("OPENAI_HOST"))
openai.api_version = os.environ.get("OPENAI_API_VERSION")

In [3]:
def get_function_call(messages, function_call = "auto"):
    """Send ``messages`` to the chat deployment together with the ``get_filters`` function schema.

    The only function advertised to the model is ``get_filters``, whose single
    declared argument is a string ``year``.

    Args:
        messages: Chat messages, forwarded unchanged to the completion request.
        function_call: Forwarded as the request's ``function_call`` value
            (defaults to ``"auto"``).

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns for the request.
    """
    year_property = {
        "type": "string",
        "description": "The year of the query",
    }
    get_filters_schema = {
        "name": "get_filters",
        "description": "Get filter for search",
        "parameters": {
            "type": "object",
            "properties": {"year": year_property},
        },
    }

    # The deployment is selected through the notebook-level COMPLETIONS_MODEL setting.
    return openai.ChatCompletion.create(
        deployment_id=COMPLETIONS_MODEL,
        messages=messages,
        functions=[get_filters_schema],
        function_call=function_call,
    )

In [4]:
# Ask the model to extract search filters from a sample question
# (Spanish: "how many hours were devoted to training in 2022?").
prompt = [{"role": "user", "content": "¿cuántas horas se han dedicado a formación en el año 2022?"}]
response = get_function_call(prompt, "auto")
# Only the first choice of the response is inspected.
response_message = response["choices"][0]["message"]
print(response_message.get("function_call"))
# The function-call arguments arrive as a JSON string; decode them for the filter-building step.
# NOTE(review): unlike the .get() above, this indexes "function_call" directly and fails when the
# reply carries no such entry -- presumably possible with function_call="auto"; confirm.
function_args = json.loads(response_message["function_call"]["arguments"])

{
  "name": "get_filters",
  "arguments": "{\n  \"year\": \"2022\"\n}"
}


In [5]:
def create_filter_expression(json_obj):
    """Build a search ``filter`` string from a flat mapping of field -> value.

    Every pair becomes an equality clause ``field eq 'value'`` and the clauses
    are combined with ``and``. Values are rendered as quoted string literals;
    a single quote inside a value is doubled (the OData escape), so text such
    as ``O'Brien`` cannot terminate the literal early and corrupt or extend
    the filter.

    Args:
        json_obj: Mapping of field name to the value it must equal. Values are
            converted with ``str`` before quoting.

    Returns:
        The filter expression, or an empty string when ``json_obj`` is empty.
    """
    def quote(value):
        # OData string literals escape an embedded single quote by doubling it.
        return "'" + str(value).replace("'", "''") + "'"

    return ' and '.join(f"{field} eq {quote(value)}" for field, value in json_obj.items())

# Turn the arguments extracted by the model into the filter string used by the search query.
# NOTE(review): `filter` shadows the Python builtin of the same name; the name is kept because
# the search cell passes it as `filter=filter`.
filter = create_filter_expression(function_args)
print(filter)

year eq '2022'


In [32]:
# Client for the configured search index, authenticated with the API key.
credential = AzureKeyCredential(AZURE_SEARCH_KEY)
search_client = SearchClient(
    endpoint=AZURE_SEARCH_SERVICE,
    index_name=AZURE_SEARCH_INDEX,
    credential=credential,
)

# Embed the question and query the "embedding" field; None is passed as the first
# argument of search(), and the results are restricted by `filter`.
search_query = "¿cuántas horas se han dedicado a formación?"
search_vector = get_embedding(search_query, engine=EMBEDDING_MODEL)
vector_query = RawVectorQuery(vector=search_vector, k=50, fields="embedding")
r = search_client.search(None, top=5, vector_queries=[vector_query], filter=filter)

# Flatten each hit to a single "<sourcepage>: <content>" line (newlines inside the
# content become spaces); every line, including the first, is preceded by a newline.
source_lines = []
for doc in r:
    doccontent = " ".join(doc["content"].split("\n"))
    source_lines.append(f"\n{doc['sourcepage']}: {doccontent}")
content = "".join(source_lines)

print(content)


memoria-social-encamina-2022.pdf#page=63:  Seis alumnos de algunos de estos centros que han realizado prácticas en ENCAMINA ya han entrado a formar parte de la plantilla al finalizar su beca. <table><tr><th colSpan=4></th></tr><tr><th></th><th></th><th></th><th></th></tr><tr><td></td><td></td><td rowSpan=4>101%</td><td rowSpan=4></td></tr><tr><td></td><td></td></tr><tr><td></td><td rowSpan=2></td></tr><tr><td>4</td></tr></table> 63ColaboraciónMemoria Social ENCAMINA 2022 Orgullosos de compartir con la comunidad techie todo lo que sabemos Aportamos nuestra experiencia y conocimiento TIC como parte de nuestra RSC +120 +100 Sesiones online Charlas 0-0 FEB 000 O 2 +75 +50 Eventos Patrocinios Blog OC :unselected: :unselected: +120 +70 Artículos Vídeos 66 :unselected: :unselected:Colaboración +120 sesiones online Vinculados a la tecnología más innovadora, vía streaming Disfrutamos conectándonos con la comunidad y con nuestros clientes a través de webinars propios y en colaboración con Micro

In [29]:
# System prompt: tells the model to answer only from the supplied sources, to cite the
# source name for every fact, and to say it doesn't know when the sources are insufficient.
# NOTE(review): the text refers to Contoso healthcare-plan / employee-handbook questions --
# confirm this wording matches the documents actually indexed.
system_chat_template = """
You are an intelligent assistant helping Contoso Inc employees with their healthcare plan questions and employee handbook questions. 
Use 'you' to refer to the individual asking the questions even if they ask with 'I'. 
Answer the following question using only the data provided in the sources below. 
For tabular information return it as an html table. Do not return markdown format. 
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. 
If you cannot answer using the sources below, say you don't know. Use below example to answer. 
"""

# Worked example, part 1: a sample question followed by its "name: text" sources.
question = """ 
'What is the deductible for the employee plan for a visit to Overlake in Bellevue?'

Sources:
info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family.
info2.pdf: Overlake is in-network for the employee plan.
info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue.
info4.pdf: In-network institutions include Overlake, Swedish and others in the region
"""

# Worked example, part 2: the answer to the sample question, citing source names in square brackets.
answer = """ 
In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf].
"""

In [30]:
# Real question plus the retrieved sources, in the same "question / Sources:" layout
# as the worked example.
user_content = search_query + "\n" + f"Sources:\n {content}"

# Conversation sent to the chat model:
#   1. system    - instructions
#   2. user      - worked example: question with its sources
#   3. assistant - the answer expected for that example
#   4. user      - the real question with the retrieved sources
# The example answer has to come after its question and carry the "assistant" role so the
# model reads the pair as a demonstration; sending it first as a second "system" message
# presents it as an instruction with no question attached.
messages = [
    {"role": "system", "content": system_chat_template},
    {"role": "user", "content": question},
    {"role": "assistant", "content": answer},
    {"role": "user", "content": user_content},
]


In [31]:
# Send the assembled conversation to the chat deployment and show the first choice's message.
response = openai.ChatCompletion.create(
    deployment_id=COMPLETIONS_MODEL,
    messages=messages,
)

print(response["choices"][0]["message"])

{
  "role": "assistant",
  "content": "I'm sorry, but I couldn't find any information about training hours in the provided sources."
}
