In [1]:
from azure.core.credentials import AzureKeyCredential
import openai
from openai.embeddings_utils import cosine_similarity, get_embedding
from azure.search.documents import SearchClient
from azure.search.documents.models import RawVectorQuery
from dotenv import load_dotenv
import os
import json

# Load settings from the local .env file into the process environment.
load_dotenv("./.env")

# Azure OpenAI / Azure AI Search settings, all read from environment variables.
# os.getenv returns None for anything that is not set.
AZURE_OPENAI_URL = os.getenv("OPENAI_API_BASE_URL")
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE_URI")
AZURE_SEARCH_KEY = os.getenv("AZURE_SEARCH_KEY")
AZURE_SEARCH_INDEX = os.getenv("AZURE_SEARCH_INDEX")
EMBEDDING_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME")
COMPLETIONS_MODEL = os.getenv("AZURE_OPENAI_CHATGPT_MODEL")

# Configure the module-level openai client.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Wrapping getenv in str() turned an unset OPENAI_HOST into the literal string
# "None", which is not a valid API type. Fall back to "azure" instead, since the
# rest of the notebook addresses models by deployment name (Azure-style calls).
# NOTE(review): confirm "azure" is the right default for this project.
openai.api_type = os.getenv("OPENAI_HOST", "azure")
openai.api_base = AZURE_OPENAI_URL
openai.api_version = os.getenv("OPENAI_API_VERSION")

In [59]:
def get_function_call(messages, function_call = "auto"):
    """Send `messages` to the chat model together with the `get_filters` function definition.

    Args:
        messages: Chat messages forwarded unchanged to `openai.ChatCompletion.create`.
        function_call: Forwarded unchanged as the `function_call` argument
            (defaults to "auto").

    Returns:
        The response object returned by `openai.ChatCompletion.create`.
    """
    # The only property the model is offered: a year, expressed as a string.
    year_property = {
        "type": "string",
        "description": "The year of the query",
    }

    get_filters_spec = {
        "name": "get_filters",
        "description": "Get filter for search",
        "parameters": {
            "type": "object",
            "properties": {"year": year_property},
        },
    }

    # The deployment to call comes from the module-level COMPLETIONS_MODEL setting.
    return openai.ChatCompletion.create(
        deployment_id=COMPLETIONS_MODEL,
        messages=messages,
        functions=[get_filters_spec],
        function_call=function_call,
    )

def create_filter_expression(json_obj):
    """Build an OData filter that requires every key to equal its value.

    Each key/value pair becomes `key eq 'value'`, and the clauses are joined
    with ` and `. Values are converted with `str()` and always emitted as
    quoted string literals.

    Args:
        json_obj: Mapping of field name to the value it must equal.

    Returns:
        The filter expression, or an empty string when `json_obj` is empty.
    """
    def quote(value):
        # OData string literals escape an embedded single quote by doubling it;
        # without this a value such as O'Brien would break (or alter) the filter.
        return "'" + str(value).replace("'", "''") + "'"

    return ' and '.join(f"{field} eq {quote(value)}" for field, value in json_obj.items())

def vector_search(search_query, filter=None, top=5):
    """Run a vector query against the search index and format the hits as source lines.

    The query text is embedded with `get_embedding` and sent as a vector query
    on the "embedding" field, with no search text. Each hit is also printed,
    prefixed with its `@search.score`.

    Args:
        search_query: Text to embed and search for.
        filter: Filter expression passed to the search call. `None` or an empty
            string means no filter.
        top: Number of results to request (used for both `top` and the vector
            query's `k`). Defaults to 5, the previously hard-coded value.

    Returns:
        A string with one `"\\n<sourcepage>: <content>"` entry per hit, where
        newlines inside the content are replaced by spaces. Empty when there
        are no hits.
    """
    credential = AzureKeyCredential(AZURE_SEARCH_KEY)
    search_client = SearchClient(endpoint=AZURE_SEARCH_SERVICE, index_name=AZURE_SEARCH_INDEX, credential=credential)

    search_vector = get_embedding(search_query, engine=EMBEDDING_MODEL)
    vector_query = RawVectorQuery(vector=search_vector, k=top, fields="embedding", exhaustive=True)
    results = search_client.search(
        None,
        top=top,
        vector_queries=[vector_query],
        # Normalise "" to None so that "no filter" is never sent as an empty expression.
        filter=filter or None,
    )

    source_lines = []
    for doc in results:
        doccontent = doc["content"].replace("\n", " ")
        source_line = f"{doc['sourcepage']}: {doccontent}"
        print(f"{doc['@search.score']}: {source_line}")
        source_lines.append(source_line)

    # Every entry is preceded by a newline, so a non-empty result starts with "\n".
    return "".join("\n" + source_line for source_line in source_lines)

In [57]:
# Alternative questions to try:
# search_query = "¿cuántas horas se han dedicado a formación en el año 2022?"
# search_query = "¿cuántas certificaciones en el año 2022?"
# search_query = "¿qué es pensar en colores?"
# search_query = "¿qué premios se recibieron en el año 2021?"
# search_query = "¿Cuántos MVP habían en el año 2018?"
search_query = "¿Qué webinars se hicieron en el 2018?"

# Let the model decide whether the question implies a filter (e.g. a year).
prompt = [{"role": "user", "content": search_query}]
response = get_function_call(prompt, "auto")
response_message = response["choices"][0]["message"]

# `filter` shadows the built-in, but the retrieval cell further down reads this
# exact name, so it is kept. None (rather than "") is used to mean "no filter".
filter = None
function_call = response_message.get("function_call")
if function_call:
    print(function_call)
    try:
        function_args = json.loads(function_call["arguments"])
    except json.JSONDecodeError:
        # The arguments are model-generated text and may not be valid JSON.
        function_args = None
        print("Could not parse function arguments; searching without a filter")
    if isinstance(function_args, dict):
        # An empty argument object produces an empty expression; treat it as no filter.
        filter = create_filter_expression(function_args) or None
    print(filter)
else:
    print("No function call")

{
  "name": "get_filters",
  "arguments": "{\n  \"year\": \"2018\"\n}"
}
year eq '2018'


In [44]:
# Connect to the search index using key-based authentication.
credential = AzureKeyCredential(AZURE_SEARCH_KEY)
search_client = SearchClient(
    endpoint=AZURE_SEARCH_SERVICE,
    index_name=AZURE_SEARCH_INDEX,
    credential=credential,
)

# Embed the question and issue a vector-only query (no search text),
# restricted by the filter computed in the previous cell.
search_vector = get_embedding(search_query, engine=EMBEDDING_MODEL)
vector_query = RawVectorQuery(vector=search_vector, k=5, fields="embedding", exhaustive=True)
r = search_client.search(None, top=5, vector_queries=[vector_query], filter=filter)

# Collect one "<sourcepage>: <content>" line per hit and echo each with its score.
source_lines = []
for doc in r:
    doccontent = doc["content"].replace("\n", " ")
    source_lines.append(f"{doc['sourcepage']}: {doccontent}")
    print(f"{doc['@search.score']}: {doc['sourcepage']}: {doccontent}")

# Each entry is preceded by a newline, matching the format the prompt cell expects.
content = "".join("\n" + line for line in source_lines)

0.83984154: memoria-social-encamina-2018.pdf#page=13: Adrián Díaz habló de las  herramientas digitales que permiten optimizar  el trabajo de los profesores en la corrección  de exámenes y que mejoran los procesos  entre alumnos, padres, profesores y centros.“REVISE: Cómo facilitar  la corrección masiva de  exámenes aprovechando   el cloud de Microsoft”En este webinar hablamos de los beneficios  de digitalizar los puestos de trabajo de la  primera línea industrial y cómo la corporación  puede proporcionar a los firstline workers  herramientas digitales modernas que les  hagan sentirse más implicados y alineados  con la cultura empresarial. De nuevo lo  impartió Daniel Alonso.“Office 365 F1 para   sector industrial”Webinar enfocado a empresas del sector  retail que ya conocen las ventajas de Office  365 entre su personal de oficina, pero que  todavía no le ven el ROI a usarlo entre sus  trabajadores de primera línea. Impartido  por Daniel Alonso, Cloud Solutions Principal  Advisor de ENC

In [45]:
# System prompt: instructs the model to answer only from the supplied sources,
# cite the source name for every fact, and say it doesn't know otherwise.
# The text (including trailing spaces) is sent to the model verbatim.
system_chat_template = """
You are an intelligent assistant helping employees with our social memory questions. 
Use 'you' to refer to the individual asking the questions even if they ask with 'I'. 
Answer the following question using only the data provided in the sources below. 
For tabular information return it as an html table. Do not return markdown format. 
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. 
If you cannot answer using the sources below, say you don't know. Use below example to answer. 
"""

# Worked example, part 1: a sample question followed by its "Sources:" list,
# in the same "<name>: <text>" layout used for the real query.
question = """ 
'What is the deductible for the employee plan for a visit to Overlake in Bellevue?'

Sources:
info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family.
info2.pdf: Overlake is in-network for the employee plan.
info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue.
info4.pdf: In-network institutions include Overlake, Swedish and others in the region
"""

# Worked example, part 2: the expected answer, citing source names in square brackets.
answer = """ 
In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf].
"""

In [48]:
# Conversation layout: system instructions, one worked example (question plus
# the answer being demonstrated), then the real question with its sources.
messages = [
    {"role": "system", "content": system_chat_template},
    {"role": "user", "content": question},
    # The example answer is the reply the model should imitate, so it is sent
    # with the assistant role rather than as a second system message.
    {"role": "assistant", "content": answer},
]

# The question put to the model; the sources in `content` were retrieved earlier.
search_query = "¿Qué webinars se hicieron?"

user_content = search_query + "\n" + f"Sources:\n {content}"
messages.append({"role": "user", "content": user_content})

In [29]:
# Dump the assembled message list to inspect exactly what will be sent to the model.
print(messages)

[{'role': 'system', 'content': "\nYou are an intelligent assistant helping employees with our social memory questions. \nUse 'you' to refer to the individual asking the questions even if they ask with 'I'. \nAnswer the following question using only the data provided in the sources below. \nFor tabular information return it as an html table. Do not return markdown format. \nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. \nIf you cannot answer using the sources below, say you don't know. Use below example to answer. \n"}, {'role': 'user', 'content': " \n'What is the deductible for the employee plan for a visit to Overlake in Bellevue?'\n\nSources:\ninfo1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family.\ninfo2.pdf: Overlake is in-network 

In [49]:
# Send the assembled conversation to the chat deployment.
completion_options = {
    "messages": messages,
    "deployment_id": COMPLETIONS_MODEL,
    "temperature": 0.7,
}
response = openai.ChatCompletion.create(**completion_options)

# Show the first choice's message (role and content).
first_choice = response['choices'][0]
print(first_choice['message'])

{
  "role": "assistant",
  "content": "Los webinars que se hicieron son los siguientes:\n\n1. \"REVISE: C\u00f3mo facilitar la correcci\u00f3n masiva de ex\u00e1menes aprovechando el cloud de Microsoft\" impartido por Adri\u00e1n D\u00edaz [memoria-social-encamina-2018.pdf#page=13]\n2. \"Office 365 F1 para sector industrial\" impartido por Daniel Alonso [memoria-social-encamina-2018.pdf#page=13]\n3. \"Es hora de transformar tu Primera L\u00ednea: Office 365 F1 para sector retail\" impartido por Daniel Alonso [memoria-social-encamina-2018.pdf#page=13]\n4. \"El futuro es ahora\" impartido por Alberto D\u00edaz, Gustavo V\u00e9lez, Juan Carlos Gonz\u00e1lez y Fabian Imaz [memoria-social-encamina-2018.pdf#page=13]\n5. \"Haciendo Workflows con Azure\" impartido por Adr\u00edan D\u00edaz Cervera, Sergio Hern\u00e1ndez, Luis Ma\u00f1ez y Rub\u00e9n Ramos [memoria-social-encamina-2018.pdf#page=13]\n6. \"Seguridad en O365 y Azure\" impartido por Daniel Alonso [memoria-social-encamina-2018.pdf#p