Influencia de la estructura del contexto en el razonamiento cronológico

In [1]:
import os

import requests
import json
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# API key read from the environment; None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [10]:
# Name of the FAISS index and the folder referenced by the (disabled)
# save/load calls below.
index_name = "conversations"
index_path = f"../indexes/{index_name}"
# Embeddings object passed to FAISS when texts are indexed.
embeddings = OpenAIEmbeddings()
# Kept for reference: create, persist and reload the index (currently disabled).
# db = FAISS.from_texts(texts=['9'], embedding=embeddings)
# db.save_local(folder_path=index_path, index_name=index_name)
# db = FAISS.load_local(folder_path=index_path, index_name=index_name, embeddings=embeddings)


In [32]:
def gpt_system_user(
    system_message: str, user_message: str, model: str = "gpt-3.5-turbo"
):
    """Send one system + user message pair to the OpenAI chat completions API.

    Intended for use in notebooks.

    Args:
        system_message: Content of the ``system`` role message.
        user_message: Content of the ``user`` role message.
        model: Model name sent in the request body.

    Returns:
        A dict ``{"status_code": int, "text": message}`` where ``message`` is
        the ``message`` object of the first choice (callers read
        ``answer["text"]["content"]``). Returns ``None`` when the HTTP request
        fails, the body is not JSON, or the response carries no choices
        (for example an API error payload).
    """
    # Hyper 2
    # POST https://yoizenia.openai.azure.com/openai/deployments/GPT35Turbo/chat/completions

    payload = {
        "model": model,
        # Sampling options belong in the JSON body; sending temperature as a
        # URL query parameter does not configure the completion.
        "temperature": 0,
        "messages": [
            {"content": system_message, "role": "system"},
            {"content": user_message, "role": "user"},
        ],
    }

    try:
        response = requests.post(
            url="https://api.openai.com/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {OPENAI_API_KEY}",
            },
            data=json.dumps(payload),
            timeout=10,
        )
    except requests.exceptions.RequestException:
        print("HTTP Request failed")
        return None

    try:
        body = response.json()
    except ValueError:
        # Covers JSON decoding errors raised for a non-JSON response body.
        print(f"Response was not valid JSON (status {response.status_code})")
        return None

    # Error responses (bad key, rate limit, ...) have no "choices" entry;
    # report them instead of failing with a KeyError.
    choices = body.get("choices") if isinstance(body, dict) else None
    if not choices:
        print(f"Request returned no choices (status {response.status_code}): {body}")
        return None

    return {
        "status_code": response.status_code,
        "text": choices[0]["message"],
    }


In [12]:
# Sample conversations used to populate the vector index. Each entry holds the
# message list of one conversation plus the metadata stored alongside it.
# NOTE(review): "created_at" combines a compact date (YYYYMMDD) with a
# colon-separated time; the recorded extraction for context 2 reads the year
# as 2308, so this format may be ambiguous for the model -- confirm whether
# that is intended for the experiment.
condensed_contexts = [
    {
        "messages": [
            {"role": "user", "content": "Quiero un turno para mi madre"},
            {"role": "system", "content": "cual es el nombre de tu madre?"},
            {"role": "user", "content": "Alba"},
            {"role": "system", "content": "y que edad tiene?"},
            {"role": "user", "content": "66"},
            {
                "role": "system",
                "content": "perfecto, tiene turno para el 30 de agosto a las 15:00",
            },
        ],
        "metadata": {
            "session_id": "1",
            "person_id": "20",
            "context_id": "1",
            "created_at": "20230825T12:00:00.000Z",
        },
    },
    {
        "messages": [
            {"role": "user", "content": "Cual es la capital de Colombia?"},
            {"role": "system", "content": "Bogota"},
        ],
        "metadata": {
            "session_id": "1",
            "person_id": "20",
            "context_id": "2",
            "created_at": "20230825T18:00:00.000Z",
        },
    },
]


In [37]:
# Create the vector store from a single placeholder text, then add one
# extracted summary per conversation below.
db = FAISS.from_texts(texts=['notebook-9'], embedding=embeddings)
for context in condensed_contexts:
    # Concatenate the string form of every message (no separator).
    messages = ''.join([str(message) for message in context['messages']])
    extraction = gpt_system_user(
        system_message="""
              Extract every meaningful information about the user from the provided conversation.
              Ask yourself, what happened in the conversation?
              What data about the user did you learn?
              At what date and time was this data gathered?
              """,
        # Each section is wrapped in its own balanced pair of backtick fences.
        user_message=f" METADATA: ```{context['metadata']}``` CONVERSATION: ```{messages}```",
        model="gpt-3.5-turbo",
    )
    if extraction is None:
        # gpt_system_user returns None on failure; skip this context instead
        # of failing on a subscript of None.
        print(f"Extraction failed for context {context['metadata']}; skipping")
        continue
    extracted_text = extraction['text']['content']
    print(f"{extracted_text} \n\n\n///////////////////////////\n\n\n")
    # Store the summary with the conversation metadata so searches can filter on it.
    db.add_texts(
        texts=[extracted_text],
        metadatas=[context['metadata']],
    )

In the conversation, the user mentioned that they want to schedule an appointment for their mother. The user's mother's name is Alba and she is 66 years old. This conversation took place on August 25, 2023, at 12:00 PM. The system confirmed that the mother has an appointment on August 30th at 3:00 PM. 


///////////////////////////



Data Gathered:
- The user asked a question: "Cual es la capital de Colombia?"
- The system responded: "Bogota"

Date and Time of Data Gathering:
- The conversation data was gathered on August 25, 2308 at 18:00:00 UTC. 


///////////////////////////





Para empezar, preguntamos simplemente por la edad.

In [39]:
user_input_3 = "que edad tiene alba?"
# Metadata filter for the search. Named `person_filter` so the built-in
# `filter` is not shadowed for the rest of the session.
person_filter = {"person_id": "20"}
db_results_3 = db.similarity_search_with_score(
    query=user_input_3, embeddings=embeddings, filter=person_filter, k=1
)
if not db_results_3:
    raise ValueError(f"No stored context matched filter {person_filter}")
# Only the document of the top hit is used; its score is ignored.
top_document_3 = db_results_3[0][0]
context_3 = f"{{ message: {top_document_3.page_content}, metadata: {top_document_3.metadata} }}"
print(f"CONTEXT:{context_3}")
system_prompt_3 = f"""
You are a personal assistant. You will find past conversations between you and the user between backticks.
Use them to answer the user's question.

```{context_3}```

"""
answer_3 = gpt_system_user(system_message=system_prompt_3, user_message=user_input_3)
if answer_3 is None:
    # gpt_system_user returns None on failure; fail with a clear message.
    raise RuntimeError("gpt_system_user returned no answer")
answer_3["text"]["content"]


CONTEXT:{ message: In the conversation, the user mentioned that they want to schedule an appointment for their mother. The user's mother's name is Alba and she is 66 years old. This conversation took place on August 25, 2023, at 12:00 PM. The system confirmed that the mother has an appointment on August 30th at 3:00 PM., metadata: {'session_id': '1', 'person_id': '20', 'context_id': '1', 'created_at': '20230825T12:00:00.000Z'} }


'Alba tiene 66 años.'

Ahora vemos si GPT-3.5 puede razonar con la fecha.

In [40]:
user_input_3 = "que edad tenía alba en 2020?"
# Metadata filter for the search. Named `person_filter` so the built-in
# `filter` is not shadowed for the rest of the session.
person_filter = {"person_id": "20"}
db_results_3 = db.similarity_search_with_score(
    query=user_input_3, embeddings=embeddings, filter=person_filter, k=1
)
if not db_results_3:
    raise ValueError(f"No stored context matched filter {person_filter}")
# Only the document of the top hit is used; its score is ignored.
top_document_3 = db_results_3[0][0]
context_3 = f"{{ message: {top_document_3.page_content}, metadata: {top_document_3.metadata} }}"
print(f"CONTEXT:{context_3}")
system_prompt_3 = f"""
You are a personal assistant. You will find past conversations between you and the user between backticks.
Use them to answer the user's question.

```{context_3}```

"""
answer_3 = gpt_system_user(system_message=system_prompt_3, user_message=user_input_3)
if answer_3 is None:
    # gpt_system_user returns None on failure; fail with a clear message.
    raise RuntimeError("gpt_system_user returned no answer")
answer_3["text"]["content"]


CONTEXT:{ message: In the conversation, the user mentioned that they want to schedule an appointment for their mother. The user's mother's name is Alba and she is 66 years old. This conversation took place on August 25, 2023, at 12:00 PM. The system confirmed that the mother has an appointment on August 30th at 3:00 PM., metadata: {'session_id': '1', 'person_id': '20', 'context_id': '1', 'created_at': '20230825T12:00:00.000Z'} }


'Alba tenía 63 años en 2020.'

Vemos que, con esta forma de guardar el dato, GPT-3.5 puede razonar bien sobre la edad.

In [41]:
user_input_3 = "que edad tenía alba en 1970?"
# Metadata filter for the search. Named `person_filter` so the built-in
# `filter` is not shadowed for the rest of the session.
person_filter = {"person_id": "20"}
db_results_3 = db.similarity_search_with_score(
    query=user_input_3, embeddings=embeddings, filter=person_filter, k=1
)
if not db_results_3:
    raise ValueError(f"No stored context matched filter {person_filter}")
# Only the document of the top hit is used; its score is ignored.
top_document_3 = db_results_3[0][0]
context_3 = f"{{ message: {top_document_3.page_content}, metadata: {top_document_3.metadata} }}"
print(f"CONTEXT:{context_3}")
system_prompt_3 = f"""
You are a personal assistant. You will find past conversations between you and the user between backticks.
Use them to answer the user's question.

```{context_3}```

"""
answer_3 = gpt_system_user(system_message=system_prompt_3, user_message=user_input_3)
if answer_3 is None:
    # gpt_system_user returns None on failure; fail with a clear message.
    raise RuntimeError("gpt_system_user returned no answer")
answer_3["text"]["content"]


CONTEXT:{ message: In the conversation, the user mentioned that they want to schedule an appointment for their mother. The user's mother's name is Alba and she is 66 years old. This conversation took place on August 25, 2023, at 12:00 PM. The system confirmed that the mother has an appointment on August 30th at 3:00 PM., metadata: {'session_id': '1', 'person_id': '20', 'context_id': '1', 'created_at': '20230825T12:00:00.000Z'} }


'La edad de Alba en 1970 sería 13 años.'

In [42]:
user_input_3 = "conoces a mi madre?"
# Metadata filter for the search. Named `person_filter` so the built-in
# `filter` is not shadowed for the rest of the session.
person_filter = {"person_id": "20"}
db_results_3 = db.similarity_search_with_score(
    query=user_input_3, embeddings=embeddings, filter=person_filter, k=1
)
if not db_results_3:
    raise ValueError(f"No stored context matched filter {person_filter}")
# Only the document of the top hit is used; its score is ignored.
top_document_3 = db_results_3[0][0]
context_3 = f"{{ message: {top_document_3.page_content}, metadata: {top_document_3.metadata} }}"
print(f"CONTEXT:{context_3}")
system_prompt_3 = f"""
You are a personal assistant. You will find past conversations between you and the user between backticks.
Use them to answer the user's question.

```{context_3}```

"""
answer_3 = gpt_system_user(system_message=system_prompt_3, user_message=user_input_3)
if answer_3 is None:
    # gpt_system_user returns None on failure; fail with a clear message.
    raise RuntimeError("gpt_system_user returned no answer")
answer_3["text"]["content"]


CONTEXT:{ message: In the conversation, the user mentioned that they want to schedule an appointment for their mother. The user's mother's name is Alba and she is 66 years old. This conversation took place on August 25, 2023, at 12:00 PM. The system confirmed that the mother has an appointment on August 30th at 3:00 PM., metadata: {'session_id': '1', 'person_id': '20', 'context_id': '1', 'created_at': '20230825T12:00:00.000Z'} }


'Sí, conozco a tu madre. Su nombre es Alba y tiene 66 años. ¿En qué puedo ayudarte hoy?'