Agregado de un paso de extracción y limpieza de datos a un conjunto de datos con 4 miembros de un grupo familiar.

In [1]:
import os

import requests
import json
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# API key for the OpenAI HTTP calls below; None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [2]:
# Name and on-disk folder of the FAISS index for stored conversations.
index_name = "conversations"
index_path = f"../indexes/{index_name}"

# Embedding client shared by the FAISS calls in the cells below.
embeddings = OpenAIEmbeddings()

# Reference snippet (left disabled): create, persist and reload the index on disk.
# db = FAISS.from_texts(texts=['9'], embedding=embeddings)
# db.save_local(folder_path=index_path, index_name=index_name)
# db = FAISS.load_local(folder_path=index_path, index_name=index_name, embeddings=embeddings)


In [3]:
def gpt_system_user(
    system_message: str, user_message: str, model: str = "gpt-3.5-turbo"
):
    """Send one system + user message pair to the OpenAI chat completions API.

    Intended for use in notebooks.

    Args:
        system_message: Content of the ``system`` message.
        user_message: Content of the ``user`` message.
        model: Model name sent in the request body.

    Returns:
        ``{"status_code": <int>, "text": <message dict>}``, where ``text`` is
        the ``message`` object of the first choice (callers read
        ``answer["text"]["content"]``). Returns ``None`` when the HTTP request
        fails or the response body does not contain a usable choice.
    """
    # Hyper 2
    # POST https://yoizenia.openai.azure.com/openai/deployments/GPT35Turbo/chat/completions

    payload = {
        "model": model,
        # The API documents `temperature` as a request-body field, so it is
        # sent here rather than as a URL query parameter.
        "temperature": 0,
        "messages": [
            {"content": system_message, "role": "system"},
            {"content": user_message, "role": "user"},
        ],
    }

    try:
        response = requests.post(
            url="https://api.openai.com/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {OPENAI_API_KEY}",
            },
            data=json.dumps(payload),
            timeout=10,
        )
    except requests.exceptions.RequestException:
        print("HTTP Request failed")
        return None

    # Error responses (bad key, rate limit, ...) carry no "choices"; report
    # them instead of letting a KeyError escape from the parsing below.
    try:
        message = response.json()["choices"][0]["message"]
    except (ValueError, KeyError, IndexError, TypeError):
        print(
            f"Unexpected API response (status {response.status_code}): "
            f"{response.text[:500]}"
        )
        return None

    return {
        "status_code": response.status_code,
        "text": message,
    }


Se crea un conjunto de datos para conjugar razonamiento cronológico con múltiples lazos familiares.

In [4]:
# Two recorded conversations for the same person (person_id "20"). Each entry
# holds the chat turns under "messages" and bookkeeping fields under "metadata".
condensed_contexts = [
    # Conversation 1: family trip from Buenos Aires to Cordoba.
    {
        "messages": [
            {
                "role": "user",
                "content": "Voy a sacar un pasaje para la familia de buenos aires a cordoba",
            },
            {
                "role": "system",
                "content": "quienes son los pasajeros?",
            },
            {
                "role": "user",
                "content": "Ana es mi hija mayor, tiene 20 años. Juan es mi hijo menor, tiene 10 años. Mi esposa se llama Maria y tiene 40 años. Yo me llamo Pedro y tengo 45 años",
            },
            {
                "role": "system",
                "content": "en que fecha quiere viajar?",
            },
            {
                "role": "user",
                "content": "del 20 de agosto al 30 de agosto",
            },
            {
                "role": "system",
                "content": "perfecto, salen el 20 de agosto a las 10:00 y vuelven el 30 de agosto a las 18:00. El precio total es de 1000 dolares. Desea confirmar la compra?",
            },
            {
                "role": "user",
                "content": "si",
            },
            {
                "role": "system",
                "content": "Gracias por su compra",
            },
        ],
        "metadata": {
            "session_id": "1",
            "person_id": "20",
            "context_id": "1",
            "created_at": "20200825T12:00:00.000Z",
        },
    },
    # Conversation 2: frequent-traveller category upgrade.
    {
        "messages": [
            {
                "role": "user",
                "content": "Quiero subir de categoria en el programa de viajero frecuente",
            },
            {
                "role": "system",
                "content": "que categoria tiene actualmente?",
            },
            {
                "role": "user",
                "content": "soy categoria 1",
            },
            {
                "role": "system",
                "content": "para subir de categoria debe tener 1000 millas. Cuantas millas tiene actualmente?",
            },
            {
                "role": "user",
                "content": "tengo 500 millas",
            },
            {
                "role": "system",
                "content": "puede comprar 500 millas por 100 dolares. Desea comprarlas?",
            },
            {
                "role": "user",
                "content": "si",
            },
            {
                "role": "system",
                "content": "Gracias por su compra. Ahora tiene 1000 millas y es categoria 2",
            },
        ],
        "metadata": {
            "session_id": "1",
            "person_id": "20",
            "context_id": "2",
            "created_at": "20220825T18:00:00.000Z",
        },
    },
]


Prompt de extracción y reconfiguración de información.

In [5]:
# Create the in-memory FAISS index from a placeholder text, then add one
# LLM-written extraction per conversation together with its metadata.
db = FAISS.from_texts(texts=['notebook-9'], embedding=embeddings)
for context in condensed_contexts:
    # Join all messages into one string, separated by commas, so the dict
    # reprs are not glued together.
    messages = ', '.join(str(message) for message in context['messages'])
    extraction = gpt_system_user(
        system_message="""
              Extract every meaningful information about the user from the provided conversation.
              Ask yourself, what happened in the conversation?
              What data about the user did you learn?
              At what date and time was this data gathered?
              """,
        # Each section gets its own opening and closing backtick fence.
        user_message=f" METADATA: ```{context['metadata']}``` CONVERSATION: ```{messages}```",
        model="gpt-3.5-turbo",
    )
    # gpt_system_user returns None when the request fails; skip that
    # conversation instead of crashing on the subscript below.
    if extraction is None:
        print(f"Extraction failed for context {context['metadata']}; skipping")
        continue
    extracted_text = extraction['text']['content']
    print(f"{extracted_text} \n\n\n///////////////////////////\n\n\n")
    db.add_texts(
        texts=[extracted_text],
        metadatas=[context['metadata']],
    )

Data Gathered:
- The user's name is Pedro.
- Pedro has a daughter named Ana, who is 20 years old.
- Pedro has a son named Juan, who is 10 years old.
- Pedro's wife's name is Maria and she is 40 years old.
- Pedro is 45 years old.
- Pedro is planning to book a flight for his family from Buenos Aires to Cordoba.
- The travel dates are from August 20th to August 30th.
- The departure time is at 10:00 on August 20th.
- The return time is at 18:00 on August 30th.
- The total price for the flight is 1000 dollars.
- The purchase was confirmed.

Date and Time: August 25th, 2020 at 12:00:00 UTC. 


///////////////////////////



Data Gathered:
- The user wants to upgrade their frequent traveler program category.
- The user's current category is category 1.
- The user has 500 miles.
- The user is willing to purchase 500 miles for $100.
- The user has successfully purchased the additional miles.
- The user now has 1000 miles and is category 2.

Date and Time: The data was gathered on August 25, 2

Para empezar, preguntamos simplemente por la edad.

In [6]:
# Question: the daughter's age, with no date hint in the prompt.
user_input_3 = "que edad tiene mi hija mayor?"
# NOTE: `filter` shadows the builtin; the name is kept as-is for this notebook.
filter = {"person_id": "20"}

# Retrieve the single closest stored extraction for this person.
db_results_3 = db.similarity_search_with_score(
    query=user_input_3,
    embeddings=embeddings,
    filter=filter,
    k=1,
)
top_document_3 = db_results_3[0][0]
context_3 = f"{{ message: {top_document_3.page_content}, metadata: {top_document_3.metadata} }}"
print(f"CONTEXT:{context_3}")

# The retrieved context is embedded between backticks in the system prompt.
system_prompt_3 = f"""
You are a personal assistant. You will find past conversations between you and the user between backticks.
Use them to answer the user's question.

```{context_3}```

"""
answer_3 = gpt_system_user(
    system_message=system_prompt_3,
    user_message=user_input_3,
)
answer_3["text"]["content"]


CONTEXT:{ message: Data Gathered:
- The user's name is Pedro.
- Pedro has a daughter named Ana, who is 20 years old.
- Pedro has a son named Juan, who is 10 years old.
- Pedro's wife's name is Maria and she is 40 years old.
- Pedro is 45 years old.
- Pedro is planning to book a flight for his family from Buenos Aires to Cordoba.
- The travel dates are from August 20th to August 30th.
- The departure time is at 10:00 on August 20th.
- The return time is at 18:00 on August 30th.
- The total price for the flight is 1000 dollars.
- The purchase was confirmed.

Date and Time: August 25th, 2020 at 12:00:00 UTC., metadata: {'session_id': '1', 'person_id': '20', 'context_id': '1', 'created_at': '20200825T12:00:00.000Z'} }


'Tu hija mayor, Ana, tiene 20 años.'

Error en el prompt: el modelo debería razonar que estamos en 2023 y que, por lo tanto, la edad registrada en 2020 ya no es la actual.

Ahora vemos si GPT-3.5 puede razonar con la fecha.

In [7]:
# Question: the daughter's age at the time of the trip.
user_input_3 = "que edad tenía mi hija mayor cuando hicimos el viaje a cordoba?"
# NOTE: `filter` shadows the builtin; the name is kept as-is for this notebook.
filter = {"person_id": "20"}

# Retrieve the single closest stored extraction for this person.
db_results_3 = db.similarity_search_with_score(
    query=user_input_3,
    embeddings=embeddings,
    filter=filter,
    k=1,
)
best_match_3 = db_results_3[0][0]
context_3 = "{ message: %s, metadata: %s }" % (
    best_match_3.page_content,
    str(best_match_3.metadata),
)
print(f"CONTEXT:{context_3}")

# The retrieved context is embedded between backticks in the system prompt.
system_prompt_3 = f"""
You are a personal assistant. You will find past conversations between you and the user between backticks.
Use them to answer the user's question.

```{context_3}```

"""
answer_3 = gpt_system_user(
    system_message=system_prompt_3,
    user_message=user_input_3,
)
answer_3["text"]["content"]


CONTEXT:{ message: Data Gathered:
- The user's name is Pedro.
- Pedro has a daughter named Ana, who is 20 years old.
- Pedro has a son named Juan, who is 10 years old.
- Pedro's wife's name is Maria and she is 40 years old.
- Pedro is 45 years old.
- Pedro is planning to book a flight for his family from Buenos Aires to Cordoba.
- The travel dates are from August 20th to August 30th.
- The departure time is at 10:00 on August 20th.
- The return time is at 18:00 on August 30th.
- The total price for the flight is 1000 dollars.
- The purchase was confirmed.

Date and Time: August 25th, 2020 at 12:00:00 UTC., metadata: {'session_id': '1', 'person_id': '20', 'context_id': '1', 'created_at': '20200825T12:00:00.000Z'} }


'Tu hija mayor, Ana, tenía 20 años cuando hicieron el viaje a Córdoba.'

In [8]:
# Question: the daughter's birth year, with no "today" hint in the prompt.
user_input_3 = "en que año nació mi hija mayor?"
# NOTE: `filter` shadows the builtin; the name is kept as-is for this notebook.
filter = {"person_id": "20"}

# Retrieve the single closest stored extraction for this person.
db_results_3 = db.similarity_search_with_score(
    query=user_input_3,
    embeddings=embeddings,
    filter=filter,
    k=1,
)
retrieved_doc_3 = db_results_3[0][0]
context_3 = "".join(
    [
        "{ message: ",
        retrieved_doc_3.page_content,
        ", metadata: ",
        str(retrieved_doc_3.metadata),
        " }",
    ]
)
print(f"CONTEXT:{context_3}")

# The retrieved context is embedded between backticks in the system prompt.
system_prompt_3 = f"""
You are a personal assistant. You will find past conversations between you and the user between backticks.
Use them to answer the user's question.

```{context_3}```

"""
answer_3 = gpt_system_user(
    system_message=system_prompt_3,
    user_message=user_input_3,
)
answer_3["text"]["content"]


CONTEXT:{ message: Data Gathered:
- The user's name is Pedro.
- Pedro has a daughter named Ana, who is 20 years old.
- Pedro has a son named Juan, who is 10 years old.
- Pedro's wife's name is Maria and she is 40 years old.
- Pedro is 45 years old.
- Pedro is planning to book a flight for his family from Buenos Aires to Cordoba.
- The travel dates are from August 20th to August 30th.
- The departure time is at 10:00 on August 20th.
- The return time is at 18:00 on August 30th.
- The total price for the flight is 1000 dollars.
- The purchase was confirmed.

Date and Time: August 25th, 2020 at 12:00:00 UTC., metadata: {'session_id': '1', 'person_id': '20', 'context_id': '1', 'created_at': '20200825T12:00:00.000Z'} }


'Según la información que tengo, su hija mayor, Ana, tiene 20 años. Si restamos 20 años a la fecha actual, podemos determinar que Ana nació en el año 2000.'

Hace bien la cuenta. Pero no sabe que 2023 es el año actual.

In [9]:
# Same birth-year question, but the prompt now states today's date and location.
user_input_3 = "en que año nació mi hija mayor?"
# NOTE: `filter` shadows the builtin; the name is kept as-is for this notebook.
filter = {"person_id": "20"}

# Retrieve the single closest stored extraction for this person.
db_results_3 = db.similarity_search_with_score(
    query=user_input_3,
    embeddings=embeddings,
    filter=filter,
    k=1,
)
nearest_doc_3 = db_results_3[0][0]
context_3 = f"{{ message: {nearest_doc_3.page_content}, metadata: {nearest_doc_3.metadata} }}"
print(f"CONTEXT:{context_3}")

# The date in this prompt is hard-coded to the day the experiment was run.
system_prompt_3 = f"""
You are a personal assistant. 
You will find past conversations between you and the user between backticks.
Use them to answer the user's question.
To reason about dates, have in mind that today is September 13th, 2023.
You are now located in Buenos Aires, Argentina.
Take a deep breath and work on this problem step-by-step.

```{context_3}```

"""
answer_3 = gpt_system_user(
    system_message=system_prompt_3,
    user_message=user_input_3,
)
answer_3["text"]["content"]


CONTEXT:{ message: Data Gathered:
- The user's name is Pedro.
- Pedro has a daughter named Ana, who is 20 years old.
- Pedro has a son named Juan, who is 10 years old.
- Pedro's wife's name is Maria and she is 40 years old.
- Pedro is 45 years old.
- Pedro is planning to book a flight for his family from Buenos Aires to Cordoba.
- The travel dates are from August 20th to August 30th.
- The departure time is at 10:00 on August 20th.
- The return time is at 18:00 on August 30th.
- The total price for the flight is 1000 dollars.
- The purchase was confirmed.

Date and Time: August 25th, 2020 at 12:00:00 UTC., metadata: {'session_id': '1', 'person_id': '20', 'context_id': '1', 'created_at': '20200825T12:00:00.000Z'} }


'Según la información que tengo, tu hija mayor, Ana, tiene 20 años y la fecha actual es el 13 de septiembre de 2023. Si hacemos el cálculo, Ana nació en el año 2003.'

Mejor, pero Ana no tiene 20. Tiene 23 hoy.