In [21]:
import os
from openai import OpenAI
import requests
import json
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Read the API key from the environment and build the shared OpenAI client
# that the chat-completion helper defined further down relies on.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai_client = OpenAI(api_key=OPENAI_API_KEY)

In [3]:
from pymongo import MongoClient


class AtlasClient:
    """Small convenience wrapper around a ``MongoClient`` bound to one database.

    Instance attributes (set in ``__init__``):
      * ``mongodb_client`` -- the underlying ``MongoClient``.
      * ``database`` -- the database handle selected by ``dbname``.
    """

    def __init__(self, altas_uri, dbname):
        """Create the client and select the working database.

        Args:
            altas_uri: Connection string handed to ``MongoClient``. The
                parameter keeps its original spelling so keyword callers
                continue to work.
            dbname: Name of the database every other method operates on.
        """
        self.mongodb_client = MongoClient(altas_uri)
        self.database = self.mongodb_client[dbname]

    def ping(self):
        """Quick connectivity check: issue the ``ping`` admin command.

        Nothing is returned; any error raised by the driver propagates.
        """
        self.mongodb_client.admin.command('ping')

    def get_collection(self, collection_name):
        """Return the collection handle named ``collection_name``."""
        return self.database[collection_name]

    def find(self, collection_name, filter=None, limit=10):
        """Return up to ``limit`` documents of a collection as a list.

        Args:
            collection_name: Collection to query.
            filter: Query document. ``None`` (the default) matches every
                document; a fresh ``{}`` is built per call so no dict is
                shared between invocations.
            limit: Maximum number of documents to return.
        """
        query = {} if filter is None else filter
        collection = self.database[collection_name]
        return list(collection.find(filter=query, limit=limit))

    # https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/
    def vector_search(self, collection_name, index_name, attr_name,
                      embedding_vector, limit=5, num_candidates=50):
        """Run a ``$vectorSearch`` aggregation and return the hits as a list.

        Args:
            collection_name: Collection to search.
            index_name: Name of the vector search index.
            attr_name: Document field that holds the indexed vectors.
            embedding_vector: Query vector.
            limit: Number of documents to return.
            num_candidates: Number of nearest-neighbour candidates to consider.
                It is raised to ``limit`` when smaller, because the stage does
                not accept fewer candidates than returned documents.

        Returns:
            A list of documents containing only ``_id``, ``title``, ``plot``,
            ``year`` and the computed ``search_score``.
        """
        collection = self.database[collection_name]
        pipeline = [
            {
                '$vectorSearch': {
                    "index": index_name,
                    "path": attr_name,
                    "queryVector": embedding_vector,
                    "numCandidates": max(num_candidates, limit),
                    "limit": limit,
                }
            },
            # Fields set to 1 are kept; 'search_score' exposes the similarity
            # score computed by the $vectorSearch stage.
            {
                "$project": {
                    '_id': 1,
                    'title': 1,
                    'plot': 1,
                    'year': 1,
                    "search_score": {"$meta": "vectorSearchScore"},
                }
            },
        ]
        return list(collection.aggregate(pipeline))

    def close_connection(self):
        """Close the underlying ``MongoClient``."""
        self.mongodb_client.close()

In [4]:
# Connect through the AtlasClient wrapper and verify the connection up front.
mongo_uri = os.getenv("MONGODB_URI")
if not mongo_uri:
    # Fail fast with a clear message instead of handing None to MongoClient.
    raise ValueError("MONGODB_URI environment variable is not set")

atlas_client = AtlasClient(mongo_uri, 'hyper')
atlas_client.ping()
print('Connected to Atlas instance! We are good to go!')

Connected to Atlas instance! We are good to go!


In [6]:
import os
from pymongo import MongoClient

# Open a direct pymongo handle to the "hyper" database, using the connection
# string stored in the MONGODB_URI environment variable.
mongo_uri = os.environ.get("MONGODB_URI")
if mongo_uri:
    client = MongoClient(mongo_uri)
    db = client.get_database("hyper")
    print(f"Connected to database: {db.name}")
else:
    raise ValueError("MONGODB_URI environment variable is not set")


Connected to database: hyper


In [8]:
# Handle to the "Norman" collection; the extraction cells below insert their records here.
norman_collection = db["Norman"]

In [24]:
def gpt_system_user(
    system_message: str,
    user_message: str,
    model: str = "gpt-3.5-turbo",
    json_mode: bool = True,
):
    """Send one system + user message pair to the chat completions API.

    Helper meant for use in notebooks.

    Args:
        system_message: Content of the ``system`` role message.
        user_message: Content of the ``user`` role message.
        model: Name of the chat model to call.
        json_mode: When True (the default, which preserves the original
            behaviour) the request sets
            ``response_format={"type": "json_object"}``. Pass False when a
            free-text answer is wanted.

    Returns:
        The text content of the first choice, or ``None`` when the API call
        failed (the error is printed).
    """
    # Imported locally: the notebook only imports the ``OpenAI`` class, so the
    # bare module name ``openai`` used by the original ``except`` clause was
    # undefined and every API failure surfaced as a NameError.
    from openai import OpenAIError

    request_options = {}
    if json_mode:
        request_options["response_format"] = {"type": "json_object"}

    try:
        response = openai_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
            ],
            temperature=0,
            **request_options,
        )
        return response.choices[0].message.content

    except OpenAIError as e:
        print(f"OpenAI API request failed: {e}")
        return None



In [10]:
# Sample "condensed context": one recorded chat session (turns alternate
# user / assistant, starting and ending with the user) together with the
# metadata that identifies the session.
condensed_contexts = [
    {
        "messages": [
            {"role": role, "content": content}
            for role, content in (
                ("user", "Soy Alexander, mi madre se llama Sylvia, te paso los datos de ella mail: sylvia@gmail.com celu 1123343434."),
                ("assistant", "Excelente Alexander, como puedo ayudarte?"),
                ("user", "quiero sacar un turno para ella porque no entiende nada de tecnologia y los turnos se sacan por esta via"),
                ("assistant", "No hay problema, sacamos el turno para ella, cual es el DNI de ella?"),
                ("user", "12668992"),
                ("assistant", "perfecto, digame que especialidad?"),
                ("user", "ginecologia"),
                ("assistant", "Tengo turnos disponibles con el Dr. Facundo Farias para el 27 de agosto a las 10:00hs, le parece bien?"),
                ("user", "si"),
                ("assistant", "confirmado entonces el turno para Sylvia para el 27 de agosto a las 10:00hs con el Dr. Facundo Farias. Le envio el comprobante por mail?"),
                ("user", "si"),
                ("assistant", "Al correo de Sylvia?"),
                ("user", "si"),
                ("assistant", "Perfecto, le envio el comprobante al mail de ella. Algo mas que pueda ayudarte?"),
                ("user", "no, muchas gracias"),
            )
        ],
        "metadata": dict(
            session_id="1",
            person_id="20",
            context_id="1",
            created_at="20200825T12:00:00.000Z",
        ),
    },
]


In [28]:
# First attempt: ask the model for ONE JSON document per conversation and
# store it as a single record in the "Norman" collection.
for context in condensed_contexts:
    # Serialise every message dict and join them with a comma into one string
    # (the original joined with '' even though a comma was intended).
    messages = ', '.join(str(message) for message in context['messages'])
    extraction = gpt_system_user(
        system_message="""
              Extract every meaningful information about Alexander Ditzend from the provided conversation.
              Ask yourself, what happened in the conversation?
              What data about the user did you learn?
              At what date and time was this data gathered?


              Use this JSON example as a format, the master_node is Alexander Ditzend:

              {
                given_name: "Julia",
                family_name:"Hernandez",
                age:18,
                relationship_to_master_node:"sons_girlfriend"
                ...other metadata you consider relevant
              }

              """,
        # Each section now sits in its own balanced ``` fence; the original
        # opened the METADATA fence and only closed it after the conversation.
        user_message=f" METADATA: ```{context['metadata']}``` CONVERSATION: ```{messages}``` ",
        model="gpt-4o-2024-08-06"
    )
    if extraction is None:
        # gpt_system_user has already printed the underlying API error.
        print("Error: Openai API returned status error")
        continue

    print(f"{extraction} \n\n\n///////////////////////////\n\n\n")

    # Convert the extraction string to a dictionary; skip this context if the
    # reply is not valid JSON rather than aborting the whole loop.
    try:
        extraction_dict = json.loads(extraction)
    except json.JSONDecodeError as e:
        print(f"Error: could not parse the extraction as JSON: {e}")
        continue

    # Insert the dictionary into the collection
    norman_collection.insert_one(extraction_dict)

{
  "given_name": "Alexander",
  "relationship_to_master_node": "self",
  "mother": {
    "given_name": "Sylvia",
    "email": "sylvia@gmail.com",
    "phone": "1123343434",
    "DNI": "12668992",
    "appointment": {
      "specialty": "ginecologia",
      "doctor": "Dr. Facundo Farias",
      "date": "2020-08-27",
      "time": "10:00"
    }
  },
  "data_gathered_at": "2020-08-25T12:00:00.000Z"
} 


///////////////////////////





El problema es que está toda la data mezclada. Así quedaría en el mismo vector. Pruebo pidiendo que devuelva una lista.

In [31]:
# Second attempt: ask the model for a LIST of records so that each piece of
# information is stored as its own document in the "Norman" collection.
for context in condensed_contexts:
    # Serialise every message dict and join them with a comma into one string
    # (the original joined with '' even though a comma was intended).
    messages = ', '.join(str(message) for message in context['messages'])
    extraction = gpt_system_user(
        # The example appointment's "datetime" now carries the moment the
        # memory was created, as the instruction above it demands; the
        # original example used the appointment time there instead.
        system_message="""
              Extract every meaningful information about Alexander Ditzend from the provided conversation.
              Ask yourself, what happened in the conversation?
              What data about the user did you learn?
              At what date and time was this data gathered?
              Each piece of information should be an item in a list.
            Datetime is the moment the memory was created, use always ISO 8601 format.


              Use this JSON example as a format, the master_node is Alexander Ditzend:
             {
                "records": [
                {
                    "record_type": "personal_info",
                    "given_name": "Sylvia",
                    "relationship_to_master_node": "mother",
                    "relationship_from_master_node": "son",
                    "email": "sylvia@gmail.com",
                    "phone": "1123343434",
                    "DNI": "12668992",
                    "datetime": "2020-08-25T12:00:00.000Z",
                    },
                   {
                    "record_type": "appointment",
                    "for": "Sylvia",
                    "specialty": "ginecologia",
                    "doctor": "Dr. Facundo Farias",
                    "appointment_date": "2020-08-27",
                    "appointment_time": "10:00",
                    "datetime": "2020-08-25T12:00:00.000Z"
                    },
                    ...other records
                ]
              }
              """,
        # Each section now sits in its own balanced ``` fence; the original
        # opened the METADATA fence and only closed it after the conversation.
        user_message=f" METADATA: ```{context['metadata']}``` CONVERSATION: ```{messages}``` ",
        model="gpt-4o-2024-08-06"
    )
    if extraction is None:
        # gpt_system_user has already printed the underlying API error.
        print("Error: Openai API returned status error")
        continue

    print(f"{extraction} \n\n\n///////////////////////////\n\n\n")

    # Convert the extraction string to a dictionary; skip this context if the
    # reply is not valid JSON rather than aborting the whole loop.
    try:
        extraction_dict = json.loads(extraction)
    except json.JSONDecodeError as e:
        print(f"Error: could not parse the extraction as JSON: {e}")
        continue

    # insert_many needs a non-empty list of documents, so guard against a
    # reply without a usable 'records' entry.
    records = extraction_dict.get('records')
    if not isinstance(records, list) or not records:
        print("Error: the extraction contains no 'records' list to insert")
        continue

    # Insert the list of dictionaries into the collection
    norman_collection.insert_many(records)

{
  "records": [
    {
      "record_type": "personal_info",
      "given_name": "Sylvia",
      "relationship_to_master_node": "mother",
      "relationship_from_master_node": "son",
      "email": "sylvia@gmail.com",
      "phone": "1123343434",
      "DNI": "12668992",
      "datetime": "2020-08-25T12:00:00.000Z"
    },
    {
      "record_type": "appointment",
      "for": "Sylvia",
      "specialty": "ginecologia",
      "doctor": "Dr. Facundo Farias",
      "appointment_date": "2020-08-27",
      "appointment_time": "10:00",
      "datetime": "2020-08-25T12:00:00.000Z"
    }
  ]
} 


///////////////////////////





Mezclando data y fechas de dos contextos

In [8]:
# Retrieval + answer cell: look up the two stored memories most similar to the
# question and hand them to the chat model as context.
user_input = "yo tengo algun turno asignado?"
# NOTE(review): `filter` shadows the Python builtin of the same name.
filter = {"person_id": "20"}
# NOTE(review): this call expects `db` to expose similarity_search_with_score
# (presumably a LangChain vector store such as the FAISS class imported at the
# top) and expects `embeddings` to exist. In the cells visible above, `db` is
# bound to a pymongo database and `embeddings` is never assigned -- confirm
# which object this cell is meant to query before running it.
db_results = db.similarity_search_with_score(query=user_input, embeddings=embeddings, filter=filter, k=2)
# Render each hit (element 0 of every result) as "<page_content> <metadata>"
# followed by a separator line.
context_k2 = [f'\n {db_results[i][0].page_content} {db_results[i][0].metadata}\n---------------\n\n' for i in range(len(db_results))]
context = context_k2
for c in context:
    print(c)
# NOTE(review): `context` is a list, so the f-string below embeds the list's
# repr (brackets, quotes, escaped newlines) between the backticks. The prompt
# also hard-codes today's date.
system_prompt = f"""
You are the personal assistant of the user.
Use the user's name in your answers.
Direct your answers to the user in the second person.
You will find past conversations between you and the user between backticks.
Use them to answer the user's question.
To reason about dates, have in mind that today is September 13th, 2023.
Wait before answering, check when the data was gathered first.
Take a deep breath and work on this problem step-by-step.



```{context}```

"""
# answer = gpt_system_user(system_message=system_prompt, user_message=user_input, model="gpt-4")
# NOTE(review): gpt_system_user as defined earlier requests a JSON-object
# response and returns the reply text (or None on failure) -- confirm that a
# JSON-formatted reply is what this free-text question should get.
answer = gpt_system_user(system_message=system_prompt, user_message=user_input)
# answer["text"]["content"]



 On August 25, at 12:00 PM the user provided the following information about themselves and their related person:
- User's name: Alexander
- User's related person: Sylvia (mother)
- Sylvia's email: sylvia@gmail.com
- Sylvia's phone number: 1123343434
- Sylvia's DNI (identification number): 12668992

Additionally, the user requested assistance in scheduling a medical appointment for Sylvia in the field of gynecology. The assistant provided available time slots for the appointment and confirmed a specific date and time with the user (August 27, at 10:00 AM with Dr. Facundo Farias). The user confirmed the appointment and requested that the appointment confirmation be sent to Sylvia's email.

In conclusion, the user's name is Alexander and they requested assistance in scheduling a gynecology appointment for their mother, Sylvia. The appointment was successfully scheduled for August 27, at 10:00 AM with Dr. Facundo Farias, and the appointment confirmation will be sent to Sylvia's email. {'

In [9]:
# gpt_system_user returns the reply text itself (a str, or None on failure),
# so display it directly instead of subscripting it like a response dict.
answer

'Hasta donde sé, el último turno que asignamos fue para Sylvia, tu madre, el 27 de agosto a las 10:00 AM con el Dr. Facundo Farias. Sin embargo, esta información fue proporcionada el 25 de agosto. Permíteme verificar si ha habido alguna actualización desde entonces. Por favor, dame un momento.'

Acá hay un error de razonamiento, pero es mucho pedir para un prompt tan corto.

Posibles pasos siguientes:

- Navegar el grafo de relaciones del usuario y cambiar el punto focal actual (por ejemplo: "este turno es para mi madre").
- Usar el mismo método para guardar cosas como "La madre del usuario tiene un turno para el dentista el 1 de enero de 2021".
- Sumar muchos más datos personales y ver si hay confusiones.
- Simular llamados a APIs desde diferentes relaciones del usuario y ver si hay confusiones.


In [None]:
# Retrieval + answer cell: look up the two stored memories most similar to the
# question and hand them to the chat model as context.
user_input = "que edad tenia yo en 1988?"
# NOTE(review): `filter` shadows the Python builtin of the same name.
filter = {"person_id": "20"}
# NOTE(review): this call expects `db` to expose similarity_search_with_score
# (presumably a LangChain vector store) and expects `embeddings` to exist;
# neither is set up in the cells visible above, and the recorded output of
# this cell is a NameError on `db` -- confirm the intended vector store.
db_results = db.similarity_search_with_score(query=user_input, embeddings=embeddings, filter=filter, k=2)
# Render each hit (element 0 of every result) as "<page_content> <metadata>"
# followed by a separator line.
context_k2 = [f'\n {db_results[i][0].page_content} {db_results[i][0].metadata}\n---------------\n\n' for i in range(len(db_results))]
context = context_k2
for c in context:
    print(c)
system_prompt = f"""
You are the best personal assistant of SAIA Air.
Your job is to help users in their travel needs.
Use the user's name in your answers.
You will find past conversations between you and the user between backticks.
Use them to answer the user's question.
To reason about dates, have in mind that today is September 13th, 2023.
Wait before answering, check when the data was gathered first.
Take a deep breath and work on this problem step-by-step.



```{context}```

"""
answer = gpt_system_user(system_message=system_prompt, user_message=user_input, model="gpt-4")
# gpt_system_user returns the reply text itself (a str, or None on failure),
# so display it directly; the original subscripted it like a response dict,
# which raises a TypeError on a str.
answer


NameError: name 'db' is not defined