# Database utilities for storing conversation history

In [3]:
import sqlite3
from datetime import datetime
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, T
import openai
from sklearn.feature_extraction.text import TfidfVectorizer

def setup_database(db_name):
    """Open the SQLite database at *db_name* and make sure the ``mem`` table exists.

    Parameters:
    - db_name: Path of the database file (anything ``sqlite3.connect`` accepts).

    Returns:
    - A ``(connection, cursor)`` tuple; closing both is left to the caller.
    """
    create_table_sql = '''CREATE TABLE IF NOT EXISTS mem
                   (id INTEGER PRIMARY KEY, text TEXT, role TEXT, ts TEXT, categories TEXT, labels TEXT,
                   embedding REAL, continued INTEGER, level1 TEXT, level2 TEXT, level3 TEXT, user_id Text)'''

    connection = sqlite3.connect(db_name)
    cursor = connection.cursor()
    cursor.execute(create_table_sql)
    connection.commit()
    return connection, cursor

def insert_mem(cur, data):
    """Insert one row into ``mem`` using named placeholders.

    Parameters:
    - cur: Cursor on a database that contains the ``mem`` table.
    - data: Mapping that supplies a value for every placeholder named in the
            statement below (text, role, ts, categories, labels, embedding,
            continued, level1, level2, level3, user_id).

    The statement is executed but not committed; committing is the caller's job.
    """
    insert_sql = '''INSERT INTO mem (text, role, ts, categories, labels, embedding, continued, level1, level2, level3, user_id)
                   VALUES (:text, :role, :ts, :categories, :labels, :embedding, :continued, :level1, :level2, :level3, :user_id)'''
    cur.execute(insert_sql, data)


def read_mems(cur):
    """Return every row of the ``mem`` table as a list of tuples.

    Parameters:
    - cur: Cursor on a database that contains the ``mem`` table.
    """
    # Cursor.execute returns the cursor itself, so the fetch can be chained.
    return cur.execute("SELECT * FROM mem").fetchall()

def update_mem(cur, id, data):
    """Overwrite every data column of the ``mem`` row whose primary key is *id*.

    Parameters:
    - cur: Cursor on a database that contains the ``mem`` table.
    - id: Primary key of the row to update. (The name shadows the builtin but
          is kept so existing keyword callers continue to work.)
    - data: Mapping that supplies a value for every placeholder named in the
            statement below. It is not modified.

    The statement is executed but not committed; committing is the caller's job.
    """
    # Bind a merged copy so the caller's dictionary is not mutated; an 'id'
    # key already present in `data` is still overridden by the `id` argument.
    params = {**data, 'id': id}
    cur.execute('''UPDATE mem SET text=:text, role=:role, ts=:ts, categories=:categories, labels=:labels,
                   embedding=:embedding, continued=:continued, level1=:level1, level2=:level2, level3=:level3, user_id=:user_id
                   WHERE id=:id''', params)





In [10]:
# Main flow: make sure the database exists, then store one sample row.
conn, cur = setup_database('memory.db')

# insert_mem binds every named placeholder in its INSERT statement, so each
# column (including user_id) needs a key here; a missing key makes sqlite3
# raise ProgrammingError.
data = {
    'text': 'Sample text',
    'role': 'admin',
    'ts': '2023-01-01 12:00:00',
    'categories': 'category1,category2',
    'labels': 'label1,label2',
    'embedding': 0.123,
    'continued': 1,
    'level1': 'L1',
    'level2': 'L2',
    'level3': 'L3',
    'user_id': 'user1',
}

try:
    insert_mem(cur, data)
    conn.commit()
finally:
    # Closing cursor and connection, even if the insert failed
    cur.close()
    conn.close()

# Dev Plan
1. Given a message, find the related history:
    1. Ask GPT to label whether the message is a follow-up to previous questions.
    2. Score the previous history by embedding distance.
    3. Score the history by keyword relevance.
    4. Calculate a decay factor from the scores and the elapsed time.
    5. Summarize the history to a fixed length according to the decay factor.
    6. Using relevance and keyword position, implement a gradient-weighted summary of the message: keyword matches are the focus, and the surrounding context words receive decaying relevance scores.
2. Formulate the prompt from the history and the scores.

In [None]:
# Open 'memory.db' again (setup_database also creates the table if it is
# missing) and fetch every stored row.
conn, cur = setup_database('memory.db')
# NOTE(review): the result is not assigned; presumably it is meant to be shown
# as the notebook cell's output — confirm. The connection is left open here.
read_mems(cur)



def connect_db(db_path='memory.db'):
    """Open and return a connection to the SQLite database at *db_path*."""
    return sqlite3.connect(db_path)

def get_history(conn, user_id, limit=100):
    """Fetch up to *limit* rows of ``mem`` belonging to *user_id*.

    Rows are ordered by the ``ts`` column in descending order and returned as
    a list of tuples.
    """
    sql = "SELECT * FROM mem WHERE user_id = ? ORDER BY ts DESC LIMIT ?"
    result = conn.cursor().execute(sql, (user_id, limit))
    return result.fetchall()

def update_message_info(conn, message_id, **kwargs):
    """Update selected columns of one ``mem`` row and commit.

    Parameters:
    - conn: Database connection object.
    - message_id: Value of the ``id`` column identifying the row.
    - **kwargs: Column name -> new value. With no keyword arguments the call
                is a no-op.

    Raises:
    - ValueError: if a column name is not a plain identifier. Column names are
                  interpolated into the SQL text, so anything else could
                  rewrite the statement.
    """
    if not kwargs:
        # "UPDATE mem SET  WHERE ..." is not valid SQL; nothing to change.
        return

    invalid = [name for name in kwargs if not name.isidentifier()]
    if invalid:
        raise ValueError(f"Invalid column name(s): {invalid!r}")

    # Values always go through placeholders; identifiers are validated above
    # and double-quoted so that SQL keywords can still be used as column names.
    assignments = ', '.join(f'"{name}" = ?' for name in kwargs)
    values = [*kwargs.values(), message_id]
    cursor = conn.cursor()
    cursor.execute(f"UPDATE mem SET {assignments} WHERE id = ?", values)
    conn.commit()


In [None]:
def label_followup_messages(conn, history):
    """
    Label each message in the history as a followup or not.

    Parameters:
    - conn: Database connection object.
    - history: A list of history messages fetched from the database.
               Each message is expected to be a tuple or list in the format:
               (id, text, role, ts, categories, labels, embedding, continued,
                level1, level2, level3, user_id)
               Only the first two fields (id and text) are read here.

    Each entry is compared with the entry that comes right after it in
    `history`, and that later entry is treated as the *previous* message.
    This matches a newest-first list such as the one `get_history` returns
    (ORDER BY ts DESC). The final entry has nothing to be compared with and
    is left untouched.

    For every compared entry the `continued` column is set to 1 if GPT judges
    it a followup, otherwise 0.
    """
    for current_message, next_message in zip(history, history[1:]):
        current_message_id, current_text, *_ = current_message
        previous_text = next_message[1]  # The text is the second field

        # ask_gpt_if_followup builds its own prompt from the two texts, so it
        # takes them as separate arguments rather than a pre-formatted prompt.
        is_followup = ask_gpt_if_followup(previous_text, current_text)

        # The `continued` column is updated with 1 if it's a followup, otherwise 0
        update_message_info(conn, current_message_id, continued=int(is_followup))


def setup_openai_api():
    """Configure the OpenAI API key from the ``OPENAI_API_KEY`` environment variable.

    Secrets must not live in source code: anything committed to a notebook or
    repository has to be treated as leaked and rotated. If the variable is
    unset or empty, ``openai.api_key`` is left as it is, so a missing key
    surfaces as an authentication error on the first API call.
    """
    import os

    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key:
        openai.api_key = api_key


def ask_gpt_if_followup(previous_message, current_message):
    """
    Use OpenAI's GPT-4 to determine if the current message is a followup to the previous one.

    Parameters:
    - previous_message: String, the content of the previous message in the conversation.
    - current_message: String, the content of the current message.

    Returns:
    - True if GPT-4 determines the message is a followup, False otherwise.
      False is also returned (after printing the error) when the API call or
      the parsing of its response fails.
    """
    import re

    setup_openai_api()  # Ensure API key is set

    # Formulate the prompt for GPT-4
    prompt = f"Given the previous message: '{previous_message}', is the following message a follow-up? '{current_message}' Please answer 'yes' or 'no'."

    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You need to determine if the current message is a follow-up to the previous one."},
                {"role": "user", "content": prompt}
            ]
        )
        response_text = response.choices[0].message['content'].strip().lower()
    except Exception as e:
        # Best-effort: any failure is reported and treated as "not a followup".
        print(f"Error calling OpenAI API: {e}")
        return False

    # Prefer the leading yes/no word. A bare substring test would count
    # answers such as "no, yesterday's message ..." as a yes.
    leading_answer = re.match(r"\W*(yes|no)\b", response_text)
    if leading_answer:
        return leading_answer.group(1) == "yes"

    # No clear leading answer: accept "yes" only as a whole word.
    return re.search(r"\byes\b", response_text) is not None


def get_message_embedding(message):
    """
    Fetch an embedding for *message* from the OpenAI embedding endpoint.

    Parameters:
    - message: String, the text to embed.

    Returns:
    - The `embedding` entry of the first item in the response's `data` list,
      or None (after printing the error) if the request or the extraction
      raises.
    """
    setup_openai_api()  # Make sure the API key is configured first

    try:
        api_result = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=message,
        )
        return api_result['data'][0]['embedding']
    except Exception as err:
        print(f"Error calling OpenAI Embedding API: {err}")
        return None



def update_message_info(conn, message_id, **kwargs):
    """
    Update information for a given message in the 'mem' table and commit.

    Parameters:
    - conn: Database connection object.
    - message_id: Value of the `id` column identifying the row.
    - **kwargs: Column name -> new value. With no keyword arguments the call
                is a no-op.

    Raises:
    - ValueError: if a column name is not a plain identifier. Column names are
                  interpolated into the SQL text, so anything else could
                  rewrite the statement.
    """
    if not kwargs:
        # "UPDATE mem SET  WHERE ..." is not valid SQL; nothing to change.
        return

    invalid = [name for name in kwargs if not name.isidentifier()]
    if invalid:
        raise ValueError(f"Invalid column name(s): {invalid!r}")

    # Values always go through placeholders; identifiers are validated above
    # and double-quoted so that SQL keywords can still be used as column names.
    assignments = ', '.join(f'"{name}" = ?' for name in kwargs)
    values = [*kwargs.values(), message_id]
    cursor = conn.cursor()
    cursor.execute(f"UPDATE mem SET {assignments} WHERE id = ?", values)
    conn.commit()


def calculate_embedding_distance(history):
    """Placeholder for scoring history messages by embedding distance.

    Not implemented yet: the argument is ignored and None is returned.
    """
    # TODO: use cosine similarity or another distance metric for embeddings.
    return None

def calculate_keyword_relevance(history, keywords):
    """Placeholder for scoring messages by keyword relevance.

    Not implemented yet: both arguments are ignored and None is returned.
    """
    # TODO: this could involve NLP techniques to find keyword occurrences and relevance.
    return None

def calculate_decay_factor(history, time_now):
    """Placeholder for computing a per-message decay factor from scores and time.

    Not implemented yet: both arguments are ignored and None is returned.
    """
    # TODO: decay factor calculation based on time and other scores.
    return None

def summarize_history(history, decay_factors):
    """Placeholder for summarizing the message history into a fixed length.

    Not implemented yet: both arguments are ignored and None is returned.
    """
    # TODO: summarization logic using decay factors to weigh message importance.
    return None

def calculate_distance(embedding1, embedding2):
    """Return the cosine similarity between two embedding vectors.

    Despite the function's name, the value is what `cosine_similarity`
    reports for the pair (a similarity), not a distance.
    """
    # cosine_similarity works on 2-D inputs, so each vector is wrapped in a
    # one-row list and the single cell of the 1x1 result is extracted.
    similarity_matrix = cosine_similarity([embedding1], [embedding2])
    return similarity_matrix[0][0]

def formulate_gpt_prompt(history):
    """Placeholder for building the GPT prompt from processed history and scores.

    Not implemented yet: the argument is ignored and None is returned.
    """
    # TODO: concatenate and format the history and scores into a GPT prompt.
    return None



Welcome to the chat! Type your message and hit enter. Type 'stop' or press Ctrl-D to end the conversation.
