In [None]:
# System message that frames the grouping task for the model. It is sent as the
# first chat message for every record handled by the processing loop.
system_prompt = """
Imagine you are an AI agent with a task to analyze and categorize a user's search history, using timestamps to understand their daily objectives. Your role is to identify patterns in the search queries, grouping them by common goals, while considering the time of the day to establish context and relevance. You're given a stream of records, each a snapshot of the user's search activities at specific times.

For each new record, your process is two-fold:

1. Evaluate existing groups of searches, examining their contents to determine if the new record aligns with any of these established themes. If so, you might need to modify the group's description to better encompass this new information.
2. If none of the existing groups seem fitting, your task is to initiate a new category, starting with this fresh record.
"""

def prompt_1(time, search, groups=None):
    """Build the user message describing one incoming search record.

    Args:
        time: Timestamp (or hour) of the record, interpolated as-is.
        search: The search text/title of the record, interpolated as-is.
        groups: Textual listing of the existing groups. ``None`` or an empty
            value means there are no groups yet.

    Returns:
        The prompt string, with either the group listing or a sentence
        stating that no groups exist.
    """
    # Truthiness instead of len(): the default None must not raise TypeError.
    if groups:
        groups_line = f"Here are the current groups: {groups}"
    else:
        groups_line = "There are no existing groups at this moment."

    return f"""
Your current incoming record reads: "{time}, {search}"

{groups_line}

"""

# Instructions asking the model to answer with a JSON object (id/title/description).
# NOTE(review): no cell visible here references prompt_2 -- the visible flow relies
# on tool calls instead. Confirm whether this prompt is still needed.
prompt_2 = """
Format your answer in JSON with this schema, explaining how you have created a new group or updated an existing one.
Use the id field to refer to an existing group, if you're creating a new group leave the id field null.

{
id: number,
title: string,
description: string
}
"""

In [None]:
from collections import defaultdict

# In-memory store of groups, keyed by the integer ids handed out by create_new_group.
# Because this is a defaultdict(dict), merely *reading* a missing key with
# groups_db[key] inserts an empty {} entry for that key.
groups_db = defaultdict(dict)

In [None]:
%load_ext dotenv
%dotenv

In [None]:
from openai import OpenAI
import json

# Shared client used for every chat-completion request in this notebook.
# NOTE(review): presumably picks up the API key from the environment populated
# by the %dotenv cell -- confirm.
client = OpenAI()

In [None]:
# Tool (function-calling) schemas advertised to the model. Each entry's "name"
# must match a key in the available_functions dispatch table.
# Group ids are declared as integers because the group store is keyed by the
# integer ids returned from create_new_group.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_group_members",
            "description": "Display the members of an existing group to help decide if a new record belongs to it.",
            "parameters": {
                "type": "object",
                "properties": {
                    "group_id": {
                        "type": "integer",
                        "description": "The ID of the group to display.",
                    },
                },
                "required": ["group_id"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "create_new_group",
            "description": "Create a new group with the given title and description for the current record.",
            "parameters": {
                "type": "object",
                "properties": {
                    "title": {
                        "type": "string",
                        "description": "The title of the new group.",
                    },
                    "description": {
                        "type": "string",
                        "description": "The description of the new group.",
                    },
                },
                "required": ["title", "description"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "update_group",
            "description": "Update an existing group with the given title and description, in a way that fits the current record.",
            "parameters": {
                "type": "object",
                "properties": {
                    "group_id": {
                        "type": "integer",
                        "description": "The ID of the group to update.",
                    },
                    "title": {
                        "type": "string",
                        "description": "The updated title of the group.",
                    },
                    "description": {
                        "type": "string",
                        "description": "The updated description of the group.",
                    },
                },
                "required": ["group_id", "title", "description"],
            },
        },
    },
]

def get_group_members(group_id):
    """Return the members of an existing group.

    Args:
        group_id: Id of the group, as an int or a numeric string.

    Returns:
        The group's ``"members"`` list, or the string
        ``"This group does not exist."`` when the id is not numeric or no
        such group is stored.
    """
    missing = "This group does not exist."
    try:
        key = int(group_id)
    except (TypeError, ValueError):
        # A non-numeric id coming from the model cannot match any stored group.
        return missing

    # .get() instead of [] so that probing an unknown id does not make the
    # defaultdict insert an empty phantom group.
    group = groups_db.get(key)
    if not group:
        return missing
    return group["members"]

def create_new_group(title, description):
    """Register a new, initially empty group and return its id.

    The id is one more than the number of entries currently in the store.

    Args:
        title: Title of the new group.
        description: Description of the new group.

    Returns:
        The integer id assigned to the new group.
    """
    new_id = 1 + len(groups_db)
    record = dict(id=new_id, title=title, description=description, members=[])
    groups_db[new_id] = record
    return new_id

def update_group(group_id, title, description):
    """Overwrite the title and description of an existing group.

    Args:
        group_id: Id of the group, as an int or a numeric string.
        title: New title for the group.
        description: New description for the group.

    Returns:
        The integer id of the updated group, usable as a key into the store.

    Raises:
        KeyError: If the id is not numeric or no such group exists.
    """
    try:
        # The store is keyed by int; normalise so a string id such as "1"
        # does not create a separate entry under a string key.
        key = int(group_id)
    except (TypeError, ValueError):
        raise KeyError(f"Group {group_id!r} does not exist.") from None

    # .get() avoids the defaultdict inserting a half-formed group (one with
    # no "members" list) for an unknown id.
    group = groups_db.get(key)
    if not group:
        raise KeyError(f"Group {key} does not exist.")

    group["title"] = title
    group["description"] = description
    return key


# Dispatch table from the tool name the model requests to the Python callable
# implementing it; each key is the function's own name.
available_functions = {
    handler.__name__: handler
    for handler in (get_group_members, create_new_group, update_group)
}

In [None]:
def process_calls(response, messages, title, model="gpt-3.5-turbo", max_lookups=5):
    """Execute the tool call requested by the model and file the record.

    When the model asks for ``get_group_members``, the result is sent back and
    the model is queried again, until it either creates or updates a group
    (the record ``title`` is then appended to that group's members) or makes
    no tool call at all.

    Args:
        response: Chat-completion response to act upon.
        messages: Conversation so far; extended in place with the assistant
            tool-call message and the tool result on every lookup round.
        title: Title of the record being grouped.
        model: Model name used for follow-up requests.
        max_lookups: Maximum number of follow-up requests made after
            ``get_group_members`` calls before giving up on the record.

    Raises:
        Exception: If the model requests more than one tool call at once.
        KeyError: If the model requests a tool that is not registered.
    """
    lookups = 0
    while True:
        response_message = response.choices[0].message
        tool_calls = response_message.tool_calls

        if not tool_calls:
            print("No tool calls!!", title)
            return

        if len(tool_calls) > 1:
            raise Exception("Can only use one tool at a time.")

        tool_call = tool_calls[0]
        function_name = tool_call.function.name
        function_to_call = available_functions.get(function_name)
        if function_to_call is None:
            raise KeyError(f"Model requested an unknown tool: {function_name!r}")

        function_args = json.loads(tool_call.function.arguments)
        res = function_to_call(**function_args)

        print(function_name, function_args, title)

        if function_name != "get_group_members":
            # create_new_group / update_group return the id of the target group.
            groups_db[res]["members"].append(title)
            return

        if lookups >= max_lookups:
            # Bound the number of paid round-trips if the model keeps browsing.
            print("Too many group lookups, record left ungrouped:", title)
            return
        lookups += 1

        # The API requires a "tool" message to follow the assistant message
        # that issued the matching tool call, so record that message first.
        messages.append(response_message)
        messages.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": f"Here are the members of the group: {res}. \n\n If you don't want to use this group, you can create a new one or use an existing one. Try to minimize group creation if possible.",
            }
        )

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )


In [None]:
import pandas as pd

# Replay the first 20 records of one day's history through the grouping agent.
# Each row is expected to provide "hour" and "title" columns.
df = pd.read_csv("raw_data/2023-04-15.csv")

for _idx, row in df.head(20).iterrows():
    # Skip empty placeholder entries that the defaultdict may hold, and pass an
    # empty string when nothing exists yet so prompt_1 reports "no existing
    # groups" (str() of a dict view is never empty).
    existing_groups = [group for group in groups_db.values() if group]
    groups_text = str(existing_groups) if existing_groups else ""

    # Every record starts a fresh conversation: system prompt + this record.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt_1(row["hour"], row["title"], groups_text)},
    ]

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        tools=tools,
        tool_choice="auto",
    )

    process_calls(response, messages, row["title"])