In [85]:
from pgvector.psycopg import register_vector
import psycopg
import os

# Open the single connection shared by every cell in this notebook.
# psycopg.connect() accepts the connection string / URL directly, so it does
# not need to be parsed into a dict first. Autocommit is requested up front so
# that each statement (including the DDL below) takes effect immediately.
conn = psycopg.connect(os.environ["DATABASE_URL"], autocommit=True)

# Make sure the pgvector extension exists, then register the vector type
# with this connection.
conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
register_vector(conn)

In [86]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from datetime import datetime

# Earliest and latest timestamp among non-taxonomy documents, used as the
# bounds of the color scale in get_node_color(). Both aggregates are fetched
# in one query so `documents` is scanned once instead of twice. An aggregate
# without GROUP BY always yields exactly one row; both values are None when
# no non-taxonomy document exists.
min_datetime, max_datetime = conn.execute(
    """
    SELECT
        MIN(CAST(date || ' ' || time AS TIMESTAMP)),
        MAX(CAST(date || ' ' || time AS TIMESTAMP))
    FROM documents
    WHERE NOT is_taxonomy
    """
).fetchone()

def get_node_color(date, time_end):
    """Map a date/time to a hex color on matplotlib's 'coolwarm' colormap.

    The moment is converted to a POSIX timestamp and normalized linearly
    between the module-level ``min_datetime`` and ``max_datetime``; the
    normalized value selects the color.

    Args:
        date: Date whose string form is ``YYYY-MM-DD``.
        time_end: Time whose string form is ``HH:MM:SS``. A missing time,
            given either as ``None`` or as the string ``'None'``, is treated
            as midnight.

    Returns:
        The color as a hex string, as produced by ``matplotlib.colors.to_hex``.

    Raises:
        ValueError: If the combined date and time do not match
            ``%Y-%m-%d %H:%M:%S``.
    """
    # Accept both a real None and its stringified form; previously only the
    # string 'None' was recognised and a real None made strptime() fail.
    if time_end is None or time_end == 'None':
        time_end = "00:00:00"

    moment = datetime.strptime(f"{date} {time_end}", "%Y-%m-%d %H:%M:%S").timestamp()

    cmap = plt.get_cmap('coolwarm')
    norm = mcolors.Normalize(
        vmin=min_datetime.timestamp(),
        vmax=max_datetime.timestamp(),
    )

    # norm() maps the timestamp onto the [vmin, vmax] scale, cmap() turns that
    # position into an RGBA tuple, and to_hex() formats it as a hex string.
    return mcolors.to_hex(cmap(norm(moment)))


In [87]:
import networkx as nx
import matplotlib.pyplot as plt

# Directed graph holding one node per non-taxonomy document.
dag = nx.DiGraph()

document_rows = conn.execute(
    "SELECT id, description, date, time, is_taxonomy, raw FROM documents"
)
for doc_id, doc_description, doc_date, _doc_time, doc_is_taxonomy, doc_raw in document_rows:
    # Taxonomy rows are intentionally left out of the graph.
    if doc_is_taxonomy:
        continue
    # The raw value becomes the label, the description the title, and the
    # date is stored in its string form.
    dag.add_node(
        doc_id,
        label=str(doc_raw),
        title=doc_description,
        date=str(doc_date),
    )

In [88]:
# Add edges to the graph. The query already drops edges whose child is a
# taxonomy document.
edge_rows = conn.execute(
    """SELECT parent_id, child_id, weight FROM edges WHERE child_id not in (
        select id from documents where is_taxonomy = TRUE
    )"""
)
for parent_id, child_id, weight in edge_rows:
    # DiGraph.add_edge() silently creates any endpoint that is not yet in the
    # graph, as a node without the label/title/date attributes that the
    # document nodes carry. Skip edges whose endpoints were not added as
    # document nodes (e.g. a taxonomy parent or a NULL parent_id) so every node
    # in the graph keeps those attributes.
    if parent_id not in dag or child_id not in dag:
        continue
    dag.add_edge(parent_id, child_id, weight=weight)

In [89]:
# Keep the largest weakly connected components of the graph, biggest first.
# NOTE: the `largest_5_*` names are historical and kept so later cells keep
# working; the actual cutoff is TOP_N_COMPONENTS.
TOP_N_COMPONENTS = 10

components = nx.weakly_connected_components(dag)
# Pair each component's subgraph view with its node count for sorting.
subgraphs = [(dag.subgraph(c), len(c)) for c in components]
subgraphs_sorted_by_size = sorted(subgraphs, key=lambda pair: pair[1], reverse=True)
largest_5_subgraphs = subgraphs_sorted_by_size[:TOP_N_COMPONENTS]
largest_5_graphs = [subgraph for subgraph, _size in largest_5_subgraphs]

In [90]:
import os
from openai import OpenAI

# Instruction text that get_completion() places in front of the search records.
prompt = (
    "\n"
    "I will provide you a list of Google searches I have performed through time to pursue a certain goal / skill / knowledge.\n"
    "What can you tell of my journey?\n"
)

# OpenAI client; the API key is read from the environment rather than hardcoded.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

summaries = []

def get_completion(records, model="gpt-4"):
    """Ask the chat model to describe the journey behind a list of searches.

    The module-level ``prompt`` and ``records`` are joined with a newline and
    sent as a single user message through the module-level ``client``.

    Args:
        records: Text listing the searches; it is interpolated into the
            message as-is.
        model: Name of the chat model to query. Defaults to ``"gpt-4"``, the
            model this function always used before the parameter existed.

    Returns:
        The content of the first choice in the response.
    """
    res = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": f"{prompt}\n{records}"},
        ],
    )

    return res.choices[0].message.content
  


In [91]:
# use openai gpt to generate a summary for each subgraph
from tqdm.notebook import tqdm

completions = []
for graph in tqdm(largest_5_graphs):
    # Build the node list for THIS component. These steps were previously
    # commented out, leaving `graph_nodes` undefined on a fresh kernel (or
    # stale and identical for every component when left over from an earlier
    # run).
    # NOTE(review): dag_longest_path() expects an acyclic graph - confirm the
    # edges table cannot contain cycles.
    longest_path = nx.dag_longest_path(graph)
    # Nodes created implicitly by add_edge() carry no attributes; leave them
    # out so the 'date' / 'label' lookups below cannot fail.
    graph_nodes = [
        graph.nodes[node] for node in longest_path if 'date' in graph.nodes[node]
    ]
    # Present the searches in chronological order (dates are stored as strings).
    graph_nodes = sorted(graph_nodes, key=lambda x: x['date'])

    records = "\n".join([f"{node['date']} {node['label']}" for node in graph_nodes])

    res = get_completion(records)
    # Keep the input next to the model's answer so each summary can be traced
    # back to the searches that produced it.
    completions.append([records, res])

  0%|          | 0/10 [00:00<?, ?it/s]