In [82]:
import re
import html
import unicodedata

import sys
sys.path.insert(0, "../")

from hindsight_feed_db import fetch_content_generators, fetch_contents
import utils
from feed_generator import FeedGenerator
from feeders.exa_topic.exa_topic import ExaTopicFeeder

In [2]:
# Load the content generators through the hindsight_feed_db helper; later cells
# read each generator's `gen_type` and `id` attributes.
content_generators = fetch_content_generators()

In [3]:
# Collect the generators whose type is "ExaTopicFeeder" together with their ids
# in a single pass; the id set is used for membership tests further down.
exa_content_generators = []
exa_content_generators_ids = set()
for cg in content_generators:
    if cg.gen_type == "ExaTopicFeeder":
        exa_content_generators.append(cg)
        exa_content_generators_ids.add(cg.id)

In [4]:
# Fetch content items from the feed database.
# NOTE(review): non_viewed=False presumably means "do not restrict to unviewed
# items" — confirm against hindsight_feed_db.fetch_contents.
content = fetch_contents(non_viewed=False)

In [89]:
# Newest `timestamp` among all fetched items. `default=None` keeps this cell from
# raising ValueError when nothing was fetched; in that case the comprehension
# below never runs its comparison, so None is never compared against anything.
last_added_timestamp = max((c.timestamp for c in content), default=None)

# Items whose `last_modified_timestamp` is later than the newest `timestamp`.
# NOTE(review): presumably these are items the user interacted with after the
# last batch was added — confirm how last_modified_timestamp is maintained.
newly_interacted_content = [c for c in content if c.last_modified_timestamp > last_added_timestamp]


In [9]:
# Split the fetched items in one pass: everything produced by an ExaTopicFeeder
# generator, and the subset of those whose `clicked` flag is truthy.
exa_content = []
clicked_content = []
for item in content:
    if item.content_generator_id in exa_content_generators_ids:
        exa_content.append(item)
        if item.clicked:
            clicked_content.append(item)

In [41]:
# def get_dive_deeper_prompt(content):
#     prompt = """You are an assistant whose task is to create a search query for a embedding search database to help a user find content that
#                 they are interested in. Below is text from content that the user has recently been interested in:
#                 """

#     for c in content:
#         prompt += f"New content:\nTitle:{c.title}"
#         content_text = c.content_generator_specific_data['text']
#         prompt += content_text + "\n"

#     prompt += """Create a short, descriptive sentence to feed the embedding search database. Include as much information from different sources as possible. Answer"""
#     return prompt

def get_dive_deeper_prompt(content):
    """Build the LLM prompt that asks for a search query summarizing ``content``.

    Args:
        content: Iterable of items exposing ``title`` and
            ``content_generator_specific_data``. The latter is subscripted with
            ``'text'`` to obtain the excerpt. If that lookup is impossible
            (the data is ``None`` or has no ``'text'`` entry) or yields
            ``None``, the item is rendered with an empty excerpt instead of
            aborting the whole prompt or inserting the literal string "None".

    Returns:
        str: the fixed preamble, one ``Title: ...`` / excerpt section per item,
        and the task instructions ending in ``Answer:``.
    """
    preamble = """You are an assistant who generates search queries for an embedding search database to help a user find content of interest.

    Below are excerpts from content that the user has recently engaged with:

    """
    task = """Task:
        - Analyze the content above.
        - Identify the main themes and topics.
        - Generate a single, concise sentence that summarizes the user's interests.
        - Include as many relevant keywords and concepts from the content as possible.
        - The sentence should be suitable as a search query for finding similar content.

        Answer:"""

    sections = []
    for c in content:
        try:
            content_text = c.content_generator_specific_data['text']
        except (KeyError, TypeError):
            # No excerpt stored for this item; keep its title in the prompt.
            content_text = None
        if content_text is None:
            content_text = ""
        sections.append(f"Title: {c.title}\n{content_text}\n\n")

    # Joining once avoids rebuilding the prompt string for every item.
    return preamble + "".join(sections) + task

In [42]:
# Distinct generator ids that have at least one clicked item; one prompt is
# built per id in the next cell.
clicked_cg_ids = {c.content_generator_id for c in clicked_content}

In [43]:
# Map each clicked generator id to the prompt built from that generator's
# clicked items only.
cg_id_to_prompt = {
    cg_id: get_dive_deeper_prompt(
        [item for item in clicked_content if item.content_generator_id == cg_id]
    )
    for cg_id in clicked_cg_ids
}

# Running with a local LLM

In [44]:
from mlx_lm import load, generate

def llm_generate(pipeline, prompt, max_tokens):
    """Run a single completion with an already-loaded model.

    Args:
        pipeline: Two-item iterable unpacked as ``(model, tokenizer)``; the
            notebook passes the value returned by ``load``.
        prompt: Prompt text forwarded to ``generate``.
        max_tokens: Forwarded to ``generate`` as ``max_tokens``.

    Returns:
        Whatever ``generate`` returns for the prompt.
    """
    llm, tok = pipeline
    completion = generate(llm, tok, prompt=prompt, max_tokens=max_tokens)
    return completion

In [67]:
# Load the model used for every generation below; llm_generate unpacks the
# result as (model, tokenizer).
pipeline = load("mlx-community/Llama-3.1-SuperNova-Lite-bf16")
# Alternative model, kept for quick switching:
# pipeline = load("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit")

Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 13183.71it/s]


In [72]:
# First pass: one raw completion per generator id, capped at 100 new tokens.
cg_id_to_next_topic = {
    cg_id: llm_generate(pipeline=pipeline, prompt=prompt, max_tokens=100)
    for cg_id, prompt in cg_id_to_prompt.items()
}

In [79]:
for cg_id, next_topic in cg_id_to_next_topic.items():
    # Preview the second line of each completion, which is where the recorded
    # runs put the query. A completion without any newline is shown as-is
    # instead of raising IndexError.
    topic_lines = next_topic.split("\n")
    print(cg_id, topic_lines[1] if len(topic_lines) > 1 else topic_lines[0])

1 "Explainable AI, human-like intelligence, private AI, conversational AI, decision intelligence, AI governance, AI ethics, AI development, AI integration, AI workflow, AI models, AI training, AI customization, AI fine-tuning, AI as a service, AI for business, AI for enterprise, AI for finance, AI for healthcare, AI for security, AI for public sector, AI for ESG risk, AI for citizen services, AI for government, AI for professional services,
2 The user is interested in exploring the intersection of artificial intelligence, human consciousness, and self-awareness, with a focus on the potential implications of AI on human identity and the nature of reality.
3         "I'm interested in AI, machine learning, algorithmic trading, deep learning, artificial intelligence, econophysics, quantitative hedge funds, algorithmic bias, algorithmic governance, and the intersection of technology and society."
4         "Personal AI, ChatGPT, Quarkus Embeddings, Redis Search, OpenShift, Strapi CMS, WebH

In [75]:
def refine_search_query_prompt(initial_response):
    """Build the second-pass prompt that asks the LLM to tighten a query.

    Args:
        initial_response: Text of the first-pass query; it is embedded between
            single quotes in the prompt exactly as given.

    Returns:
        str: the refinement prompt, ending in ``Refined Query:``.
    """
    steps = (
        "- Review the initial query.",
        "- Clarify and condense the query to better capture the essential themes and keywords.",
        "- Ensure the refined query is sharp, specific, and optimized for searching related content.",
        "- The refined query should be a single, succinct sentence that effectively encapsulates the user's core interests.",
    )
    prompt_lines = [
        "You are an assistant tasked with refining a search query generated by a previous analysis of user interests. Below is the initial query generated:",
        "",
        f"    '{initial_response}'",
        "",
        "    Task:",
    ]
    # Every instruction carries the same four-space indent as the rest of the prompt body.
    prompt_lines.extend(f"    {step}" for step in steps)
    prompt_lines.append("")
    prompt_lines.append("    Refined Query:")
    return "\n".join(prompt_lines)

In [76]:
# Second pass: feed each raw first-pass completion back to the model through the
# refinement prompt, again capped at 100 new tokens.
cg_id_to_refined_next_topic = {
    cg_id: llm_generate(
        pipeline=pipeline,
        prompt=refine_search_query_prompt(raw_topic),
        max_tokens=100,
    )
    for cg_id, raw_topic in cg_id_to_next_topic.items()
}

In [78]:
for cg_id, next_topic in cg_id_to_refined_next_topic.items():
    # Preview the second line of each refined completion, which is where the
    # recorded runs put the query. A completion without any newline is shown
    # as-is instead of raising IndexError.
    refined_lines = next_topic.split("\n")
    print(cg_id, refined_lines[1] if len(refined_lines) > 1 else refined_lines[0])

1     "Explainable AI, conversational AI, and decision intelligence for business, finance, healthcare, security, and public sector applications."
2     "The intersection of artificial intelligence and human consciousness, exploring AI's implications on human identity and reality."
3     "AI, machine learning, and algorithmic trading intersecting with econophysics, quantitative hedge funds, and societal implications of technology."
4     'AI, machine learning, natural language processing, low-code development, vector databases, self-hosted backends, AI-assisted creativity, AI-powered productivity, AI-generated art, AI-driven innovation, AI research, AI development, AI applications, AI tools, AI services, AI platforms, AI ecosystems, AI communities, ChatGPT, Quarkus, Redis Search, OpenShift, Strapi CMS, WebHooks, vector similarity searches, custom prompts, AI-generated portraits'


# Feed back into FeedGenerator

In [81]:
# Feed generator that the new child feeders are registered with below.
# NOTE(review): content_generators=None presumably starts it without any
# preloaded generators — confirm in FeedGenerator.
feed_generator = FeedGenerator(content_generators=None)

In [83]:
for cg_id, next_topic in cg_id_to_refined_next_topic.items():
    # The recorded runs put the query on the second line of the completion;
    # fall back to the only line rather than raising IndexError.
    response_lines = next_topic.split("\n")
    next_topic_sentence = (response_lines[1] if len(response_lines) > 1 else response_lines[0]).strip()

    # The model tends to echo the prompt's indentation and wrap the query in
    # quotes; remove a matching pair so the feeder receives only the query text.
    if (
        len(next_topic_sentence) >= 2
        and next_topic_sentence[0] == next_topic_sentence[-1]
        and next_topic_sentence[0] in "\"'"
    ):
        next_topic_sentence = next_topic_sentence[1:-1].strip()

    if not next_topic_sentence:
        # Nothing to search for: do not register a feeder with an empty topic.
        print(cg_id, "no usable topic in LLM output, skipping")
        continue

    print(cg_id, next_topic_sentence)
    feed_generator.add_content_generator(ExaTopicFeeder(name=f"exa_topic_child_of_{cg_id}",
                                                        description="ExaTopicFeeder generated by summarizing the parent content that was clicked on",
                                                        topic=next_topic_sentence,
                                                        parent_generator_id=cg_id))

1     "Explainable AI, conversational AI, and decision intelligence for business, finance, healthcare, security, and public sector applications."
exa_topic_child_of_1 fetching content
Failed request for https://www.nocode.ai/building-explainable-ai/
2     "The intersection of artificial intelligence and human consciousness, exploring AI's implications on human identity and reality."
exa_topic_child_of_2 fetching content
3     "AI, machine learning, and algorithmic trading intersecting with econophysics, quantitative hedge funds, and societal implications of technology."
exa_topic_child_of_3 fetching content
Failed request for https://wiki.santafe.edu/index.php/Machine_Learning,_Complexity_and_Market_Behavior
4     'AI, machine learning, natural language processing, low-code development, vector databases, self-hosted backends, AI-assisted creativity, AI-powered productivity, AI-generated art, AI-driven innovation, AI research, AI development, AI applications, AI tools, AI services, AI p

In [85]:
# Hand-pasted overall summary (per the description below, produced with ChatGPT
# o1), registered as a top-level feeder with no parent generator.
o1_resp = "Personal AI assistants that are private and on-device, trained on personal data to become human-like extensions of oneself, enhancing memory and human connections, integrating AI into daily life with a focus on data privacy, explainable AI, and responsible AI."
feed_generator.add_content_generator(
    ExaTopicFeeder(
        name="exa_topic_overall_summary_from_o1",
        description="ExaTopicFeeder generated by summarizing everything using ChatGPT o1",
        topic=o1_resp,
        parent_generator_id=None,
    )
)

exa_topic_overall_summary_from_o1 fetching content
Content with URL 'https://haltia.ai/' already exists in the database and will not be added.
Content with URL 'https://www.personal.ai/yours' already exists in the database and will not be added.
Content with URL 'https://pi.ai/home' already exists in the database and will not be added.
Content with URL 'https://snoop.personal.ai/' already exists in the database and will not be added.
Content with URL 'https://www.personal.ai/your-true-personal-ai' already exists in the database and will not be added.
