In [1]:
# Importing gradio for the UI
import gradio as gr


In [17]:
# Import standard-library and project helper modules
import sys
import os

from modules.env_vars import set_os_env_vars, check_missing_vars
from modules.neon_db import run_neon_query, load_sql_query
from modules.date_functions import get_current_date

import importlib

# Reload reference_extraction so the module's source is re-executed in this
# kernel. The from-import has to come AFTER the reload so that
# create_content_from_df is bound to the freshly reloaded function.
import modules.reference_extraction
importlib.reload(modules.reference_extraction)
from modules.reference_extraction import create_content_from_df
from modules.prompt_templates import one_shot_example, system_message_example

# Runs the helper imported from modules.env_vars, which puts the project's
# environment variables in `os` (presumably os.environ — confirm in env_vars.py).
set_os_env_vars()

In [3]:
from modules.langchain_config import set_langsmith_client, get_langsmith_tracer, get_llm_model, load_model_costs

# Set up LangSmith, then keep the tracer: it is passed as a callback to the
# chains built later in this notebook.
# NOTE(review): presumably relies on variables loaded by set_os_env_vars() — confirm.
set_langsmith_client()
tracer = get_langsmith_tracer()

In [4]:
# Load the per-model cost table and display it as the cell output.
# NOTE(review): in the output recorded for this cell, 'gpt-4o-mini' carries only
# a 'provider' key (no 'input'/'output' entries) — confirm that code consuming
# this table copes with the missing prices.
MODEL_COSTS = load_model_costs()
MODEL_COSTS


{'claude-3-sonnet-20240229': {'provider': 'anthropic',
  'input': 0.003,
  'output': 0.015},
 'claude-3-5-sonnet-20241022': {'provider': 'anthropic',
  'input': 0.003,
  'output': 0.015},
 'gpt-4o-mini': {'provider': 'openai'}}

In [5]:
# Model selection: exactly one model_name assignment should be active.
model_name = "claude-3-5-sonnet-20241022"
# Alternative: model_name = "gpt-4o-mini"
streaming = True # Streaming is when the LLM returns a token at a time, instead of the entire response at once

# Build the language model from the chosen name, the streaming flag and the cost table
llm = get_llm_model(model_name, streaming, MODEL_COSTS)


In [6]:
# Read the stored SQL text and execute it; the result lands in `df`
query = load_sql_query("web_pages.sql")
df = run_neon_query(query)

# Report how many rows came back, then preview the first one
print("Number of rows:", df.shape[0])
df.head(1)

Number of rows: 60


Unnamed: 0,id,url,media_type,status,created_at,title,description,summary,author,published_at
0,b0762d1d-a825-4427-a785-cb52229f4c67,https://aidanmclaughlin.notion.site/reasoners-...,web-page,completed,2024-11-29 07:51:53.011015,Notion – The all-in-one workspace for your not...,A new tool that blends your everyday work apps...,The article discusses the limitations of curre...,,NaT


In [18]:
# Turn the query results into newsletter content. The helper returns three
# values: `all_content` (later passed to the chains as "context"), a list, and a dict.
all_content, all_content_list, all_content_dict = create_content_from_df(df)

# Spot-check the output: number of entries, the first list entry, then the
# fields and full record of one dict entry.
# NOTE(review): the list is read at index 0 but the dict at key 1 — the dict
# looks keyed by 1-based article number; confirm in reference_extraction.
print(len(all_content_list))
print(all_content_list[0])
print(all_content_dict[1].keys())
print(all_content_dict[1])

60

<START Article Number: 1>
Title: Notion – The all-in-one workspace for your notes, tasks, wikis, and databases.
URL: https://aidanmclaughlin.notion.site/reasoners-problem
Summary: The article discusses the limitations of current reasoning models, particularly OpenAI's o1, which utilize reinforcement learning (RL) to enhance reasoning capabilities. While these models show promise in structured environments with clear rewards, they struggle with open-ended tasks that lack frequent feedback, such as creative writing or philosophical reasoning. The author argues that despite the advancements in RL, these models do not generalize well beyond their training domains, leading to subpar performance in tasks requiring nuanced understanding. The piece highlights the challenges of scaling model size and the potential stagnation in AI development if the focus remains solely on improving reasoning without addressing the need for larger, more capable models. Key insights include the importance of

In [23]:
# Build one "[n] title | url" line per entry of all_content_dict, newline-separated
footnotes_text = "\n".join(
    f"[{i}] {source['title']} | {source['url']}"
    for i, source in all_content_dict.items()
)
footnotes_text


"[1] Notion – The all-in-one workspace for your notes, tasks, wikis, and databases. | https://aidanmclaughlin.notion.site/reasoners-problem\n[2] LangChain State of AI Agents Report | https://www.langchain.com/stateofaiagents\n[3] How to Get Started in AI Consulting: - Jason Liu | https://jxnl.co/writing/2024/10/31/consulting-start/\n[4] GitHub - abus-aikorea/voice-pro: Comprehensive Gradio WebUI for audio processing, powered by Whisper engines (Whisper, Faster-Whisper, Whisper-Timestamped). Features Voice Changer, zero-shot Voice Cloning (E2, F5-TTS), YouTube downloading, vocal isolation(UVR5), Text-to-Speech (Edge-TTS), and multi-language translation. Perfect for content creators and developers. | https://github.com/abus-aikorea/voice-pro\n[5] Creating a LLM-as-a-Judge That Drives Business Results – | https://hamel.dev/blog/posts/llm-judge/\n[6] GitHub - elicit/machine-learning-list: A curriculum for learning about foundation models, from scratch to the frontier | https://github.com/e

In [None]:
# Read the date once and reuse it, so the prompt template and the invocation
# inputs always carry the same value (two separate get_current_date() calls
# could disagree if the date rolls over between them).
current_date = get_current_date()
newsletter_prompt = one_shot_example(current_date)

# Create the chain with tracing: prompt piped into the model, with the
# LangSmith tracer attached as a callback and a tag to identify these runs.
chain = (newsletter_prompt | llm).with_config(
    {
        "callbacks": [tracer],
        "tags": ["newsletter_generation"],
    }
)

# Test the chain: feed it the article content and print the generated text
newsletter = chain.invoke({"context": all_content, "current_date": current_date})
print(newsletter.content)

In [None]:
# Prompt variant built by system_message_example()
newsletter_prompt = system_message_example()

# Pipe the prompt into the model, then attach the tracer callback and run tag
chain = newsletter_prompt | llm
chain = chain.with_config({"callbacks": [tracer], "tags": ["newsletter_generation"]})

# Run the chain on the article content and print the generated text
newsletter = chain.invoke(
    {
        "context": all_content,
        "current_date": get_current_date(),
    }
)
print(newsletter.content)