In [9]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [10]:
# Load settings from a local .env file; with override=True, values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    # Fail here with an actionable message rather than at the first API call.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or environment "
        "before running this notebook."
    )

# Pass the key explicitly so the value validated above is the one the client uses.
client = OpenAI(api_key=api_key)

In [11]:
# System prompt for the link-selection request made in get_relevant_links.
# It is sent to the model verbatim (never passed through str.format), so the
# braces in the JSON example below are literal characters.
link_analyzer_prompt = """
You are a skilled research analyst. Your task is to identify the most useful introductory links for a given topic from a list of URLs. 
You must ignore forum posts, product pages, and social media links. Focus on high-quality articles, documentation, and educational resources.
Respond ONLY with a JSON object in the following format:
{
    "links": [
        {"type": "overview_article", "url": "https://..."},
        {"type": "technical_docs", "url": "https://..."},
        {"type": "history_summary", "url": "https://..."}
    ]
}
"""

In [12]:
# System prompt template for the briefing request made in create_research_brief.
# It is filled in with str.format(topic=...), so {topic} is a placeholder and any
# other literal brace added to this text would have to be doubled ({{ or }}).
briefing_prompt = """
You are an expert intelligence analyst. You will be given raw text from several articles about a topic. 
Your mission is to synthesize this information into a clear and structured research brief. 
The brief must contain the following sections in Markdown:

Research Brief: {topic}

1. Executive Summary
(A one-paragraph overview of the entire topic.)

2. Key Concepts
(Use bullet points to list and explain the most important terms and ideas.)

3. Important Figures / Events
(List the key people, organizations, or historical events relevant to the topic.)

4. Further Reading
(Provide a list of the original URLs you analyzed for deeper study.)
"""

In [13]:
def get_relevant_links(topic: str, starting_url: str) -> dict:
    """Ask the model to pick the most useful links found on a starting page.

    Collects the links on ``starting_url`` with ``fetch_website_links``, sends
    them to the chat model together with ``link_analyzer_prompt``, and parses
    the JSON object the model replies with.

    Args:
        topic: Subject of the research brief; interpolated into the user prompt.
        starting_url: Page whose links are offered to the model.

    Returns:
        The parsed JSON reply. The system prompt requests the shape
        ``{"links": [{"type": ..., "url": ...}, ...]}``. ``{"links": []}`` is
        returned when the model sends back no content.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
    """
    # getting all links from the starting URL
    links_on_page = fetch_website_links(starting_url)

    # Joined outside the f-string: a backslash inside an f-string replacement
    # field is a SyntaxError on Python versions before 3.12.
    links_text = "\n".join(links_on_page)

    # user prompt for the Link Analyst
    user_prompt = f"""
    Please analyze the following links related to the topic "{topic}" and return the most relevant ones for a research brief.
    The main URL is {starting_url}. Make sure all returned URLs are absolute.

    Links:
    {links_text}
    """

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": link_analyzer_prompt},
            {"role": "user", "content": user_prompt}
        ],
        response_format={"type": "json_object"}
    )

    result_json = response.choices[0].message.content
    if not result_json:
        # message.content can be None/empty (e.g. a refusal); json.loads would
        # raise on it, so report "no links" in the shape callers already handle.
        return {"links": []}

    return json.loads(result_json)

In [14]:
def get_all_content(links_data: dict) -> str:
    """Fetch each selected page and concatenate the results into one string.

    Args:
        links_data: Mapping with an optional "links" list. Each entry is a
            mapping whose "url" value, when truthy, is fetched with
            ``fetch_website_contents``.

    Returns:
        One "Content from <url>" section per fetched page, followed by an
        "Original URLs for Reference" list of the URLs that were fetched.
    """
    sections = []
    fetched_urls = []

    for entry in links_data.get("links", []):
        page_url = entry.get("url")
        if not page_url:
            # Entries with a missing or empty URL contribute nothing.
            continue
        fetched_urls.append(page_url)
        page_text = fetch_website_contents(page_url)
        sections.append(f"Content from {page_url} \n{page_text}\n\n")

    reference_list = "\n".join(fetched_urls)
    return "".join(sections) + "Original URLs for Reference\n" + reference_list

In [15]:
def create_research_brief(topic: str, starting_url: str, max_content_chars: int = 15000):
    """Build a research brief for a topic and stream it into the notebook output.

    Selects links with ``get_relevant_links``, gathers their text with
    ``get_all_content``, then asks the chat model (system prompt:
    ``briefing_prompt`` formatted with ``topic``) to write the brief. The reply
    is rendered as Markdown and refreshed as each streamed chunk arrives.

    Args:
        topic: Subject of the brief.
        starting_url: Page whose links seed the research.
        max_content_chars: Maximum number of characters of gathered content
            sent to the model. Defaults to 15000.

    Returns:
        None. The brief is shown through IPython's display machinery.
    """
    relevant_links = get_relevant_links(topic, starting_url)
    full_content = get_all_content(relevant_links)

    content_excerpt = full_content[:max_content_chars]
    if len(full_content) > max_content_chars:
        # get_all_content places the URL list at the very end of its output, so
        # truncation drops it. Re-append the URLs so the model can still fill
        # in the 'Further Reading' section.
        source_urls = [
            link.get("url")
            for link in relevant_links.get("links", [])
            if link.get("url")
        ]
        content_excerpt += "\n\nOriginal URLs for Reference\n" + "\n".join(source_urls)

    user_prompt = f"""
    Please create a research brief on the topic "{topic}" using the following content.
    Remember to include the original URLs in the 'Further Reading' section.

    Content:
    {content_excerpt}
    """

    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": briefing_prompt.format(topic=topic)},
            {"role": "user", "content": user_prompt}
        ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        if not chunk.choices:
            # Some streamed chunks may carry no choices (e.g. usage-only
            # chunks); indexing [0] on them would raise IndexError.
            continue
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [None]:
# Example run: selects links from the Wikipedia article on artificial
# intelligence and streams the resulting brief into this cell's output.
# This performs live web fetches and OpenAI API calls.
create_research_brief(
    topic="The Rise of Artificial Intelligence", 
    starting_url="https://en.wikipedia.org/wiki/Artificial_intelligence"
)