# BROCHURE CREATOR FROM WEB PAGE link evaluation.

In [6]:
import os
import json
from IPython.display import display, Markdown, update_display
from openai import OpenAI
from dotenv import load_dotenv
from scraper import fetch_website_links, fetch_website_contents


load_dotenv(override=True)

API_KEY = os.getenv('GEMINI_API_KEY')
GROQ_API_KEY =os.getenv('GROQ_API_KEY')
# open_ai = OpenAI(base_url="https://generativelanguage.googleapis.com/v1beta",api_key=API_KEY)
groq = OpenAI(base_url="https://api.groq.com/openai/v1",api_key=GROQ_API_KEY)


In [7]:
def get_links(url):
    links = fetch_website_links(url)
    result = "\n".join(links)
    return result


In [8]:
def get_links_user_prompt(url):
    user_prompt = """
    Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""

    user_prompt += get_links(url)
    return user_prompt

In [9]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
    {
        "links": [
            {"type": "about page", "url": "https://full.url/goes/here/about"},
            {"type": "careers page", "url": "https://full.url/goes/here/careers"}
        ]
    }
"""

In [10]:
print(get_links_user_prompt("https://edwarddonner.com"))



    Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/curriculum/
https://edwarddonner.com/proficient/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://edwarddonner.com/curriculum/
https://edwarddonner.com/2026/02/17/ai-coder-vibe-coder-to-agentic-engineer/
https://edwarddonner.com/2026/02/17/ai-coder-vibe-coder-to-agentic-engineer/
https://edwarddonner.com/2026/01/04/

In [11]:

def select_relevant_links(url):
    print(f"Selecting relevant links for {url}")
    response= groq.chat.completions.create(
        model= "openai/gpt-oss-120b",
        messages= [
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )

    result= response.choices[0].message.content
    return json.loads(result)

In [12]:
print(select_relevant_links("https://edwarddonner.com"))

Selecting relevant links for https://edwarddonner.com
{'links': [{'type': 'homepage', 'url': 'https://edwarddonner.com/'}, {'type': 'about page', 'url': 'https://edwarddonner.com/about-me-and-about-nebula/'}]}


In [13]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"Landing page: \n\n {contents}\n ##Relevant links:\n"
    for link in relevant_links['links']:
        result += f"\n\n ### link: {link["type"]}\n"
        result += fetch_website_contents(link["url"])
    return result

In [14]:
print(fetch_page_and_all_relevant_links("https://edwarddonner.com"))

Selecting relevant links for https://edwarddonner.com
Landing page: 

 Home - Edward Donner

Home
AI Curriculum
Proficient AI Engineer
Connect Four
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
I will happily drone on for hours about LLMs to anyone in my vicinity. My friends got fed up with my impromptu lectures, and convinced me to make some Udemy courses. To my total joy (and shock) they’ve b

In [15]:

brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [16]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
    You are looking at a company called: {company_name}
    Here are the contents of its landing page and other relevant pages;
    use this information to build a short brochure of the company in markdown without code blocks.\n\n
    """
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [17]:
def create_brochure(company_name, url):
    stream = groq.chat.completions.create(
        model="openai/gpt-oss-120b",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    display_id = display(Markdown(""), display_id=True)
    response= " "
    for chunk in stream:
        response += chunk.choices[0].delta.content or ""
        # update_display(Markdown(response), display_id=display_id)   
        yield response

In [None]:
# create_brochure("Edward Donner", "https://edwarddonner.com")
import gradio as gr

company_name = gr.Textbox(label="Name", info="Enter the name of you company", lines=2)
company_url = gr.Textbox(label="URL", info="Enter the url of the copany website", lines=2)
output_handler = gr.Markdown()
view = gr.Interface(
    fn=create_brochure,
    title=f"GET BROCHURE \N{PARTY POPPER}",
    inputs=[company_name, company_url],
    outputs=[output_handler],
    examples=[["Eddonner"], ["https://edwarddonner.com"]],
    flagging_mode="never"
)

view.launch(inbrowser=True, share=True, auth=("Xage", "peaches"))

* Running on local URL:  http://127.0.0.1:7866
* Running on public URL: https://8ff52a534900d02957.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Selecting relevant links for https://apple.com


