# Brochure builder that accepts the URL of a website and generates a concise summary of the company for perspective clients, investors and recruits

In [1]:
# imports

import os
import json
from httpx import stream
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_contents
from openai import OpenAI


In [2]:
# Initialize env and constants if you have a OPENAI API KEY Else Initialize Ollama

ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

# load_dotenv(override=True)
# api_key = os.getenv('OPENAI_API_KEY')

# if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
#     print("API key looks good so far")
# else:
#     print("There might be a problem with your API key?")

# Model = 'gpt-5-nano'
# openai = OpenAI()

# Use first LLM to determine which links are relevant, applying one shot prompting

In [47]:
# Define prompts for link filtering

link_system_prompt = """
You are provided a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, Company page or Careers/Jobs pages.
You should respond in JSON similar to this example:

{
    "links": [
        {"type": "about Page", "url": "https://full.url/goes/here/about"},
        {"type": "careers Page", "url": "https://another.full.url/careers" }
    ]
}
"""

def get_links_user_prompt(url, links):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these links are relevant web links for a brochure about the company and return 2 relevant links
respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.

Links (some migh be relative links):

"""
    user_prompt += "\n".join(links)
    return user_prompt


In [None]:
# Define first LLM call to return relevant links

def select_relevant_links(url, links):
    print(f"Selecting relevant links for {url}")
    response = ollama.chat.completions.create(model="llama3.2", 
    messages=[
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(url, links)}
        ], 
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    relevantLinks = json.loads(result)
    
    print(f"Found {len(relevantLinks)} relevant links")
    return relevantLinks


# Define function for aggregating website contents and relevant links

In [None]:
def fetch_page_and_all_relevant_links(url):
    content = fetch_website_contents(url)
    relevant_links = select_relevant_links(url, content["links"])
    result = f"## Landing Page:\n\n{(content['title']  + "\n\n" + content['text'])[:2_000]}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link["type"]}\n"
        result += fetch_website_contents(link["url"])
    return result

# Use second LLM to determine create a brochure based on website contents and relevant links

In [28]:
# Define prompts for creating company brochure

brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information
"""

def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages:
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""

    user_prompt += fetch_page_and_all_relevant_links(url)
    # Truncate if more than 5000 characters
    user_prompt += user_prompt[:5_000] 
    return user_prompt

In [None]:
# Define second LLM call return brochure

def create_brochure(company_name, url):
    response = ollama.chat.completions.create(
        model="llama3.2",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)


In [53]:
create_brochure("UL Solutions", "https://www.ul.com/" )

Selecting relevant links for https://www.ul.com/
['https://accessibe.com/blog/knowledgebase/screen-reader-guide', '#main-content', 'https://www.ul.com/', '/industries', '#', '/industries/automotive-and-mobility', '/industries/building-technologies-and-construction', '/industries/chemicals-and-materials', '#', '/industries/chemicals-and-materials/basic-and-industrial-chemicals', '/industries/chemicals-and-materials/plastics-and-engineered-materials', '/industries/chemicals-and-materials/specialty-and-fine-chemicals', '/industries/data-centers', '/industries/energy-and-utilities', '#', '/industries/energy-and-utilities/batteries-and-energy-storage', '/industries/energy-and-utilities/energy-equipment', '/industries/energy-and-utilities/oil-and-gas', '/industries/energy-and-utilities/power-distribution', '/industries/energy-and-utilities/renewables', '/industries/financial-and-investment-services', '#', '/industries/financial-and-investment-services/asset-and-property-management', '/indust

KeyError: 'links'