# Week 1 Day 5 - Business Brochure Generator (Community Contribution)

This notebook implements a business solution that generates company brochures by:
- Intelligently selecting relevant links using an LLM
- Aggregating content from multiple pages
- Generating professional brochures with different styles
- Supporting OpenAI models out of the box (Ollama models can be used by pointing the `OpenAI` client at Ollama's OpenAI-compatible endpoint)


In [1]:
# Setup and imports
import os
import json
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from IPython.display import Markdown, display
from openai import OpenAI
from dotenv import load_dotenv

# Pull settings from a local .env file, letting its values override any
# variables that are already set in the process environment.
load_dotenv(override=True)

# Shared API client used by every LLM call in this notebook
openai = OpenAI()

# Browser-like request headers sent with every scraping request
HEADERS = {
    "User-Agent": " ".join(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/117.0.0.0",
            "Safari/537.36",
        ]
    )
}

print("Setup complete!")


Setup complete!


In [2]:
# Web scraping utilities
def fetch_website_contents(url, char_limit=2000):
    """Fetch a web page and return its title and visible text as plain text.

    Args:
        url: Address of the page to download.
        char_limit: Maximum number of characters in the returned string.

    Returns:
        The page title, a blank line, and the whitespace-normalised body
        text, truncated to ``char_limit`` characters. If the request fails
        (network error, timeout, non-2xx status), the failure is printed and
        the fixed string "Error: Could not fetch website content" is returned
        instead of raising.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        html = response.text
    except Exception as e:
        # Best-effort scraping: report the failure and return a sentinel string
        print(f"Error fetching {url}: {e}")
        return "Error: Could not fetch website content"

    soup = BeautifulSoup(html, "html.parser")

    # Remove script and style elements so their source does not end up in the text
    for tag in soup(["script", "style"]):
        tag.decompose()

    title = soup.title.get_text(strip=True) if soup.title else "No title found"
    text = soup.get_text()

    # Clean up whitespace: strip each line, split on double spaces, and
    # re-join the non-empty pieces with single spaces
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    text = " ".join(chunk for chunk in chunks if chunk)

    # "\n\n" must be real newlines; the escaped form "\\n\\n" would put a
    # literal backslash-n sequence between the title and the body
    return f"{title}\n\n{text}".strip()[:char_limit]

def fetch_website_links(url):
    """Collect the distinct link targets found on a web page.

    Args:
        url: Address of the page to scan; also used as the base for
            resolving relative ``href`` values.

    Returns:
        A list of absolute URLs, de-duplicated and kept in the order in
        which they first appear on the page. An empty list is returned (and
        the failure printed) if the page cannot be fetched.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        html = response.text
    except Exception as e:
        # Best-effort scraping: report the failure and return no links
        print(f"Error fetching links from {url}: {e}")
        return []

    soup = BeautifulSoup(html, "html.parser")
    links = []

    for a in soup.select("a[href]"):
        href = a.get("href")
        if not href:
            continue
        # Convert relative URLs to absolute
        if href.startswith(("http://", "https://")):
            links.append(href)
        else:
            links.append(urljoin(url, href))

    # Remove duplicates while preserving page order. A set would return the
    # links in arbitrary order, making any later "first N links" truncation
    # pick an unpredictable subset.
    return list(dict.fromkeys(links))

print("Web scraping utilities ready!")


Web scraping utilities ready!


In [3]:
# Intelligent link selection using LLM
def select_relevant_links(url, model="gpt-4o-mini"):
    """Ask the LLM which links on a page are worth including in a brochure.

    Args:
        url: Address of the page whose links should be analysed.
        model: Name of the chat model passed to the completions API.

    Returns:
        A dict with a "links" key holding a list of dicts, each with a
        "type" and a "url" entry. ``{"links": []}`` is returned when the
        page has no links or when the model call or response parsing fails
        (the error is printed rather than raised).
    """
    print(f"🔍 Analyzing links for {url}...")

    # Get all links
    links = fetch_website_links(url)
    print(f"Found {len(links)} total links")

    if not links:
        # Nothing to choose from, so skip the model call entirely
        return {"links": []}

    # Create prompt for link selection
    link_system_prompt = """
    You are provided with a list of links found on a webpage.
    You are able to decide which of the links would be most relevant to include in a brochure about the company,
    such as links to an About page, or a Company page, or Careers/Jobs pages.
    You should respond in JSON as in this example:

    {
        "links": [
            {"type": "about page", "url": "https://full.url/goes/here/about"},
            {"type": "careers page", "url": "https://another.full.url/careers"}
        ]
    }
    """

    # Limit to first 50 links to avoid token limits. This is built outside
    # the f-string so that this comment is not sent to the model as prompt text.
    link_list = "\n".join(links[:50])

    user_prompt = f"""
    Here is the list of links on the website {url} -
    Please decide which of these are relevant web links for a brochure about the company, 
    respond with the full https URL in JSON format.
    Do not include Terms of Service, Privacy, email links.

    Links (some might be relative links):

    {link_list}
    """

    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": link_system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            response_format={"type": "json_object"}
        )
        result = response.choices[0].message.content
        links_data = json.loads(result)

        # The model output is untrusted: make sure it has the expected shape
        # before handing it to callers that index "type" and "url".
        candidates = links_data.get("links") if isinstance(links_data, dict) else None
        if not isinstance(candidates, list):
            raise ValueError("response JSON does not contain a 'links' list")

        valid_links = []
        for entry in candidates:
            if isinstance(entry, dict) and entry.get("url"):
                entry.setdefault("type", "relevant page")
                valid_links.append(entry)
        links_data["links"] = valid_links

        print(f"✅ Selected {len(valid_links)} relevant links")
        return links_data

    except Exception as e:
        # Best-effort: a failed selection degrades to "no extra pages"
        print(f"❌ Error selecting links: {e}")
        return {"links": []}

print("Intelligent link selection ready!")


Intelligent link selection ready!


In [4]:
# Content aggregation
def fetch_page_and_all_relevant_links(url, model="gpt-4o-mini"):
    """Build one text document from a landing page and its relevant links.

    Args:
        url: Address of the company's landing page.
        model: Name of the chat model used to pick the relevant links.

    Returns:
        A markdown-formatted string: a "Landing Page" section with the main
        page content, followed by one "Link" section per selected page.
        Pages that raise while being fetched get a section describing the
        error instead of their content.
    """
    print(f"📄 Fetching content for {url}...")

    # Get main page content
    main_content = fetch_website_contents(url)

    # Get relevant links
    relevant_links = select_relevant_links(url, model)

    # Build comprehensive content. The separators are real newlines ("\n");
    # the escaped form "\\n" would emit a literal backslash-n sequence.
    result = f"## Landing Page:\n\n{main_content}\n## Relevant Links:\n"

    for link in relevant_links.get("links", []):
        # Entries come from model output, so tolerate missing keys
        if not isinstance(link, dict) or not link.get("url"):
            continue
        link_url = link["url"]
        link_type = link.get("type", "relevant page")

        print(f"  📄 Fetching {link_type}: {link_url}")
        try:
            content = fetch_website_contents(link_url)
            result += f"\n\n### Link: {link_type}\n"
            result += content
        except Exception as e:
            print(f"    ❌ Error fetching {link_url}: {e}")
            result += f"\n\n### Link: {link_type} (Error)\n"
            result += f"Error fetching content: {e}"

    return result

print("Content aggregation ready!")


Content aggregation ready!


In [5]:
# Professional brochure generation
def create_company_brochure(company_name, url, model="gpt-4o-mini", style="professional",
                            max_content_chars=5000):
    """Generate a markdown brochure for a company from its website.

    Args:
        company_name: Name of the company, inserted into the prompt.
        url: Address of the company's landing page.
        model: Name of the chat model used for link selection and writing.
        style: "professional" or "humorous"; any other value selects a
            plain, neutral brochure prompt.
        max_content_chars: Maximum number of characters of scraped content
            sent to the model; longer content is cut and marked as
            truncated. Pass ``None`` to disable truncation.

    Returns:
        The brochure text produced by the model, or a string starting with
        "Error generating brochure:" if the model call fails (the error is
        also printed rather than raised).
    """
    print(f"🏢 Creating brochure for {company_name}...")

    # Get all content
    all_content = fetch_page_and_all_relevant_links(url, model)

    # Truncate if too long (to avoid token limits)
    if max_content_chars is not None and len(all_content) > max_content_chars:
        all_content = all_content[:max_content_chars] + "\n\n[Content truncated...]"

    # System prompts keyed by brochure style
    style_prompts = {
        "professional": """
        You are an assistant that analyzes the contents of several relevant pages from a company website
        and creates a short brochure about the company for prospective customers, investors and recruits.
        Respond in markdown without code blocks.
        Include details of company culture, customers and careers/jobs if you have the information.
        """,
        "humorous": """
        You are an assistant that analyzes the contents of several relevant pages from a company website
        and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
        Respond in markdown without code blocks.
        Include details of company culture, customers and careers/jobs if you have the information.
        """,
    }
    # Fallback used for any style that is not listed above
    default_prompt = """
        You are an assistant that analyzes the contents of several relevant pages from a company website
        and creates a short brochure about the company.
        Respond in markdown without code blocks.
        """
    brochure_system_prompt = style_prompts.get(style, default_prompt)

    user_prompt = f"""
    You are looking at a company called: {company_name}
    Here are the contents of its landing page and other relevant pages;
    use this information to build a short brochure of the company in markdown without code blocks.

    {all_content}
    """

    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": brochure_system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.7,
            max_tokens=1000
        )
        brochure = response.choices[0].message.content
        print("✅ Brochure generated successfully!")
        return brochure

    except Exception as e:
        # Best-effort: surface the failure as text so the notebook keeps running
        print(f"❌ Error generating brochure: {e}")
        return f"Error generating brochure: {e}"

def display_brochure(company_name, url, model="gpt-4o-mini", style="professional"):
    """Generate a company brochure and render it as markdown in the notebook.

    Args:
        company_name: Name of the company, used in the heading and the prompt.
        url: Address of the company's landing page.
        model: Name of the chat model used to produce the brochure.
        style: Brochure style passed through to ``create_company_brochure``.
    """
    brochure = create_company_brochure(company_name, url, model, style)
    # Real newlines are required here: with "\\n\\n" the heading is followed
    # by a literal backslash-n sequence and the brochure text runs into it.
    display(Markdown(f"# {company_name} Brochure\n\n{brochure}"))

print("Professional brochure generation ready!")


Professional brochure generation ready!


In [6]:
# Demo run: generate and render a brochure for a sample company
COMPANY_NAME = "Radio Africa Group"
COMPANY_URL = "https://radioafricagroup.co.ke/"

print(f"Testing brochure generation for {COMPANY_NAME}...")
display_brochure(
    company_name=COMPANY_NAME,
    url=COMPANY_URL,
    style="professional",
)


Testing brochure generation for Radio Africa Group...
🏢 Creating brochure for Radio Africa Group...
📄 Fetching content for https://radioafricagroup.co.ke/...
🔍 Analyzing links for https://radioafricagroup.co.ke/...
Found 34 total links
✅ Selected 5 relevant links
  📄 Fetching about page: https://staging.radioafrica.digital/about-us/
  📄 Fetching case studies page: https://radioafricagroup.co.ke/case-studies
  📄 Fetching contact page: https://radioafricagroup.co.ke/contact
  📄 Fetching careers page: https://staging.radioafrica.digital/careers
Error fetching https://staging.radioafrica.digital/careers: 404 Client Error: Not Found for url: https://staging.radioafrica.digital/careers
  📄 Fetching services page: https://radioafricagroup.co.ke/services.html
Error fetching https://radioafricagroup.co.ke/services.html: 404 Client Error: Not Found for url: https://radioafricagroup.co.ke/services.html
✅ Brochure generated successfully!


# Radio Africa Group Brochure\n\n# Radio Africa Group Brochure

## About Us
Radio Africa Group (RAG) is a leading media company based in Kenya, renowned for its diverse range of platforms that include six national radio stations, one television station, and a national newspaper. Our flagship brands such as Kiss FM, Classic 105, Radio Jambo, The Star newspaper, and Kiss TV reach millions of Kenyans daily, making us a cornerstone of the country's media landscape.

At RAG, we pride ourselves on being at the forefront of Africa's marketing and communication industry. We are dedicated to innovation, creativity, and collaboration, striving to shape better futures through impactful storytelling and entertainment.

## Our Mission
We aim to amplify Kenyan voices, champion local talent, and deliver meaningful journalism that connects citizens to national conversations. With a focus on digital transformation and strategic partnerships, we are committed to leading the evolution of modern African media.

## Company Culture
At Radio Africa Group, our culture is built on creativity, collaboration, and a shared passion for media. We celebrate our employees' milestones, as seen in our recent surprise birthday celebration for CEO Martin Khafafa, fostering a family-like environment that values each individual's contribution. We believe in pushing boundaries and nurturing talent, creating a dynamic workplace where innovation thrives.

## Our Audience
Our diverse clientele includes listeners and viewers across Kenya, with a special focus on engaging younger audiences through music, talk shows, podcasts, and live streaming. We aim to build meaningful connections between brands and their target audiences, utilizing our deep insights and cutting-edge technology.

## Careers at RAG
We are always on the lookout for talented individuals who share our passion for media and innovation. Joining Radio Africa Group means becoming part of a vibrant team that values creativity, growth, and professional development. If you're interested in shaping the future of media in Africa, explore career opportunities with us!

## Contact Us
For inquiries, advertising opportunities, or to learn more about our services, reach out to us at:

**Radio Africa Group**  
Lion Place, Westlands  
Nairobi, Kenya  
Phone: +254 711 046200  
Email: [info@radioafricagroup.co.ke](mailto:info@radioafricagroup.co.ke)  
Operating Hours: Mon-Fri: 10:00 AM - 09:00 PM  

Join us at Radio Africa Group, where we are transforming the media landscape and connecting communities across Kenya!