# Day 5 Solution - Business Solution: Company Brochure Generator

This is my solution to the Day 5 assignment: a business tool that scrapes a company's website, uses an LLM to pick the most relevant pages, and generates a Markdown company brochure from their content.

## Features Implemented:
- Intelligent link selection using LLM
- Multi-page content aggregation
- Professional brochure generation
- Model comparison and optimization
- Business-ready output formatting
- Cost-effective processing strategies


In [1]:
# Day 5 Solution - Imports and Setup
import os
import json
import ssl
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from dotenv import load_dotenv
import ollama
import time

# Load environment variables
# override=True is passed so that (per python-dotenv's documented behaviour)
# values from the .env file take precedence over variables already set.
load_dotenv(override=True)

# SSL fix for Windows
# SECURITY NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, i.e. certificate verification is turned off for standard
# library HTTPS clients in this process. The two environment variables are
# presumably meant to do the same for other HTTP stacks -- confirm they have
# any effect when set after interpreter start-up. Acceptable only as a local
# workaround; do not ship this in anything that handles real credentials.
ssl._create_default_https_context = ssl._create_unverified_context
os.environ['PYTHONHTTPSVERIFY'] = '0'
os.environ['CURL_CA_BUNDLE'] = ''

# Initialize clients
# NOTE: this binds the name `openai` to a client instance; the functions in
# this file call `openai.chat.completions.create(...)` on that instance.
# Presumably the client reads its API key from the environment loaded above
# -- TODO confirm.
openai = OpenAI()

# Constants
# Model identifiers: MODEL_GPT is used by the test driver in this file;
# MODEL_LLAMA is defined here but not referenced in the visible code.
MODEL_GPT = 'gpt-4o-mini'
MODEL_LLAMA = 'llama3.2'

print("Day 5 setup complete! Ready for business solution development.")


Day 5 setup complete! Ready for business solution development.


In [None]:
# Enhanced Web Scraping Functions
# Request headers shared by the scraping helpers: a desktop-browser
# User-Agent string assembled from its three space-separated product tokens.
HEADERS = {
    "User-Agent": " ".join(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/117.0.0.0 Safari/537.36",
        ]
    )
}

def fetch_website_contents(url, char_limit=2000):
    """Fetch a web page and return its title plus visible text.

    Args:
        url: Address of the page to download.
        char_limit: Maximum number of characters of the combined
            title-and-text string to return.

    Returns:
        The page title, a blank line, and the whitespace-normalised page
        text, truncated to ``char_limit`` characters. If the download fails
        for any reason, the fixed string
        ``"Error: Could not fetch website content"`` is returned instead of
        raising.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        html = response.text
    except Exception as e:
        # Deliberately best-effort: one unreachable page must not abort the
        # whole brochure run, so report the problem and return a placeholder.
        print(f"Error fetching {url}: {e}")
        return "Error: Could not fetch website content"

    soup = BeautifulSoup(html, "html.parser")

    # Remove script and style elements so their source does not leak into
    # the extracted text.
    for tag in soup(["script", "style"]):
        tag.decompose()

    title = soup.title.get_text(strip=True) if soup.title else "No title found"

    # Clean up whitespace: strip each line, split on double spaces, drop the
    # empty pieces and re-join everything with single spaces.
    lines = (line.strip() for line in soup.get_text().splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    text = " ".join(chunk for chunk in chunks if chunk)

    # Real newlines here: the previous "\\n\\n" literal inserted the four
    # characters backslash-n-backslash-n between title and body.
    return f"{title}\n\n{text}".strip()[:char_limit]

def fetch_website_links(url):
    """Fetch the links found on a web page as absolute URLs.

    Args:
        url: Address of the page whose ``<a href=...>`` targets are collected.

    Returns:
        A list of unique link strings in the order they first appear on the
        page. Hrefs that do not start with ``http://`` or ``https://`` are
        resolved against ``url``. An empty list is returned if the page
        cannot be downloaded.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        html = response.text
    except Exception as e:
        # Best-effort: report the failure and let the caller continue with
        # no links rather than aborting.
        print(f"Error fetching links from {url}: {e}")
        return []

    soup = BeautifulSoup(html, "html.parser")
    links = []

    for a in soup.select("a[href]"):
        href = a.get("href")
        if not href:
            continue
        # Convert relative URLs to absolute
        if href.startswith(("http://", "https://")):
            links.append(href)
        else:
            links.append(urljoin(url, href))

    # Remove duplicates while keeping document order. A plain set() would
    # return the links in an arbitrary, run-dependent order, which matters
    # to callers that only look at the first N entries.
    return list(dict.fromkeys(links))

print("Enhanced web scraping functions defined!")


In [None]:
# Intelligent Link Selection
def select_relevant_links(url, model="gpt-4o-mini", max_links=50):
    """Use an LLM to select the links worth including in a company brochure.

    Args:
        url: Page whose links are collected with ``fetch_website_links``.
        model: Model name. Names starting with ``"gpt"`` are sent to the
            OpenAI client; anything else is sent to Ollama.
        max_links: Maximum number of collected links included in the prompt
            (keeps the prompt within token limits).

    Returns:
        A dict of the form ``{"links": [{"type": ..., "url": ...}, ...]}``.
        Every returned entry is a dict with a non-empty ``"url"`` and a
        ``"type"``. ``{"links": []}`` is returned when the page has no links
        or when the model call or JSON parsing fails.
    """
    print(f"🔍 Analyzing links for {url}...")

    # Get all links
    links = fetch_website_links(url)
    print(f"Found {len(links)} total links")

    if not links:
        # Nothing to choose from, so skip the model call entirely.
        print("⚠️ No links found; skipping link selection")
        return {"links": []}

    # Create prompt for link selection
    link_system_prompt = """
    You are provided with a list of links found on a webpage.
    You are able to decide which of the links would be most relevant to include in a brochure about the company,
    such as links to an About page, or a Company page, or Careers/Jobs pages.
    You should respond in JSON as in this example:

    {
        "links": [
            {"type": "about page", "url": "https://full.url/goes/here/about"},
            {"type": "careers page", "url": "https://another.full.url/careers"}
        ]
    }
    """

    # Built outside the f-string so that no code comment ends up inside the
    # prompt text sent to the model.
    link_list = "\n".join(links[:max_links])

    user_prompt = f"""
    Here is the list of links on the website {url} -
    Please decide which of these are relevant web links for a brochure about the company, 
    respond with the full https URL in JSON format.
    Do not include Terms of Service, Privacy, email links.

    Links (some might be relative links):

    {link_list}
    """

    messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    try:
        if model.startswith("gpt"):
            response = openai.chat.completions.create(
                model=model,
                messages=messages,
                response_format={"type": "json_object"},
            )
            result = response.choices[0].message.content
        else:
            # Ask Ollama for JSON output as well, so the reply can be parsed
            # the same way as the OpenAI one.
            response = ollama.chat(
                model=model,
                messages=messages,
                format="json",
            )
            result = response['message']['content']

        links_data = json.loads(result)
        raw_links = links_data.get("links", []) if isinstance(links_data, dict) else []
        if not isinstance(raw_links, list):
            raw_links = []

        # Keep only well-formed entries so callers can rely on "type"/"url".
        selected = []
        for entry in raw_links:
            if not isinstance(entry, dict) or not entry.get("url"):
                continue
            link_url = str(entry["url"])
            if not link_url.startswith(("http://", "https://")):
                # The model may echo a relative link; resolve it against the page.
                link_url = urljoin(url, link_url)
            selected.append(
                {**entry, "type": entry.get("type") or "relevant page", "url": link_url}
            )

        print(f"✅ Selected {len(selected)} relevant links")
        return {"links": selected}

    except Exception as e:
        # Best-effort: a failed selection degrades to "no extra pages".
        print(f"❌ Error selecting links: {e}")
        return {"links": []}

print("Intelligent link selection function defined!")


In [None]:
# Content Aggregation
def fetch_page_and_all_relevant_links(url, model="gpt-4o-mini"):
    """Fetch the landing page and every LLM-selected linked page as one text.

    Args:
        url: Landing page of the company website.
        model: Model name forwarded to ``select_relevant_links``.

    Returns:
        A single string: a "Landing Page" section with the landing page
        content, followed by one "Link: <type>" section per selected link.
        Entries without a URL are skipped; a link whose fetch raises gets an
        "(Error)" section instead of content.
    """
    print(f"📄 Fetching content for {url}...")

    # Get main page content
    main_content = fetch_website_contents(url)

    # Get relevant links
    relevant_links = select_relevant_links(url, model)

    # Build comprehensive content; sections are collected in a list and
    # joined once at the end.
    sections = [f"## Landing Page:\n\n{main_content}\n## Relevant Links:\n"]

    for link in relevant_links.get("links", []):
        # Entries come from model output, so read keys defensively instead of
        # indexing: a missing key must not abort the whole aggregation.
        if not isinstance(link, dict):
            continue
        link_url = link.get("url")
        if not link_url:
            continue
        link_type = link.get("type", "page")

        print(f"  📄 Fetching {link_type}: {link_url}")
        try:
            content = fetch_website_contents(link_url)
            sections.append(f"\n\n### Link: {link_type}\n{content}")
        except Exception as e:
            print(f"    ❌ Error fetching {link_url}: {e}")
            sections.append(
                f"\n\n### Link: {link_type} (Error)\nError fetching content: {e}"
            )

    return "".join(sections)

print("Content aggregation function defined!")


In [None]:
# Professional Brochure Generation
def create_company_brochure(company_name, url, model="gpt-4o-mini", style="professional",
                            max_content_chars=5000):
    """Generate a company brochure in Markdown from the company's website.

    Args:
        company_name: Name of the company, inserted into the user prompt.
        url: Landing page of the company website.
        model: Model name. Names starting with ``"gpt"`` are sent to the
            OpenAI client (temperature 0.7, at most 1000 output tokens);
            anything else is sent to Ollama.
        style: ``"professional"`` or ``"humorous"``; any other value selects
            a shorter generic system prompt.
        max_content_chars: Scraped content longer than this is cut to this
            many characters and a truncation marker is appended.

    Returns:
        The brochure text produced by the model, or a string starting with
        ``"Error generating brochure:"`` if the model call fails.
    """
    print(f"🏢 Creating brochure for {company_name}...")

    # Get all content
    all_content = fetch_page_and_all_relevant_links(url, model)

    # Truncate if too long (to avoid token limits)
    if len(all_content) > max_content_chars:
        all_content = all_content[:max_content_chars] + "\n\n[Content truncated...]"

    # System prompt per style; unknown styles fall back to the generic prompt.
    style_prompts = {
        "professional": """
        You are an assistant that analyzes the contents of several relevant pages from a company website
        and creates a short brochure about the company for prospective customers, investors and recruits.
        Respond in markdown without code blocks.
        Include details of company culture, customers and careers/jobs if you have the information.
        """,
        "humorous": """
        You are an assistant that analyzes the contents of several relevant pages from a company website
        and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
        Respond in markdown without code blocks.
        Include details of company culture, customers and careers/jobs if you have the information.
        """,
    }
    default_prompt = """
        You are an assistant that analyzes the contents of several relevant pages from a company website
        and creates a short brochure about the company.
        Respond in markdown without code blocks.
        """
    brochure_system_prompt = style_prompts.get(style, default_prompt)

    user_prompt = f"""
    You are looking at a company called: {company_name}
    Here are the contents of its landing page and other relevant pages;
    use this information to build a short brochure of the company in markdown without code blocks.

    {all_content}
    """

    messages = [
        {"role": "system", "content": brochure_system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    try:
        if model.startswith("gpt"):
            response = openai.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.7,
                max_tokens=1000,
            )
            brochure = response.choices[0].message.content
        else:
            response = ollama.chat(model=model, messages=messages)
            brochure = response['message']['content']

        print("✅ Brochure generated successfully!")
        return brochure

    except Exception as e:
        # Best-effort: report the failure as the brochure text so a batch run
        # over several companies keeps going.
        print(f"❌ Error generating brochure: {e}")
        return f"Error generating brochure: {e}"

def display_brochure(company_name, url, model="gpt-4o-mini", style="professional"):
    """Generate a company brochure and render it as Markdown in the notebook.

    Args:
        company_name: Name of the company, used in the heading and prompt.
        url: Landing page of the company website.
        model: Model name forwarded to ``create_company_brochure``.
        style: Brochure style forwarded to ``create_company_brochure``.

    Returns:
        None. The brochure is shown via ``display(Markdown(...))``.
    """
    brochure = create_company_brochure(company_name, url, model, style)
    # Real newlines are required here: with a literal backslash-n the whole
    # brochure would be rendered as part of the heading line.
    display(Markdown(f"# {company_name} Brochure\n\n{brochure}"))

print("Professional brochure generation functions defined!")


In [None]:
# Test Day 5 Solution - Business Brochure Generator
# Smoke test: generates and displays a professional brochure for each company
# below using MODEL_GPT. Requires network access and a working OpenAI client.
print("## Day 5 Solution Test - Business Brochure Generator")
print("="*60)

# Test with different companies
test_companies = [
    ("Hugging Face", "https://huggingface.co"),
    ("OpenAI", "https://openai.com"),
    ("Anthropic", "https://anthropic.com")
]

print("🏢 Testing brochure generation for different companies...")

for company_name, url in test_companies:
    # "\n" (not "\\n") so a blank line is printed rather than the two
    # characters backslash and n.
    print(f"\n{'='*50}")
    print(f"Testing: {company_name}")
    print(f"URL: {url}")
    print('='*50)

    try:
        # Test with professional style
        print(f"\n📄 Generating professional brochure for {company_name}...")
        display_brochure(company_name, url, model=MODEL_GPT, style="professional")

    except Exception as e:
        # Keep going with the next company if one of them fails.
        print(f"❌ Error with {company_name}: {e}")

    print("\n" + "-"*40)
