In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [3]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
Apple = Website("https://www.apple.com")
Apple.links
print(Apple.links)
print(Apple.get_contents())

['/', '/us/shop/goto/store', '/mac/', '/ipad/', '/iphone/', '/watch/', '/apple-vision-pro/', '/airpods/', '/tv-home/', '/entertainment/', '/us/shop/goto/buy_accessories', 'https://support.apple.com/?cid=gn-ols-home-hp-tab', '/us/search', '/us/shop/goto/bag', '/iphone-16-pro/', '/iphone-16-pro/', '/us/shop/goto/buy_iphone/iphone_16_pro', '/iphone-16/', '/iphone-16/', '/us/shop/goto/buy_iphone/iphone_16', '/apple-watch-series-10/', '/apple-watch-series-10/', '/us/shop/goto/buy_watch/apple_watch_series_10', '/apple-watch-ultra-2/', '/apple-watch-ultra-2/', '/us/shop/goto/buy_watch/apple_watch_ultra_2', '/watch/', '/watch/', '/us/shop/goto/buy_watch', '/macbook-air/', '/macbook-air/', '/us/shop/goto/buy_mac/macbook_air', '/ipad-air/', '/ipad-air/', '/us/shop/goto/buy_ipad/ipad_air', '/airpods-pro/hearing-health/', '/airpods-pro/hearing-health/', '/us/shop/goto/buy_airpods/airpods_pro_2', '/us/shop/goto/buy_iphone/carrier_offers', '/us/shop/goto/buy_iphone/carrier_offers', '/us/shop/goto/tr

In [5]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, a Company page, Careers/Jobs pages, or Product/Services.\n"

link_system_prompt += "Here are a few examples of how you should respond:\n"

link_system_prompt += """
Example 1:
{
    "links": [
        {"type": "about page", "url": "https://example.com/about"},
        {"type": "careers page", "url": "https://example.com/careers"}
    ]
}

Example 2:
{
    "links": [
        {"type": "company page", "url": "https://sample.org/company"},
        {"type": "product page", "url": "https://sample.org/products"},
        {"type": "jobs page", "url": "https://sample.org/jobs"}
    ]
}
"""


In [6]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, a Company page, Careers/Jobs pages, or Product/Services.
Here are a few examples of how you should respond:

Example 1:
{
    "links": [
        {"type": "about page", "url": "https://example.com/about"},
        {"type": "careers page", "url": "https://example.com/careers"}
    ]
}

Example 2:
{
    "links": [
        {"type": "company page", "url": "https://sample.org/company"},
        {"type": "product page", "url": "https://sample.org/products"},
        {"type": "jobs page", "url": "https://sample.org/jobs"}
    ]
}



In [7]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [8]:
print(get_links_user_prompt(Apple))

Here is the list of links on the website of https://www.apple.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
/
/us/shop/goto/store
/mac/
/ipad/
/iphone/
/watch/
/apple-vision-pro/
/airpods/
/tv-home/
/entertainment/
/us/shop/goto/buy_accessories
https://support.apple.com/?cid=gn-ols-home-hp-tab
/us/search
/us/shop/goto/bag
/iphone-16-pro/
/iphone-16-pro/
/us/shop/goto/buy_iphone/iphone_16_pro
/iphone-16/
/iphone-16/
/us/shop/goto/buy_iphone/iphone_16
/apple-watch-series-10/
/apple-watch-series-10/
/us/shop/goto/buy_watch/apple_watch_series_10
/apple-watch-ultra-2/
/apple-watch-ultra-2/
/us/shop/goto/buy_watch/apple_watch_ultra_2
/watch/
/watch/
/us/shop/goto/buy_watch
/macbook-air/
/macbook-air/
/us/shop/goto/buy_mac/macbook_air
/ipad-air/
/ipad-air/
/us/shop/goto/buy_ipad/ipad_air
/airpods-pro/hearing-

In [9]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ],
        response_format={"type": "json_object"},
        
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [10]:
Apple = Website("https://www.apple.com")
Apple.links

['/',
 '/us/shop/goto/store',
 '/mac/',
 '/ipad/',
 '/iphone/',
 '/watch/',
 '/apple-vision-pro/',
 '/airpods/',
 '/tv-home/',
 '/entertainment/',
 '/us/shop/goto/buy_accessories',
 'https://support.apple.com/?cid=gn-ols-home-hp-tab',
 '/us/search',
 '/us/shop/goto/bag',
 '/iphone-16-pro/',
 '/iphone-16-pro/',
 '/us/shop/goto/buy_iphone/iphone_16_pro',
 '/iphone-16/',
 '/iphone-16/',
 '/us/shop/goto/buy_iphone/iphone_16',
 '/apple-watch-series-10/',
 '/apple-watch-series-10/',
 '/us/shop/goto/buy_watch/apple_watch_series_10',
 '/apple-watch-ultra-2/',
 '/apple-watch-ultra-2/',
 '/us/shop/goto/buy_watch/apple_watch_ultra_2',
 '/watch/',
 '/watch/',
 '/us/shop/goto/buy_watch',
 '/macbook-air/',
 '/macbook-air/',
 '/us/shop/goto/buy_mac/macbook_air',
 '/ipad-air/',
 '/ipad-air/',
 '/us/shop/goto/buy_ipad/ipad_air',
 '/airpods-pro/hearing-health/',
 '/airpods-pro/hearing-health/',
 '/us/shop/goto/buy_airpods/airpods_pro_2',
 '/us/shop/goto/buy_iphone/carrier_offers',
 '/us/shop/goto/buy_ip

In [11]:
get_links("https://www.apple.com")

{'links': [{'type': 'about page', 'url': 'https://www.apple.com/about/'},
  {'type': 'company page', 'url': 'https://www.apple.com/newsroom/'},
  {'type': 'careers page', 'url': 'https://www.apple.com/careers/us/'},
  {'type': 'product page', 'url': 'https://www.apple.com/products/'},
  {'type': 'environment page', 'url': 'https://www.apple.com/environment/'},
  {'type': 'diversity page', 'url': 'https://www.apple.com/diversity/'},
  {'type': 'leadership page', 'url': 'https://www.apple.com/leadership/'},
  {'type': 'investor relations page', 'url': 'https://investor.apple.com/'}]}

In [12]:
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result

In [13]:
print(get_all_details("https://www.apple.com"))

Found links: {'links': [{'type': 'about page', 'url': 'https://www.apple.com/about/'}, {'type': 'company page', 'url': 'https://www.apple.com/newsroom/'}, {'type': 'careers page', 'url': 'https://www.apple.com/careers/us/'}, {'type': 'product page', 'url': 'https://www.apple.com/mac/'}, {'type': 'product page', 'url': 'https://www.apple.com/ipad/'}, {'type': 'product page', 'url': 'https://www.apple.com/iphone/'}, {'type': 'product page', 'url': 'https://www.apple.com/watch/'}, {'type': 'product page', 'url': 'https://www.apple.com/apple-vision-pro/'}, {'type': 'product page', 'url': 'https://www.apple.com/airpods/'}, {'type': 'product page', 'url': 'https://www.apple.com/tv-home/'}]}
Landing page:
Webpage Title:
Apple
Webpage Contents:
Apple
Apple
Store
Mac
iPad
iPhone
Watch
Vision
AirPods
TV & Home
Entertainment
Accessories
Support
0
+
iPhone 16 Pro
Hello, Apple Intelligence.
Learn more
Buy
iPhone 16
Hello, Apple Intelligence.
Learn more
Buy
Apple Watch Series 10
Thinstant classic.
L

In [14]:
system_prompt = """
You are an assistant that generates professional and visually appealing marketing brochures for companies.

Based on the provided webpage content, write a clear and well-formatted brochure using markdown.

Follow this format:

# Welcome to <Company Name>

## <Slogan or Tagline if available>

### Who We Are
Brief description of the company's mission, vision, or purpose.

### Our Products
List major products or services. Use bold text for product names followed by short descriptions.

### Our Customers
Mention the types of customers or markets the company serves.

### Our Culture
Describe company values, innovation, inclusion, or work environment.

### Careers at <Company Name>
Highlight any hiring information, career opportunities, or links to job postings.

Only include sections that are relevant based on the content. Keep the tone professional and inspiring.
"""


In [15]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [16]:
get_brochure_user_prompt("Apple", "https://www.apple.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.apple.com/about/'}, {'type': 'company page', 'url': 'https://www.apple.com/newsroom/'}, {'type': 'careers page', 'url': 'https://www.apple.com/careers/us/'}, {'type': 'product page', 'url': 'https://www.apple.com/mac/'}, {'type': 'product page', 'url': 'https://www.apple.com/ipad/'}, {'type': 'product page', 'url': 'https://www.apple.com/iphone/'}, {'type': 'product page', 'url': 'https://www.apple.com/watch/'}, {'type': 'product page', 'url': 'https://www.apple.com/airpods/'}, {'type': 'product page', 'url': 'https://www.apple.com/apple-vision-pro/'}, {'type': 'product page', 'url': 'https://www.apple.com/apple-card/'}, {'type': 'service page', 'url': 'https://www.apple.com/apple-music/'}, {'type': 'service page', 'url': 'https://www.apple.com/apple-tv-plus/'}, {'type': 'service page', 'url': 'https://www.apple.com/apple-arcade/'}, {'type': 'service page', 'url': 'https://www.apple.com/apple-fitness-plus/'}, {'type': '

'You are looking at a company called: Apple\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nApple\nWebpage Contents:\nApple\nApple\nStore\nMac\niPad\niPhone\nWatch\nVision\nAirPods\nTV & Home\nEntertainment\nAccessories\nSupport\n0\n+\niPhone 16 Pro\nHello, Apple\xa0Intelligence.\nLearn more\nBuy\niPhone 16\nHello, Apple\xa0Intelligence.\nLearn more\nBuy\nApple Watch Series 10\nThinstant classic.\nLearn more\nBuy\nApple Watch Ultra 2\nNew finish. Never quit.\nLearn more\nBuy\nApple Watch\nLive healthier. Train better. Stay connected.\nLearn more\nBuy\nMacBook Air\nSky blue color.\nSky high performance with M4.\nLearn more\nBuy\nBuilt for Apple Intelligence.\niPad Air\nNow supercharged by the M3 chip.\nLearn more\nBuy\nBuilt for Apple Intelligence.\nAirPods\xa0Pro\xa02\nNow with a Hearing\xa0Aid feature.\n1\nLearn more\nBuy\nCarrier deals at\xa0Apple\nGet up to

In [17]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))
    return result

In [21]:
create_brochure("Apple", "https://www.apple.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.apple.com/newsroom/'}, {'type': 'careers page', 'url': 'https://www.apple.com/careers/us/'}, {'type': 'company page', 'url': 'https://www.apple.com/investor/'}, {'type': 'product page', 'url': 'https://www.apple.com/mac/'}, {'type': 'product page', 'url': 'https://www.apple.com/ipad/'}, {'type': 'product page', 'url': 'https://www.apple.com/iphone/'}, {'type': 'product page', 'url': 'https://www.apple.com/watch/'}, {'type': 'product page', 'url': 'https://www.apple.com/apple-vision-pro/'}, {'type': 'product page', 'url': 'https://www.apple.com/airpods/'}, {'type': 'service page', 'url': 'https://www.apple.com/apple-music/'}, {'type': 'service page', 'url': 'https://www.apple.com/apple-tv-plus/'}, {'type': 'service page', 'url': 'https://www.apple.com/apple-arcade/'}, {'type': 'service page', 'url': 'https://www.apple.com/apple-fitness-plus/'}]}


# Welcome to Apple

## Think Different.

### Who We Are
At Apple, we strive to create the most innovative products and services that enrich people's lives. Our mission is to empower individuals and inspire creativity, while continually pushing the boundaries of technology.

### Our Products
- **iPhone 16 Pro**: Experience the power of Apple Intelligence with enhanced performance and unmatched efficiency.
- **iPad Air**: Now supercharged by the M3 chip for outstanding capability and versatility.
- **MacBook Air**: Sky high performance wrapped in a beautiful design, available in stunning colors.
- **Apple Watch Series 10**: A timeless classic with features that help you live healthier, train better, and stay connected.
- **AirPods Pro 2**: Now equipped with a Hearing Aid feature, redefining audio experience.
- **Apple TV+**: Enjoy a plethora of entertaining content ranging from films to exclusive interviews.

### Our Customers
We proudly serve a diverse range of customers – from everyday individuals to creative professionals and businesses, ensuring that everyone can access and benefit from our innovative technology.

### Our Culture
At Apple, we foster a culture of innovation and inclusion. Our commitment to diversity and collaboration helps us create a work environment where everyone is valued and encouraged to think differently.

### Careers at Apple
We are always looking for talented individuals to join our team. Explore exciting career opportunities across various fields by visiting our [Careers page](https://www.apple.com/careers). Your journey at Apple might just begin here!

"# Welcome to Apple\n\n## Think Different.\n\n### Who We Are\nAt Apple, we strive to create the most innovative products and services that enrich people's lives. Our mission is to empower individuals and inspire creativity, while continually pushing the boundaries of technology.\n\n### Our Products\n- **iPhone 16 Pro**: Experience the power of Apple Intelligence with enhanced performance and unmatched efficiency.\n- **iPad Air**: Now supercharged by the M3 chip for outstanding capability and versatility.\n- **MacBook Air**: Sky high performance wrapped in a beautiful design, available in stunning colors.\n- **Apple Watch Series 10**: A timeless classic with features that help you live healthier, train better, and stay connected.\n- **AirPods Pro 2**: Now equipped with a Hearing Aid feature, redefining audio experience.\n- **Apple TV+**: Enjoy a plethora of entertaining content ranging from films to exclusive interviews.\n\n### Our Customers\nWe proudly serve a diverse range of customer

In [22]:
brochure_text = create_brochure("Apple", "https://www.apple.com")
print(type(brochure_text))
print(brochure_text[:500])  # Just a preview

Found links: {'links': [{'type': 'about page', 'url': 'https://www.apple.com/about/'}, {'type': 'company page', 'url': 'https://www.apple.com/newsroom/'}, {'type': 'careers page', 'url': 'https://www.apple.com/careers/'}, {'type': 'product page', 'url': 'https://www.apple.com/mac/'}, {'type': 'product page', 'url': 'https://www.apple.com/iphone/'}, {'type': 'product page', 'url': 'https://www.apple.com/ipad/'}, {'type': 'product page', 'url': 'https://www.apple.com/watch/'}, {'type': 'product page', 'url': 'https://www.apple.com/apple-vision-pro/'}, {'type': 'product page', 'url': 'https://www.apple.com/airpods/'}, {'type': 'product page', 'url': 'https://www.apple.com/tv-home/'}, {'type': 'company page', 'url': 'https://www.apple.com/environment/'}, {'type': 'company page', 'url': 'https://www.apple.com/diversity/'}, {'type': 'company page', 'url': 'https://www.apple.com/supply-chain/'}, {'type': 'company page', 'url': 'https://www.apple.com/education/'}, {'type': 'investor page', 'ur

# Welcome to Apple

## Hello, Apple Intelligence.

### Who We Are
At Apple, we strive to create the best products that enhance the lives of our customers. With a commitment to innovation and excellence, we set the standard for technology and design, merging cutting-edge performance with aesthetic beauty.

### Our Products
- **iPhone 16 Pro**: Experience advanced technology combined with sleek design.
- **Apple Watch Series 10**: A timeless classic for health, fitness, and connectivity.
- **MacBook Air**: Showcasing sky-high performance with the M4 chip in a stunning sky blue color.
- **iPad Air**: Now supercharged by the M3 chip for seamless productivity.
- **AirPods Pro 2**: Featuring a new Hearing Aid capability for enhanced accessibility.
- **Apple TV+**: A hub for premium entertainment, providing a range of shows and films.

### Our Customers
Our customers range from technology enthusiasts and professionals to everyday users who seek reliable, intuitive devices that seamlessly integrate into their lives.

### Our Culture
At Apple, we value innovation, creativity, and inclusivity. Our work environment fosters collaboration, where every employee is encouraged to contribute ideas that push boundaries and redefine industries.

### Careers at Apple
Join a dynamic team at Apple where you can make an impact. Explore our career opportunities and find your place in shaping the future of technology. Visit [Apple Careers](https://www.apple.com/careers/) for more information.

<class 'str'>
# Welcome to Apple

## Hello, Apple Intelligence.

### Who We Are
At Apple, we strive to create the best products that enhance the lives of our customers. With a commitment to innovation and excellence, we set the standard for technology and design, merging cutting-edge performance with aesthetic beauty.

### Our Products
- **iPhone 16 Pro**: Experience advanced technology combined with sleek design.
- **Apple Watch Series 10**: A timeless classic for health, fitness, and connectivity.
- **MacBo


In [23]:
def translate_to_spanish(brochure_text):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "system",
                "content": "Translate the following brochure into Spanish. Preserve markdown formatting."
            },
            {
                "role": "user",
                "content": brochure_text
            }
        ]
    )
    return response.choices[0].message.content


In [24]:
# Translate and show the Spanish version
spanish_brochure = translate_to_spanish(brochure_text)
display(Markdown("## Versión en Español"))
display(Markdown(spanish_brochure))


## Versión en Español

# Bienvenido a Apple

## Hola, Inteligencia Apple.

### Quiénes Somos
En Apple, nos esforzamos por crear los mejores productos que mejoran la vida de nuestros clientes. Con un compromiso con la innovación y la excelencia, establecemos el estándar para la tecnología y el diseño, fusionando un rendimiento de vanguardia con una belleza estética.

### Nuestros Productos
- **iPhone 16 Pro**: Experimenta tecnología avanzada combinada con un diseño elegante.
- **Apple Watch Series 10**: Un clásico atemporal para la salud, el fitness y la conectividad.
- **MacBook Air**: Presentando un rendimiento extraordinario con el chip M4 en un impresionante color azul cielo.
- **iPad Air**: Ahora súper cargado por el chip M3 para una productividad sin interrupciones.
- **AirPods Pro 2**: Con una nueva funcionalidad de audífono para una mejor accesibilidad.
- **Apple TV+**: Un hub para entretenimiento premium, proporcionando una variedad de programas y películas.

### Nuestros Clientes
Nuestros clientes van desde entusiastas de la tecnología y profesionales hasta usuarios cotidianos que buscan dispositivos fiables e intuitivos que se integren sin problemas en sus vidas.

### Nuestra Cultura
En Apple, valoramos la innovación, la creatividad y la inclusividad. Nuestro ambiente de trabajo fomenta la colaboración, donde se anima a cada empleado a contribuir con ideas que desafían límites y redefinen industrias.

### Carreras en Apple
Únete a un equipo dinámico en Apple donde puedes tener un impacto. Explora nuestras oportunidades laborales y encuentra tu lugar en la construcción del futuro de la tecnología. Visita [Carreras en Apple](https://www.apple.com/careers/) para más información.