# Company Brochure Generator



This notebook builds a company brochure generator that uses multiple LLMs to increase efficiency and reduce redundancy while maintaining quality.

In [2]:
# imports

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI
import anthropic
import ollama


In [3]:
import gradio as gr

In [4]:
# Load API keys from the .env file (overriding any already-set environment
# variables) and report whether each one is available.

load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')

# Only a short prefix of each key is echoed, never the full value.
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)
print(
    f"Anthropic API Key exists and begins {anthropic_api_key[:7]}"
    if anthropic_api_key
    else "Anthropic API Key not set"
)

OpenAI API Key exists and begins sk-proj-
Anthropic API Key exists and begins sk-ant-


In [18]:
# Create the OpenAI client. No key is passed explicitly here
# (presumably the SDK reads OPENAI_API_KEY from the environment loaded above — confirm).
openai = OpenAI()
# Default chat model; get_links() uses it for the link-selection call.
MODEL = "gpt-4o-mini"

In [31]:
# Class to represent a webpage. All the relevant information is scraped from the URL provided.

# Browser-like User-Agent sent with every request made by Website.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes set by the constructor:
        url:   the URL that was requested
        body:  raw response bytes
        title: text of the <title> tag, or "No title found"
        text:  visible body text with script/style/img/input elements removed
        links: every non-empty href found on <a> tags (may be relative)
    """

    def __init__(self, url, timeout=10):
        """
        Fetch `url` and parse it.

        Args:
            url: address of the page to scrape.
            timeout: seconds to wait for the server before requests raises a
                timeout error; without it a stalled server blocks forever.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # .string is None when <title> is empty or wraps nested tags, so fall
        # back to the placeholder in that case too, not only when the tag is absent.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"
        if soup.body:
            # Drop elements that contribute no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        # Anchors without an href (or with an empty one) are discarded.
        self.links = [link for link in links if link]

    def get_contents(self):
        """Return the page title and text formatted as a prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

#### Let's check the links that were fetched from the PlayStation website

In [10]:
# Scrape the PlayStation store landing page and show every href collected from its <a> tags.
website = Website("https://store.playstation.com/en-ca/")
website.links

['https://www.playstation.com/en-ca',
 'https://www.playstation.com/en-ca/ps5/games/?smcid=other%3Aen-ca%3Ablank%3Aprimary%20nav%3Amsg-games%3Aps5',
 'https://www.playstation.com/en-ca/ps4/ps4-games/?smcid=other%3Aen-ca%3Ablank%3Aprimary%20nav%3Amsg-games%3Aps4',
 'https://www.playstation.com/en-ca/ps-vr2/games/?smcid=other%3Aen-ca%3Ablank%3Aprimary%20nav%3Amsg-games%3Aps-vr2',
 'https://www.playstation.com/en-ca/games/pc-games/?smcid=other%3Aen-ca%3Ablank%3Aprimary%20nav%3Amsg-games%3Apc',
 'https://www.playstation.com/en-ca/ps-plus/games/?smcid=other%3Aen-ca%3Ablank%3Aprimary%20nav%3Amsg-games%3Aps-plus',
 'https://www.playstation.com/en-ca/ps5/?smcid=other%3Aen-ca%3Ablank%3Aprimary%20nav%3Amsg-ps5%3Aconsole',
 'https://www.playstation.com/en-ca/ps5/games/?smcid=other%3Aen-ca%3Ablank%3Aprimary%20nav%3Amsg-ps5%3Agames',
 'https://www.playstation.com/en-ca/accessories/#controllers/?smcid=other%3Aen-ca%3Ablank%3Aprimary%20nav%3Amsg-ps5%3Acontrollers',
 'https://www.playstation.com/en-ca

#### As we can see, a significant number of links were fetched, most of which are unnecessary for building a brochure

## Using an LLM to trim down the links to just the relevant ones

In [22]:
# Define the system prompt for the LLM with a one-shot example.
# The example must itself be valid JSON, since the model imitates its shape.

link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [13]:
def get_links_user_prompt(website):
    """
    Build the user message asking the model to pick brochure-relevant links.

    Args:
        website: object exposing `url` (str) and `links` (iterable of str).

    Returns:
        The instruction text followed by one link per line.
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return instructions + heading + link_lines

In [14]:
def get_links(url):
    """
    Scrape `url` and ask the chat model which of its links belong in a brochure.

    Returns:
        The model's JSON reply parsed into a Python object.
    """
    page = Website(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        # Request a JSON object so the reply can be handed to json.loads.
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

### Here is the list of only those links that the model considered necessary to build a brochure

In [19]:
# Ask the model to pick the brochure-relevant links from the PlayStation store landing page.
get_links("https://store.playstation.com/en-ca/")

{'links': [{'type': 'about page',
   'url': 'https://www.playstation.com/en-ca/corporate/about-us/'},
  {'type': 'company page',
   'url': 'https://www.playstation.com/en-ca/corporate/playstation-studios/'},
  {'type': 'support page',
   'url': 'https://www.playstation.com/en-ca/support/'}]}

In [23]:
# Build the user prompt from the landing page plus the pages behind the selected links

def get_brochure_user_prompt(company_name, url):
    """
    Assemble the user message for the brochure request.

    Args:
        company_name: name of the company, inserted into the prompt.
        url: landing page whose contents (and linked pages) are scraped.

    Returns:
        The prompt text, cut to at most 5,000 characters.
    """
    char_limit = 5_000  # Truncate if more than 5,000 characters
    pieces = [
        f"You are looking at a company called: {company_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n",
        get_all_details(url),
    ]
    return "".join(pieces)[:char_limit]

In [24]:
# Build a function that iterates through these links and scrapes data

def get_all_details(url):
    """
    Collect the text of the landing page and of every link the model selected.

    Links come from an LLM reply, so they are treated as untrusted: relative
    URLs are resolved against `url`, entries without a URL are ignored, and a
    page that cannot be fetched is skipped instead of aborting the brochure.

    Args:
        url: landing page of the company website.

    Returns:
        One string with a "Landing page:" section followed by one section per
        successfully scraped link.
    """
    from urllib.parse import urljoin  # local import keeps this cell self-contained

    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    # The reply may lack the "links" key entirely if the model strays from the schema.
    for link in links.get("links", []):
        link_url = link.get("url")
        if not link_url:
            continue
        try:
            # urljoin leaves absolute URLs untouched and resolves relative ones.
            page = Website(urljoin(url, link_url))
        except requests.RequestException as exc:
            print(f"Skipping {link_url}: {exc}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result

In [27]:
# Define the main function that takes in the URL, Tone and Model and generates the Brochure

def create_brochure(company_name, url, tone, model):
    """
    Generate a markdown brochure for a company from its website.

    Args:
        company_name: name of the company, inserted into the user prompt.
        url: landing page of the company website.
        tone: tone requested for the brochure (e.g. "Humorous", "Serious").
        model: backend selector, either "GPT" or "Llama".

    Returns:
        The brochure as a markdown string. It is returned (not only displayed)
        so that a Gradio output component receives it.

    Raises:
        ValueError: if `model` is not one of the supported selectors. This is
            checked before any scraping or LLM call is made.
    """
    # Maps the UI selector to the concrete model identifier.
    model_names = {"GPT": "gpt-4o-mini", "Llama": "llama3.2"}
    if model not in model_names:
        raise ValueError(
            f"Unsupported model {model!r}; choose one of {', '.join(model_names)}"
        )
    model_name = model_names[model]

    # Implicit concatenation avoids the stray indentation that backslash
    # continuations would embed inside the prompt.
    system_brochure_prompt = (
        "You are an assistant that analyzes the contents of several relevant pages from a company website "
        f"and creates a short brochure with a {tone} tone about the company for prospective customers, investors and recruits. "
        "Respond in markdown. "
        "Include details of company culture, customers and careers/jobs if you have the information."
    )

    messages = [
        {"role": "system", "content": system_brochure_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]

    if model == "GPT":
        response = openai.chat.completions.create(
            model=model_name,
            messages=messages,
        )
        result = response.choices[0].message.content
    else:  # "Llama"
        response = ollama.chat(
            model=model_name,
            messages=messages,
        )
        result = response["message"]["content"]

    # Still rendered in the notebook for direct calls.
    display(Markdown(result))
    return result

In [30]:
# Build the Gradio UI: three text fields and a model selector feed
# create_brochure, and one Markdown component is declared as the output.

brochure_inputs = [
    gr.Textbox(label="Enter Website Name"),
    gr.Textbox(label="Enter Website URL (including http:/https)"),
    gr.Textbox(label="Enter Tone (eg. Humorous, Serious)"),
    gr.Dropdown(["GPT", "Llama"], label="Select AI Model"),
]
brochure_outputs = [gr.Markdown(label="Brochure:")]

view = gr.Interface(
    fn=create_brochure,
    inputs=brochure_inputs,
    outputs=brochure_outputs,
    flagging_mode="never",
)

view.launch()

* Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.




Found links: {'links': [{'type': 'company page', 'url': 'https://www.playstation.com/en-ca/corporate/about-us/'}, {'type': 'careers page', 'url': 'https://www.playstation.com/en-ca/corporate/playstation-studios/'}, {'type': 'support page', 'url': 'https://www.playstation.com/en-ca/support/'}]}


**PlayStation: Empowering the Power of Play**

[Image: PlayStation logo]

At PlayStation, we're passionate about delivering innovative and thrilling experiences to gamers around the world. Our mission is to push the boundaries of entertainment and innovation, just as we did when we first launched the original PlayStation in Japan in 1994.

**Our Values**

We believe that play is borderless, and that's why we're committed to creating a gaming experience without barriers that fits the needs of players of all abilities. Our values are:

* **Accessibility**: We strive to create inclusive communities for our players and staff.
* **Environmental Responsibility**: We're working to reduce our environmental impact and encourage our players to do the same.
* **Online Safety**: We prioritize making gaming welcoming, secure, and inclusive for everyone.
* **Diversity and Inclusion**: We value diversity and strive to create a work environment where all employees can be themselves.

**Our Products and Services**

We offer a range of flagship products and services that enhance your gaming experience:

* **PlayStation 5**: Our latest console, featuring an ultra-fast SSD, Tempest 3D Audio technology, and a generation of games that harness its lightning speed and graphical capabilities.
* **DualSense Wireless Controller**: Enhance your play with innovative haptic feedback and trigger effects in compatible games.
* **PlayStation VR2**: The virtual reality companion to PS5, bringing ultra-immersive 4K HDR gameplay and amazing new sensations from PlayStation VR2 Sense Technology.

**Our Game Development Studios**

We're proud of our family of game development studios creating incredible gaming worlds for PlayStation consoles and PC:

* **Horizon**: A far-future version of Earth where tribal cultures exist in the shadow of colossal, ferocious machines.
* **God of War**: An epic saga of vengeance and protection, with a demigod protagonist and his son.
* **The Last of Us**: Emotional storytelling, relatable characters, and intense action gameplay in a world ravaged by a deadly infection.

**Join Our Community**

If you're passionate about gaming and innovation, join our team! We offer exciting career opportunities in game development, marketing, and more. Visit [sonyinteractive.com](http://sonyinteractive.com) to learn more about our company culture and job openings.

**Get Ready to Play**

Experience the power of play with PlayStation. Explore our latest products and services, and discover new gaming worlds that will take your breath away.