## STEP 0: Install required libraries

In [48]:
# --- Install dependencies ---
!pip install -q openai python-dotenv beautifulsoup4 gradio

## STEP 1: Import packages and load OpenAI API key securely from Colab secrets

In [49]:
# --- Imports ---
import json
import os
from urllib.parse import urljoin

import gradio as gr
import openai
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display

In [50]:
# --- Secure API Key via Colab Secrets ---
# The key is read from Colab's secret store at runtime, so it never appears
# in the notebook source.
from google.colab import userdata
from openai import OpenAI

# Shared OpenAI client used by every chat-completion call in this notebook.
client = OpenAI(api_key=userdata.get("OPENAI_API_KEY"))

In [51]:
# --- Pin the model ---
# Single model name passed to the link-selection, brochure-generation and
# translation calls, so all three can be switched in one place.
MODEL = 'gpt-4o-mini'

## STEP 2: Define website scraper to extract content and links

In [52]:
# --- Web scraper class ---
# Sent with every request issued by Website.
headers = {"User-Agent": "Mozilla/5.0"}

class Website:
    """Snapshot of one web page: its title, visible text and outgoing links.

    Attributes:
        url: The URL that was requested.
        body: Raw response bytes.
        title: Stripped <title> text, or "No title found" when unavailable.
        text: Newline-separated text of <body> with script/style/img/input
            tags removed; "" when the page has no <body>.
        links: Non-empty href values of every <a> tag, in document order.
            Values are kept as written in the page, so they may be relative.
    """

    def __init__(self, url, timeout=10):
        """Download `url` and parse it.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to `requests.get` so an unresponsive
                server cannot block the notebook indefinitely.

        Raises:
            requests.RequestException: Propagated from `requests.get`
                (for example on timeout or connection failure).
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, "html.parser")

        # `.string` is None when <title> is empty or contains nested tags,
        # so guard before calling .strip().
        title_string = soup.title.string if soup.title else None
        self.title = title_string.strip() if title_string else "No title found"

        if soup.body:
            # Drop elements that contribute no readable text.
            for tag in soup.body(["script", "style", "img", "input"]):
                tag.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and text formatted as a prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

## STEP 3: Use GPT-4o-mini to decide which links are relevant

In [53]:
# System prompt for the link-selection call. The sample reply below must be
# valid JSON itself, because the model is asked to imitate it.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [54]:
def get_links_user_prompt(website):
    """Build the user message that asks the model to pick brochure-worthy links.

    Args:
        website: Object exposing `url` (str) and `links` (iterable of str).

    Returns:
        The instruction text followed by one link per line.
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return instructions + heading + link_lines

In [55]:
def get_filtered_links(url):
    """Ask the model which links found on `url` belong in a company brochure.

    Args:
        url: Page whose links should be classified.

    Returns:
        The model's JSON reply decoded with `json.loads`. The system prompt
        requests an object with a "links" list of {"type", "url"} entries.
    """
    page = Website(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = client.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        # Ask the API for a JSON object so the reply can be parsed directly.
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

## STEP 4: Collect all content from landing page + relevant links

In [56]:
def get_all_details(url):
    """Collect the landing page plus every model-selected page as one text blob.

    Args:
        url: Landing page of the company website.

    Returns:
        A string starting with the landing page contents, followed by one
        section per selected link (a line with the link type, then the page
        contents). Links that cannot be fetched are skipped.
    """
    result = "Landing page:\n"
    landing_page = Website(url)
    result += landing_page.get_contents()

    links = get_filtered_links(url)
    # The model's reply is not guaranteed to contain a "links" list.
    for link in links.get("links") or []:
        link_url = link.get("url")
        if not link_url:
            continue
        try:
            # The model may echo a relative href; resolve it against the
            # landing page so requests receives an absolute URL.
            page = Website(urljoin(url, link_url))
        except requests.RequestException:
            # One unreachable page should not abort the whole brochure.
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result

## STEP 5: Brochure generation prompt

In [57]:
# System prompt for brochure generation. Built from adjacent string literals
# so every sentence boundary carries an explicit space.
system_prompt_tone = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

def get_brochure_user_prompt(company_name, url, tone, max_chars=5000):
    """Build the user message for brochure generation.

    Args:
        company_name: Name shown to the model as the brochure subject.
        url: Landing page; its contents and selected sub-pages are appended.
        tone: Writing tone requested for the brochure.
        max_chars: Maximum length of the returned prompt; the scraped content
            at the end is what gets cut. Pass None to disable truncation.

    Returns:
        The prompt text, truncated to `max_chars` characters.
    """
    prompt = f"You are looking at a company called: {company_name}.\n"
    # State the task first so the "contents" heading is directly followed by
    # the scraped pages it announces.
    prompt += f"Use this to build a short brochure with a {tone} tone.\n"
    prompt += "Here are the contents of its landing page and other relevant pages:\n"
    prompt += get_all_details(url)
    return prompt[:max_chars]

## STEP 6: Generate brochure in English using GPT-4o-mini

In [58]:
def create_brochure(company_name, url, tone="professional"):
    """Generate a markdown brochure for a company from its website.

    Args:
        company_name: Name of the company.
        url: Landing page of the company website.
        tone: Writing tone to request; defaults to "professional".

    Returns:
        The content of the model's first reply choice.
    """
    conversation = [
        {"role": "system", "content": system_prompt_tone},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url, tone)},
    ]
    completion = client.chat.completions.create(
        model=MODEL,
        messages=conversation,
        temperature=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

## STEP 7: Translate the brochure (skipped when the target language is English)

In [59]:
def translate_brochure(text, target_language):
    """Translate a brochure into `target_language`.

    Args:
        text: Brochure markdown to translate.
        target_language: Name of the language to translate into.

    Returns:
        `text` unchanged when the target is "American English", "English" or
        "en" (case-insensitive); otherwise the content of the model's first
        reply choice.
    """
    english_aliases = ("american english", "english", "en")
    already_english = target_language.lower() in english_aliases
    if already_english:
        return text

    request_text = (
        f"Please translate the following brochure to {target_language}. "
        f"Only return the translated markdown:\n\n{text}"
    )
    completion = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": request_text}],
        temperature=0.5,
    )
    return completion.choices[0].message.content

## STEP 8: Combine logic for Gradio app

In [60]:
def brochure_app(company_name, company_url, tone, language):
    """Gradio callback: generate a brochure and translate it if requested.

    Args:
        company_name: Company name typed by the user.
        company_url: Company website URL typed by the user. Surrounding
            whitespace is ignored and "https://" is prepended when no
            http/https scheme is given.
        tone: Writing tone for the brochure.
        language: Output language for the brochure.

    Returns:
        The brochure markdown, or a message starting with "❌ Error:" when
        the URL is blank or any step raises.
    """
    company_url = (company_url or "").strip()
    if not company_url:
        # Fail with a readable message instead of a raw requests error.
        return "❌ Error: Please provide a company website URL."
    # requests rejects URLs that have no scheme.
    if not company_url.lower().startswith(("http://", "https://")):
        company_url = "https://" + company_url

    try:
        english = create_brochure(company_name, company_url, tone)
        # translate_brochure returns the text unchanged for English targets,
        # so no separate language check is needed here.
        return translate_brochure(english, language)
    except Exception as e:
        # Shown in the UI so the user sees why generation failed.
        return f"❌ Error: {str(e)}"

## STEP 9: Launch Gradio UI

In [61]:
# Build the UI: components are created inside the Blocks context, grouped
# into rows by the nested `with gr.Row()` blocks.
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Multilingual Company Brochure Generator")

    # First row: free-text inputs for the company name and website URL.
    with gr.Row():
        name = gr.Textbox(label="Company Name", placeholder="e.g., CNN")
        url = gr.Textbox(label="Company Website URL", placeholder="e.g., https://edition.cnn.com")

    # Second row: dropdowns for brochure tone and output language.
    with gr.Row():
        tone = gr.Dropdown(
                  ["professional", "jovial", "friendly", "inspirational", "luxury", "youthful", "serious"],
                  label="Brochure Tone",
                  value="professional"
                )

        language = gr.Dropdown(
                  [
                      "American English", "British English", "French", "Spanish", "German", "Hindi",
                      "Mandarin Chinese", "Japanese", "Korean", "Arabic", "Portuguese", "Russian",
                      "Indonesian", "Malay", "Vietnamese", "Italian", "Dutch", "Thai", "Swedish"
                  ],
                  label="Brochure Language",
                  value="American English"
                )

    # Markdown component that receives brochure_app's return value.
    output = gr.Markdown(label="Generated Brochure")

    # Clicking the button calls brochure_app with the four inputs, in this order.
    btn = gr.Button("Generate Brochure")
    btn.click(brochure_app, inputs=[name, url, tone, language], outputs=output)

# Start the Gradio app.
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1bda93c2fcc9aac468.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


