In [None]:
import requests
import json
from typing import List
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display, clear_output
from openai import OpenAI
from dotenv import load_dotenv
import os
from scraper import fetch_website_links, fetch_website_contents

In [None]:
# Load variables from a local .env file. override=True is passed so values from
# .env replace any that are already set in the process environment.
load_dotenv(override=True)
api_key = os.environ.get("OPENROUTER_API_KEY")

# Sanity-check the key and tell the user which of the three states applies:
# usable, padded with whitespace, or missing.
if api_key and api_key == api_key.strip():
    print("API key found and looks good so far!")
elif api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
else:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")

# Browser-like User-Agent header. Nothing in the visible notebook cells reads
# `headers`; presumably kept for ad-hoc `requests` calls - TODO confirm it is still needed.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
}

# The OpenAI client is pointed at OpenRouter's endpoint instead of the default one.
openrouter_url = "https://openrouter.ai/api/v1"
openai = OpenAI(base_url=openrouter_url, api_key=api_key)
# NOTE(review): OpenRouter model ids are normally provider-prefixed
# (e.g. "openai/gpt-5-nano") - confirm that this bare id resolves.
MODEL = "gpt-5-nano"

In [None]:
# Collect the two user inputs that drive the rest of the notebook. Surrounding
# whitespace is stripped because pasted values often carry a stray space or
# newline, which would otherwise end up inside the prompts and the fetched URL.
company_name = input("Enter the company name: ").strip()
url = input("Enter the company url: ").strip()

In [None]:
# System prompt for the link-selection call: describes the task and shows two
# examples of the JSON shape ({"links": [{"type": ..., "url": ...}]}) the model
# should answer with. Built as one expression from adjacent string literals.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
    "You should respond in JSON as in this example:"
    """
    EXAMPLE 1:
    {
        "links": [
            {"type": "about page", "url": "https://full.url/goes/here/about"},
            {"type": "careers page", "url": "https://another.full.url/careers"}
        ]
    }
    EXAMPLE 2:
    {
        "links": [
            {"type": "company blog", "url": "https://blog.example.com"},
            {"type": "our story", "url": "https://example.com/our-story"}
        ]
    }
    """
)

In [None]:
def get_links_user_prompt(url, max_links=20):
    """Build the user prompt that asks the model to pick brochure-worthy links.

    Args:
        url: Address of the company website; passed to ``fetch_website_links``
            and quoted in the prompt text.
        max_links: Upper bound on how many of the fetched links are listed in
            the prompt. Defaults to 20 (the previously hard-coded limit);
            pass ``None`` to include every link.

    Returns:
        The prompt string: fixed instructions followed by the selected links,
        one per line.
    """
    # Treat a falsy result (None / empty) from the scraper as "no links" so the
    # prompt is still well-formed.
    links = fetch_website_links(url) or []
    instructions = (
        f"Here is the list of links on the website of {url} - "
        "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.\n"
        "Links (some might be relative links):\n"
    )
    return instructions + "\n".join(links[:max_links])

In [None]:
def get_links(url):
    """Ask the model which links on ``url`` are relevant for a brochure.

    Sends ``link_system_prompt`` plus the prompt from ``get_links_user_prompt``
    to the chat completions endpoint, requesting a JSON object response.

    Args:
        url: Address of the company website.

    Returns:
        The parsed JSON reply as a dict containing a ``"links"`` list. If the
        reply is missing, is not valid JSON, or does not contain a ``"links"``
        list, ``{"links": []}`` is returned instead and a notice is printed, so
        callers can always iterate over ``result["links"]``.
    """
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)},
        ],
        response_format={"type": "json_object"},
    )
    raw_reply = response.choices[0].message.content

    try:
        # TypeError covers a None reply; ValueError covers json.JSONDecodeError.
        parsed = json.loads(raw_reply)
    except (TypeError, ValueError) as e:
        print(f"Could not parse the link selection as JSON: {e}")
        return {"links": []}

    if not isinstance(parsed, dict) or not isinstance(parsed.get("links"), list):
        print("The link selection did not contain a 'links' list; continuing without extra links.")
        return {"links": []}
    return parsed

In [None]:
def get_all_details(url):
    """Collect the landing-page text plus the text of each model-selected link.

    Args:
        url: Address of the company website.

    Returns:
        One string: a "Landing  page:" section with the landing-page contents,
        followed by a "### Link: <type>" section for every selected link whose
        contents could be fetched. Links that are malformed or fail to fetch
        are reported with a printed message and left out entirely.
    """
    result = "Landing  page:\n"
    result += fetch_website_contents(url)

    selection = get_links(url)
    # Tolerate a reply without a usable "links" list instead of raising.
    candidates = selection.get("links") if isinstance(selection, dict) else None

    for link in candidates or []:
        # Resolved up front so the error message below can never raise itself.
        target = link.get("url") if isinstance(link, dict) else None
        try:
            label = link["type"]
            contents = fetch_website_contents(link["url"])
        except Exception as e:
            # Best-effort: one bad link must not abort the whole brochure.
            print(f"Omitted link: {target}: {e}")
            continue
        # Header and body are appended together, and only after a successful
        # fetch, so a failed link never leaves a dangling header behind.
        result += f"\n\n### Link: {label}\n{contents}"
    return result

In [None]:
# System prompt for the brochure-writing call: a one-paragraph role description,
# a blank line, then the list of sections the brochure should contain. The
# pieces are joined with newlines; the trailing "" yields the final newline.
system_prompt = "\n".join([
    (
        "You are an assistant that analyzes the contents of several relevant pages from a company website "
        "and creates a great type of brochure about the company for prospective customers, investors, and recruits. "
        "Respond in markdown. Include details of company culture, customers, and careers/jobs if you have the information. Add emoticons where ever possible."
    ),
    "",
    "Please structure the brochure using the following sections:",
    "1. **Introduction**: A brief overview of the company.",
    "2. **Company Culture**: Emphasize fun, atmosphere, and any unique cultural elements.",
    "3. **Customers**: Mention notable customers or industries.",
    "4. **Careers/Jobs**: Highlight career opportunities.",
    "5. **Conclusion**: Wrap up with a final lighthearted message.",
    "",
])

In [None]:
def get_brochure_user_prompt(company_name, url):
    """Assemble the user prompt for the brochure-writing call.

    Args:
        company_name: Name of the company, quoted at the top of the prompt.
        url: Address of the company website; passed to ``get_all_details``.

    Returns:
        A two-line introduction followed by the text from ``get_all_details``,
        cut off after the first 20000 characters.
    """
    introduction = (
        f"You are looking at a company called: {company_name}\n"
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    )
    full_prompt = introduction + get_all_details(url)
    # Keep only the leading 20000 characters of the combined text.
    return full_prompt[:20000]

In [None]:
def stream_brochure():
    """Generate the brochure and render it incrementally as Markdown.

    Reads the module-level ``company_name`` and ``url`` to build the prompt,
    streams the completion, and updates a single display area as text arrives.
    The raw streamed text (code fences included) is accumulated in the global
    ``brochure_text`` so later cells can reuse it. Returns nothing.
    """
    global brochure_text
    brochure_text = ""

    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        stream=True,
    )

    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # A streamed chunk can arrive with an empty `choices` list (for example
        # a usage-only chunk); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content or ""
        if not content:
            continue
        brochure_text += content
        # Strip only the code-fence markers for display. The word "markdown" is
        # removed solely as part of an opening fence, never from the prose.
        visible = brochure_text.replace("```markdown", "").replace("```", "")
        update_display(Markdown(visible), display_id=display_handle.display_id)

In [None]:
def user_translate_brochure(language):
    """Translate the stored brochure and stream the translation as Markdown.

    Reads the global ``brochure_text`` (set by ``stream_brochure``), so that
    function must have run first. Clears the current cell output, then renders
    the translation incrementally in a single display area. Returns nothing.

    Args:
        language: Target language, inserted verbatim into the prompt
            (e.g. ``"French"``).
    """
    clear_output(wait=True)

    translation_stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "user", "content": f"Translate the following to {language}:\n {brochure_text}"}
        ],
        stream=True,
    )

    display_handle = display(Markdown(""), display_id=True)
    translated_text = ""

    for chunk in translation_stream:
        # A streamed chunk can arrive with an empty `choices` list (for example
        # a usage-only chunk); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content or ""
        if content:
            translated_text += content
            update_display(Markdown(translated_text), display_id=display_handle.display_id)

In [None]:
# Driver cell. Order matters: stream_brochure() fills the global `brochure_text`,
# which user_translate_brochure() reads when it builds the translation prompt.
stream_brochure()
# Ask for the target language only after the brochure has finished streaming.
language_choice = input("Enter the language to translate the brochure into (e.g., 'French'): ")
# user_translate_brochure() calls clear_output(wait=True) before it displays the translation.
user_translate_brochure(language_choice)