# In [None]:
import os
import json
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

# Chat model name passed to every chat-completions request in this file.
MODEL = 'gpt-4o-mini'

# Default brochure subject used by the script entry point.
company_name, url = "League of Legends", "https://www.leagueoflegends.com/en-us/"

# Browser-like headers sent with every page fetch (presumably so that sites
# which reject non-browser clients still respond -- confirm if this matters).
# Adjacent string literals are concatenated with no separator, so each
# fragment carries its own trailing space to keep the product tokens apart.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}
# Example reply embedded verbatim in the link-selection system prompt so the
# model can see the expected JSON shape.
_LINKS_RESPONSE_EXAMPLE = """
    {
        "links": [
            {"type": "about page", "url": "https://full.url/goes/here/about"},
            {"type": "careers page", "url": "https://another.full.url/careers"}
        ]
    }
    """

# System prompt for the link-selection request: describes the task and the
# JSON structure the reply must follow.
links_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    + "You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an about page or careers page. "
    + "You should respond in JSON as in this example: "
    + _LINKS_RESPONSE_EXAMPLE
)

# System prompt for the brochure-writing request.
brochure_system_prompt = " ".join((
    "You analyze company webpages and create a humorous, engaging brochure in markdown.",
    "Include details about company culture, customers, and career opportunities.",
))

# Load variables from the .env file (override=True lets them replace values
# already present in the environment), then read the OpenAI key.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Fail fast on a missing key or one without the expected 'sk-proj-' prefix,
# rather than at the first API call.
if not (api_key and api_key.startswith('sk-proj-')):
    raise ValueError("API key missing or invalid, check .env file")

# Shared client used by every chat-completions request in this file; it is
# constructed with the key that was just validated.
openai = OpenAI(api_key=api_key)


# timeout = 10 adds a timeout to prevent indefinite hangs if the webpage is unresponsive
class Website:
    """A fetched web page reduced to its title, body text and link targets.

    Attributes:
        url: The address the page was fetched from.
        title: Text of the ``<title>`` element, or ``"No title found"`` when
            the page has no usable title.
        text: Text of ``<body>`` with script, style, img and input elements
            removed; ``""`` when the page has no body.
        links: Non-empty ``href`` values of every ``<a>`` element, in
            document order and exactly as written (they may be relative).
    """

    def __init__(self, url):
        """Download ``url`` and extract its title, text and links.

        Args:
            url: Address of the page to fetch.

        Raises:
            requests.RequestException: Propagated from ``requests.get``,
                e.g. on connection failure or when the timeout expires.
        """
        self.url = url
        # The 10-second timeout keeps an unresponsive site from hanging the
        # run. The HTTP status is not checked, so an error page is parsed
        # like any other page.
        response = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``.string`` is None when <title> is empty or contains nested
        # markup; fall back so the title never renders as "None".
        title = soup.title.string if soup.title else None
        self.title = title if title is not None else "No title found"

        # Drop elements that contribute no readable text.
        for tag in ("script", "style", "img", "input"):
            for element in soup(tag):
                element.decompose()

        self.text = soup.body.get_text(separator="\n", strip=True) if soup.body else ""
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and text formatted as one prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
        
def get_links(url):
    """Ask the model which links on ``url`` are worth including in a brochure.

    The page is fetched, its link list is sent to the chat model together
    with ``links_system_prompt``, and the JSON reply is parsed.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        A dict with a ``"links"`` key holding a list of dicts. On any failure
        (fetching, the API call, JSON parsing, or a reply of the wrong shape)
        the error is printed and ``{"links": []}`` is returned, so callers
        can always iterate ``result["links"]``.
    """
    try:
        website = Website(url)
        links_user_prompt = (
            f"Here is the list of links on the website of {website.url} - "
            "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. "
            "Do not include Terms of Service, Privacy, email links. "
            "Links (some might be relative links): \n"
            + "\n".join(website.links)
        )
        response = openai.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": links_system_prompt},
                {"role": "user", "content": links_user_prompt}
            ],
            response_format={"type": "json_object"}
        )
        parsed = json.loads(response.choices[0].message.content)
    except Exception as e:
        # Deliberately broad: link discovery is best-effort, and a failure
        # here should degrade to "no extra links" instead of aborting.
        print(f"Error fetching links: {e}")
        return {"links": []}

    # Valid JSON is not necessarily the requested structure; make sure the
    # reply really is {"links": [...]} before handing it to callers.
    if not isinstance(parsed, dict) or not isinstance(parsed.get("links"), list):
        print("Error fetching links: unexpected response structure")
        return {"links": []}

    # Keep only dict entries so callers can safely use ``link.get(...)``.
    parsed["links"] = [link for link in parsed["links"] if isinstance(link, dict)]
    return parsed
        
def get_all_details(url):
    """Collect the landing page plus every relevant linked page as one string.

    Args:
        url: Address of the company's landing page.

    Returns:
        The landing page contents followed by one titled section per linked
        page that could be fetched.

    Raises:
        requests.RequestException: If the landing page itself cannot be
            fetched. Failures on linked pages are printed and skipped.
    """
    sections = [f"Landing page:\n{Website(url).get_contents()}"]
    links = get_links(url)
    for link in links.get("links") or []:
        if not isinstance(link, dict):
            continue
        link_url = link.get('url')
        if not link_url:
            continue
        # The model may leave out "type"; fall back to a generic heading
        # instead of raising KeyError.
        heading = str(link.get('type') or "page").title()
        try:
            page = Website(link_url)
        except requests.RequestException as e:
            # One unreachable or malformed link should not cost the whole
            # brochure; report it and move on.
            print(f"Skipping {link_url}: {e}")
            continue
        sections.append(f"\n\n{heading}:\n{page.get_contents()}")
    return "".join(sections)
    
def brochure_user_prompt(company_name, url, max_chars=5000):
    """Build the user prompt for the brochure request.

    Args:
        company_name: Name of the company, quoted in the prompt.
        url: Landing page whose contents (and relevant linked pages) are
            appended to the prompt.
        max_chars: Maximum length of the returned prompt in characters.
            Defaults to 5000; pass ``None`` to disable truncation.

    Returns:
        The prompt text, cut to at most ``max_chars`` characters. The cut is
        a plain slice, so it may fall in the middle of a page's contents.
    """
    prompt = (
        f"You're looking at company: {company_name}\n"
        "Use these webpage contents to build a humorous, engaging markdown brochure:\n\n"
        + get_all_details(url)
    )
    return prompt[:max_chars]
    
def stream_brochure(company_name, url):
    """Generate a brochure for the company and render it live as it streams.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing page used as the source material.

    Returns:
        None. The brochure is shown through an IPython display that is
        refreshed after every streamed chunk.
    """
    messages = [
        {"role": "system", "content": brochure_system_prompt},
        {"role": "user", "content": brochure_user_prompt(company_name, url)},
    ]
    chunks = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)

    # Start with an empty display and redraw it in place as text arrives.
    handle = display(Markdown(""), display_id=True)
    accumulated = ""
    for piece in chunks:
        delta_text = piece.choices[0].delta.content
        if delta_text:
            accumulated += delta_text
        # Remove code-fence markers so the output renders as markdown
        # instead of appearing as a literal code block.
        rendered = accumulated.replace("```markdown", "").replace("```", "").strip()
        update_display(Markdown(rendered), display_id=handle.display_id)

        
# Entry point: stream a brochure for the default company configured above.
if __name__ == "__main__":
    stream_brochure(company_name=company_name, url=url)