# In [None]:
"""
os module is for environment variables
requests is for http requests
dotenv loads environment variables from a .env file
beautifulsoup is an html parser that extracts content from web pages
markdown is for formatted output in jupyter
openai client to interact with the api
"""
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI


"""
load_dotenv(override=True) loads variables from .env file, overrides existing environment variables if present
os.getenv('OPENAI_API_KEY') retrieves the OpenAI API key from environment variables
OpenAI(api_key=api_key) initializes OpenAI client with the retrieved API key
headers mimics a browser request by a user, some servers reject or restrict requests from scripts or bots
"""
# Read the .env file first; override=True lets its values replace variables
# that are already set in the process environment.
load_dotenv(override=True)

# Look up the API key and build the client that the rest of the file uses.
api_key = os.getenv("OPENAI_API_KEY")
openai = OpenAI(api_key=api_key)

# Browser-style User-Agent header sent along with page requests.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

# Address of the page to fetch and summarise.
url = "https://www.reddit.com/r/leagueoflegends/"


"""
class Website takes a url as an argument
self.url stores the given url
self.title stores the website's title
self.text stores the website's text
response = requests.get(url, headers=headers) sends http get request to the url with headers
soup = BeautifulSoup(response.content, 'html.parser') parses webpage content using beautifulsoup
for irrelevant in soup.body(["script", "style", "img", "input"]): irrelevant.decompose() removes unwanted html elements
"""
class Website:
    """Download a web page and keep its title and visible text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's <title> element, or the string
            "Failed to get website" when the page has no usable title.
        text: Text content of the page with script, style, img and input
            elements removed, one text fragment per line.
    """

    def __init__(self, url, timeout=30):
        """Fetch *url* and extract its title and text.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout a stalled server would block forever.

        Raises:
            Whatever ``requests.get`` raises on connection failure or
            timeout. HTTP error statuses are not raised here: the body of
            an error response is parsed like any other page.
        """
        self.url = url
        # `headers` is the module-level browser-like header dict.
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # `.string` is None when <title> is empty or contains nested tags,
        # so fall back in that case as well as when the tag is missing.
        title_tag = soup.title
        title = title_tag.string if title_tag is not None else None
        self.title = title if title is not None else "Failed to get website"

        # html.parser does not invent a <body>; for body-less responses
        # work on the whole document instead of failing on None.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root.find_all(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

        
"""
openai doc page about messages and response: https://platform.openai.com/docs/api-reference/chat/create
response = openai.chat.completions.create(model = "gpt-4o-mini", messages = messages) sends the messages to openai api and stores the response in response
display(Markdown(summary)) displays summary in markdown
"""
if __name__ == "__main__":
    # Download and parse the target page.
    website = Website(url)

    # System message: fixes the assistant's role, tone and output format.
    system_prompt = " ".join([
        "You are an expert assistant tasked with creating concise, insightful summaries of website content.",
        "Your summaries should capture the essential points clearly, maintain a neutral tone, and highlight key ideas or important announcements.",
        "Format your summary neatly using markdown.",
    ])

    # User message: the page title and text followed by the actual request.
    user_prompt = (
        "Below is the content of a website titled '{title}':\n\n"
        "{text}\n\n"
        "Please provide an insightful summary of this content. "
        "Format your summary clearly using markdown."
    ).format(title=website.title, text=website.text)

    # Chat payload: system message first, then the user message.
    messages = [
        {"role": role, "content": content}
        for role, content in (("system", system_prompt), ("user", user_prompt))
    ]

    # Send the conversation to the model and render the first choice's
    # message content as markdown.
    response = openai.chat.completions.create(
        messages=messages,
        model="gpt-4o-mini",
    )
    summary = response.choices[0].message.content
    display(Markdown(summary))