In [45]:
# Dependencies (run once if missing): %pip install openai requests python-dotenv beautifulsoup4

In [11]:
import os
import requests
from dotenv import load_dotenv

from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [12]:
# Read settings from the local .env file (override=True lets .env values take
# precedence over variables already present in the environment), then pick up
# the OpenAI key; api_key is None when the variable is not set.
load_dotenv(override=True)
api_key = os.environ.get('OPENAI_API_KEY')

In [13]:
# Sanity-check the API key before any request is made.
# The whitespace check runs before the prefix check so that a key pasted with a
# leading space is reported as a whitespace problem rather than as a bad prefix.

if not api_key:
    print("No API key found - check your .env file")
elif api_key.strip() != api_key:
    print("You have extra spaces in your API Key, be careful while copying and pasting")
elif not api_key.startswith("sk-proj-"):
    print("verify your api key. It should start with sk-proj-")
else:
    print("API key found and looks good")


API key found and looks good


In [14]:
# Browser-like HTTP headers passed to requests when fetching a page.
# Add other headers as needed, e.g. 'Referer', 'Cookie', etc.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/100.0.4896.88 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
}

In [20]:
# Write a function to fetch and parse a website

def get_website(url, timeout=30):
    """Fetch a web page and return its title and visible text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A ``(title, text)`` tuple. ``title`` is the page's <title> string, or
        a placeholder message when the page has no usable title. ``text`` is
        the page text with script, style and img elements removed, one
        fragment per line.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection problems and timeouts.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on error responses instead of summarizing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # soup.title.string is None for an empty <title> or one with nested tags,
    # so fall back to the placeholder in that case as well.
    if soup.title is not None and soup.title.string:
        title = soup.title.string
    else:
        title = "No title found for this website!"

    # Some documents have no <body>; use the whole parse tree in that case.
    root = soup.body if soup.body is not None else soup

    # Remove irrelevant tags. The HTML image element is <img>, not <images>.
    for irrelevant in root(["script", "style", "img"]):
        irrelevant.decompose()

    text = root.get_text(separator="\n", strip=True)
    return title, text


In [21]:
# Try the scraper on a live page; the cell output is the (title, text) tuple it returns.
get_website("https://www.imdb.com/")

('IMDb: Ratings, Reviews, and Where to Watch the Best Movies & TV Shows',
 'Menu\nMovies\nRelease calendar\nTop 250 movies\nMost popular movies\nBrowse movies by genre\nTop box office\nShowtimes & tickets\nMovie news\nIndia movie spotlight\nTV shows\nWhat\'s on TV & streaming\nTop 250 TV shows\nMost popular TV shows\nBrowse TV shows by genre\nTV news\nWatch\nWhat to watch\nLatest trailers\nIMDb Originals\nIMDb Picks\nIMDb Spotlight\nFamily entertainment guide\nIMDb Podcasts\nAwards & events\nOscars\nEmmys\nSan Diego Comic-Con\nSummer Watch Guide\nToronto Int\'l Film Festival\nSTARmeter Awards\nAwards Central\nFestival Central\nAll events\nCelebs\nBorn today\nMost popular celebs\nCelebrity news\nCommunity\nHelp center\nContributor zone\nPolls\nFor industry professionals\nLanguage\nEnglish (United States)\nLanguage\nFully supported\nEnglish (United States)\nPartially supported\nFrançais (Canada)\nFrançais (France)\nDeutsch (Deutschland)\nहिंदी (भारत)\nItaliano (Italia)\nPortuguês (Brasil

In [None]:
"""
Prompts:

User Prompt: Provides the input data for the AI to process.

    e.g.: The content of this website is as follows. Please provide a short summary
    of this website in markdown.




System Prompt: Sets the behaviour of the AI

    e.g.: You are an assistant that analyzes the contents of a website and provides a
    short summary, ignoring text that might be for navigation. Respond in markdown.
  

"""
# Combine the system prompt, the user prompt and the website text, then call the API to produce the summary.

# Instruction given to the model in the "system" role for every summary request.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be for navigation. "
    "Respond in markdown."
)

def create_user_prompt(title, text):
    """Build the user-role prompt for one page.

    The prompt is a line naming the page title, a fixed request for a short
    Markdown summary, and then the page text itself.
    """
    heading = f"You are looking at a website titled {title}"
    request = "\nThe content of this website is as follows. please provide a short summary of this website in markdown.\n"
    return "".join([heading, request, text])

def create_messages(system_prompt, user_prompt):
    """Return the two-entry message list: the system message, then the user message."""
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt}
    return [system_message, user_message]

def summarize(url, model="gpt-4o-mini"):
    """Summarize the web page at ``url`` with an OpenAI chat model.

    Args:
        url: Address of the page to fetch and summarize.
        model: Name of the chat-completions model to call. Defaults to
            "gpt-4o-mini", the value this function previously hard-coded.

    Returns:
        The message content of the first choice in the API response.
    """
    title, text = get_website(url)
    user_prompt = create_user_prompt(title, text)
    messages = create_messages(system_prompt, user_prompt)

    # Named `client` rather than `openai` so the local does not read like the SDK module.
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return response.choices[0].message.content



In [42]:

def display_summary(url):
    """Summarize the page at ``url`` and render the result as Markdown in the notebook."""
    display(Markdown(summarize(url)))

In [44]:
# Fetch the IMDb home page, summarize it, and render the summary as Markdown.
display_summary("https://www.imdb.com/")

# IMDb Website Summary

IMDb (Internet Movie Database) is a comprehensive online platform dedicated to movies and TV shows. It features:

- **Movie Information**: Release calendars, top and most popular movie lists, genre browsing, box office stats, and showtimes.
- **TV Show Details**: What's currently airing, top and most popular TV shows, genre categories, and TV news.
- **Watch Options**: Recommendations on what to watch, trailers, and exclusive content from IMDb Originals and podcasts.
- **Awards and Events**: Coverage of major awards like the Oscars and Emmys, including festival insights.
- **Celebrity News**: Updates on popular celebrities and industry professionals.

Overall, IMDb serves as a go-to resource for entertainment enthusiasts looking for ratings, reviews, trailers, and where to watch their favorite films and shows.

In [None]:
"""
Homework: create an email summarizer, a weather forecast, etc.

"""