In [1]:
# importing all the required libraries 
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [None]:
# Load environment variables from the .env file (override=True lets values from
# the file replace ones already set in the process) and sanity-check the OpenAI
# API key before any request is made.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# The whitespace test runs before the prefix test: a key with a leading space or
# tab would otherwise fail startswith() first and be reported with the misleading
# "doesn't start sk-proj-" message instead of the whitespace hint.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

In [3]:
# Create the shared OpenAI client; Website.scrap_data uses it to request chat completions.
# No api_key argument is passed here - presumably the client picks up OPENAI_API_KEY
# from the environment (TODO confirm against the installed openai version).
openai = OpenAI()

In [12]:
# Request headers carrying a desktop-Chrome User-Agent; Website passes them to
# requests.get when it downloads a page.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A web page downloaded once at construction time, plus a helper that sends
    prompts to an OpenAI chat model and renders the reply.

    Attributes:
        url: The address passed to the constructor.
        title: Text of the page's <title> tag, or "No title found" when the
            tag is missing or has no single text string.
        text: Visible text of the page's <body>, one fragment per line, with
            script, style, img and input elements removed. Empty string when
            the document has no <body>.
    """

    # Seconds to wait on the server; without a timeout requests.get can block
    # the notebook cell indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Address of the page to download and parse.

        Raises:
            requests.exceptions.RequestException: If the page cannot be
                fetched (connection failure, timeout, ...).
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None when <title> is empty or contains nested tags, so
        # fall back in that case as well as when the tag is absent.
        page_title = soup.title.string if soup.title else None
        self.title = page_title if page_title else "No title found"

        # Fragments and some error responses have no <body>; avoid calling
        # methods on None.
        body = soup.body
        if body is None:
            self.text = ""
            return

        # Drop elements that contribute no readable text.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

    def scrap_data(self, text, title, user_prompt, system_prompt, model="gpt-4o-mini"):
        """
        Send the prompts to the chat model, render the reply as Markdown and
        return the reply text.

        Args:
            text: Not used by this method; kept so existing callers keep working.
            title: Not used by this method; kept so existing callers keep working.
            user_prompt: Content of the "user" message.
            system_prompt: Content of the "system" message.
            model: Name of the chat model to query.

        Returns:
            The content of the first choice in the model's response. Earlier
            versions returned the result of display(), i.e. None, which made
            the return value useless to callers.
        """
        message = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        response = openai.chat.completions.create(model=model, messages=message)
        content = response.choices[0].message.content
        display(Markdown(content))
        return content

In [None]:
# Download the GeeksforGeeks article, keep its body text and title in their own
# variables for the prompt cell, and echo both (text first, then title).
website_instance = Website("https://www.geeksforgeeks.org/how-to-upload-project-on-github-from-jupyter-notebook/")
web_text, web_title = website_instance.text, website_instance.title
print(web_text, web_title, sep="\n")

In [None]:
# Build the prompts: the user message embeds the scraped article text, the
# system message sets the assistant's persona.
user_prompt = f"list out the process of uploading a jupyter project on github in bullet points from the given website text : {website_instance.text}"
system_prompt = "you are a professional with good sense of humour"

# scrap_data renders the model's reply itself via display(Markdown(...)), so the
# result is only stored here and not printed a second time.
scraped_data = website_instance.scrap_data(web_text, web_title, user_prompt, system_prompt)