In [3]:
# Imports

import os
import requests

from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [10]:
# Load environment: pull variables from a .env file into the process environment.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Check the API Key.
# Stray whitespace is tested before the prefix: a key with leading whitespace
# would otherwise be misreported as having the wrong prefix.

if not api_key:
  print("No API key was found")
elif api_key.strip() != api_key:
  print("An API key was found, but it looks like it might have space or tab characters at the start or end. Please remove them.")
elif not api_key.startswith("sk-proj-"):
  print("An API key was found but it doesn't start with 'sk-proj-'. Please check if you are using a correct API key.")
else:
  print("API key found and looks good so far.")


API key found and looks good so far.


In [13]:
# A class for a Webpage

class Website:
  """
  A utility class used to scrape a web page.

  Attributes (all set once, in the constructor):
    url:   the address that was requested.
    title: the text of the page's <title>, or "No title found" when it is missing or has no single string.
    text:  the text of <body> (or of the whole document when there is no <body>),
           with script/style/img/input elements removed and fragments joined by newlines.
  """

  url: str
  title: str
  text: str

  def __init__(self, url, timeout=30):
    """
    Create a Website object from the given url using the BeautifulSoup library.

    Args:
      url: address of the page to download.
      timeout: seconds passed to `requests.get` as its timeout.

    Raises:
      requests.exceptions.RequestException: if the request fails or times out.
    """

    self.url = url
    # Without a timeout the request can block indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(markup = response.content,
                         features="html.parser")

    # `soup.title.string` is None when <title> is empty or contains nested tags,
    # so the placeholder covers that case as well as a missing <title>.
    title_text = soup.title.string if soup.title else None
    self.title = title_text if title_text else "No title found"

    # html.parser does not invent a <body>; fall back to the whole document
    # instead of failing with AttributeError on `soup.body`.
    root = soup.body if soup.body is not None else soup

    # Remove elements that carry no readable text before extracting it.
    for irrelevant in root(["script", "style", "img", "input"]):
      irrelevant.decompose()

    self.text = root.get_text(separator="\n", strip=True)

In [15]:
# Let's try one out: scrape a page and show the extracted title and text.
# `website` is reused further down when the user prompt is built.
website = Website("https://www.udemy.com/")
website.title, website.text

('Online Courses - Learn Anything, On Your Schedule | Udemy',
 'Search bar\nSearch for anything\nSite navigation\nMost popular\nMore from Udemy\nUdemy Business\nGet the app\nInvite friends\nHelp and Support\nEnglish\nSkip to content\nCategories\nSearch for anything\nTrending Now\nShow all trending skills\nTop companies choose\nUdemy Business\nto build in-demand career skills.\nEnglish\nUdemy Business\nTeach on Udemy\nGet the app\nAbout us\nContact us\nCareers\nBlog\nHelp and Support\nAffiliate\nInvestors\nTerms\nPrivacy policy\nSitemap\nAccessibility statement\n© 2024 Udemy, Inc.')

## Types of prompts

Models like GPT-4o have been trained to receive instructions in a particular way.

They expect to receive:

**System prompts** -> tell them what task they are performing and what tone they should use.

**User prompts** -> the conversation starter that they should reply to.


In [17]:
# System prompt: states the assistant's task and the expected output format.
system_prompt = (
  "You are an assistant that anlyzes the contents of a website "
  "and provides a short summary, "
  "ignoring text that might be navigation related. "
  "Respond in markdown."
)

In [21]:
# User prompt asking for a summary of the page held in `website`:
# the title comes first, then the request, then the scraped page text.

user_prompt = (
  f"You are looking a website titled {website.title}. "
  "The contents of this website is as follows; "
  "please provide a short summart if this website in markdown. "
  "It it includes news or announcements, then summarize these too. "
  f"{website.text}"
)

## Messages

The OpenAI API expects to receive messages in a particular structure. Many other APIs share this structure.

```
[
  {"role": "system", "content": "system message goes here"},
  {"role": "user", "content": "user message goes here"},
]
```


In [22]:
# The system prompt sets the task and tone; the user prompt must be sent with
# role "user" so the model treats it as the request to answer.
message = [
  {"role": "system", "content": system_prompt},
  {"role": "user", "content": user_prompt},
]

In [28]:
# Call OpenAI API
def summarize(url, client=None):
  """
  Scrape the page at `url` and return a short summary of it produced by gpt-4o-mini.

  Args:
    url: address of the page to summarize.
    client: optional OpenAI client to reuse. When omitted, a new `OpenAI()`
      client is created, so the function does not depend on a global that
      may not exist in a fresh kernel.

  Returns:
    The message content of the first choice in the chat completion.
  """

  if client is None:
    client = OpenAI()

  website = Website(url)

  # Define a system prompt.
  system_prompt = "You are an assistant that anlyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown."

  # User prompt that asks for a summary of this particular website.
  user_prompt = f"You are looking a website titled {website.title}. The contents of this website is as follows; please provide a short summart if this website in markdown. It it includes news or announcements, then summarize these too. {website.text}"

  # The page content goes in a "user" message; only the instructions are "system".
  message = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
  ]

  response = client.chat.completions.create(
    model = "gpt-4o-mini",
    messages = message
  )

  return response.choices[0].message.content

In [30]:
# Scrape the page and request its summary; the returned text is rendered in the next cell.
web_summary = summarize("https://edwarddonner.com")

In [31]:
# Render the summary text as Markdown instead of printing the raw string.
display(Markdown(web_summary))

# Summary of Edward Donner's Website

Edward Donner's website serves as a personal platform for sharing insights and resources related to programming, AI, particularly Large Language Models (LLMs), and his professional endeavors. Ed describes himself as a code enthusiast and an amateur DJ, and he highlights his role as the co-founder and CTO of Nebula.io, a company focused on leveraging AI to assist individuals in realizing their potential and enhancing talent recruitment processes.

## Key Sections:
- **About**: Ed shares his background and interests, including his experience in coding, LLM experimentation, and music.
- **Professional Background**: He details his work at Nebula.io and his previous startup untapt, emphasizing their innovative approach to AI and talent management.

## News and Announcements:
1. **October 16, 2024**: Resources for transitioning from Software Engineer to AI Data Scientist.
2. **August 6, 2024**: Introduction of the "Outsmart LLM Arena", a competitive platform for LLMs focused on diplomacy and strategy.
3. **June 26, 2024**: Guidance on selecting the right LLM, including tools and resources.
4. **February 7, 2024**: Discussion on fine-tuning LLMs with personalized text simulations.

The site offers insights into Ed's projects and thoughts on LLMs while providing a community connection through his posts.

In [33]:
# A function to display as Markdown
def display_summary(url):
  """Summarize the page at `url` and render the result as Markdown in the notebook."""
  rendered = Markdown(summarize(url))
  display(rendered)

In [36]:
# Summarize and render another page in a single call.
display_summary("https://www.learnpytorch.io/00_pytorch_fundamentals/")

# Summary of "PyTorch Fundamentals - Zero to Mastery"

This website provides a comprehensive introduction to PyTorch, focusing on the fundamentals necessary for deep learning. The course is structured into several modules that progressively cover essential topics, culminating in practical exercises to reinforce learning. 

## Key Topics Covered:

1. **Introduction to PyTorch**: 
   - Explanation of what PyTorch is and its applications in various industries, including its significance in companies like Meta, Tesla, and OpenAI.

2. **Tensors**: 
   - Definition and creation of tensors, which are the foundational data structure in PyTorch.
   - Various types of tensors (scalars, vectors, matrices) and operations such as creating random tensors, manipulating data with basic operations, and aggregating values (min, max, mean).

3. **Matrix Multiplication**: 
   - Detailed explanation of matrix multiplication rules and their importance in deep learning. 

4. **Running Tensors on GPUs**: 
   - Guidelines on setting up PyTorch to use GPU, including device management and transferring tensors between CPU and GPU.
   - Discussion on how to check for GPU availability and performance enhancements using NVIDIA or Apple Silicon devices.

5. **Common Errors**: 
   - Identifying shape mismatches and datatype issues, which are frequent in tensor operations.

6. **Data Handling**: 
   - Interoperation with NumPy arrays, the importance of reproducibility in experiments, and techniques for managing randomness.

7. **Additional Resources and Exercises**: 
   - Exercises at the end of each section for hands-on practice, providing links to further tutorials and resources.

## Learning Outcomes:
Through this course, learners will gain a solid foundation in using PyTorch for deep learning tasks, understand how to handle and manipulate tensors, and learn about the best practices for performance optimization in machine learning projects.

### Notes:
The course emphasizes practical coding examples and encourages learners to utilize PyTorch's official documentation for further exploration and problem-solving.