In [3]:
import time
# Delay between consecutive beeps, in seconds. Keeping it in one named
# constant means the documented delay and the actual delay cannot disagree.
PAUSE_SECONDS = 2

# Announce beeps 1 through 4, pausing after each announcement.
for beep_number in range(1, 5):
    print(f"Beeping {beep_number}")
    time.sleep(PAUSE_SECONDS)  # Pauses the script for PAUSE_SECONDS (2) seconds


Beeping 1
Beeping 2
Beeping 3
Beeping 4


In [2]:
import time
# Announce beeps numbered first_beep..last_beep (inclusive), with no delay.
first_beep, last_beep = 1, 4
for beep_number in range(first_beep, last_beep + 1):
    message = f"Beeping {beep_number}"
    print(message)

Beeping 1
Beeping 2
Beeping 3
Beeping 4


In [1]:
import requests
# Fetch the httpbin landing page. The timeout is passed explicitly because
# requests applies none by default, so an unresponsive server would otherwise
# block this cell indefinitely.
response = requests.get('https://httpbin.org/', timeout=10)
print(response.status_code)  # numeric HTTP status of the reply
print(response.text)         # response body as text


200
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <title>httpbin.org</title>
    <link href="https://fonts.googleapis.com/css?family=Open+Sans:400,700|Source+Code+Pro:300,600|Titillium+Web:400,600,700"
        rel="stylesheet">
    <link rel="stylesheet" type="text/css" href="/flasgger_static/swagger-ui.css">
    <link rel="icon" type="image/png" href="/static/favicon.ico" sizes="64x64 32x32 16x16" />
    <style>
        html {
            box-sizing: border-box;
            overflow: -moz-scrollbars-vertical;
            overflow-y: scroll;
        }

        *,
        *:before,
        *:after {
            box-sizing: inherit;
        }

        body {
            margin: 0;
            background: #fafafa;
        }
    </style>
</head>

<body>
    <a href="https://github.com/requests/httpbin" class="github-corner" aria-label="View source on Github">
        <svg width="80" height="80" viewBox="0 0 250 250" style="fill:#151513; color:#fff; position: absol

In [2]:
from bs4 import BeautifulSoup

# Small in-memory HTML document used to demonstrate building a soup object.
html_init = """
<div class="container">
    <h1 id="main-title">Welcome to Web Scraping</h1>
    <p class="description">Learn how to extract data from websites</p>
</div>
"""

# Parse the markup with the 'html.parser' backend.
soup = BeautifulSoup(markup=html_init, features='html.parser')

# Report the type of the parsed object.
soup_type = type(soup)
print(f"Type of object: {soup_type}")

# Walk by tag name: first <div>, then the first <p> inside it, and show its text.
description = soup.div.p
print(description.text)



Type of object: <class 'bs4.BeautifulSoup'>
Learn how to extract data from websites


In [3]:
from bs4 import BeautifulSoup

# Sample product listing used to demonstrate find(), which returns one match.
html_find = """
<div class="products">
    <div class="product" id="pro1">
        <h3>Laptop</h3>
        <span class="price">$999</span>
    </div>
    <div class="product" id="pro2">
        <h3>Mouse</h3>
        <span class="price">$25</span>
    </div>
</div>
"""

soup = BeautifulSoup(html_find, 'html.parser')

# First <div class="product"> in the document, plus its heading and price text.
first_product = soup.find('div', class_='product')
product_name = first_product.find('h3').text
product_price = first_product.find('span').text
print(f"First product div: {first_product}")
print(f"Product name: {product_name}")
print(f"Product price: {product_price}")

# Look an element up by its id attribute alone (no tag name given).
specific_item = soup.find(id='pro2')
specific_name = specific_item.find('h3').text
print(f"Item with ID 'pro2': {specific_name}")

# Tag name combined with a class filter: the first <span class="price">.
price_element = soup.find('span', class_='price')
price_text = price_element.text
print(f"Price element text: {price_text}\n")



First product div: <div class="product" id="pro1">
<h3>Laptop</h3>
<span class="price">$999</span>
</div>
Product name: Laptop
Product price: $999
Item with ID 'pro2': Mouse
Price element text: $999



In [4]:
from bs4 import BeautifulSoup

# Navigation menu markup used to demonstrate find_all(), which returns every match.
html_find_all = """
<ul class="menu">
    <li class="item active">Home</li>
    <li class="item">Products</li>
    <li class="item">Services</li>
    <li class="item active">Contact</li>
    <li class="item">About</li>
</ul>
"""

soup = BeautifulSoup(html_find_all, 'html.parser')

# Three queries over the same document:
all_items = soup.find_all('li')                      # every <li>
active_items = soup.find_all('li', class_='active')  # <li> whose class list includes "active"
limited_items = soup.find_all('li', limit=3)         # stop after the first three <li>

# Each section is a heading followed by one bullet per matched element.
sections = [
    (f"All li elements ({len(all_items)} found):", all_items),
    (f"\nActive items ({len(active_items)} found):", active_items),
    (f"\nFirst 3 items:", limited_items),
]
for heading, matches in sections:
    print(heading)
    for item in matches:
        print(f"  - {item.text}")



All li elements (5 found):
  - Home
  - Products
  - Services
  - Contact
  - About

Active items (2 found):
  - Home
  - Contact

First 3 items:
  - Home
  - Products
  - Services


In [5]:
from bs4 import BeautifulSoup

# Profile card markup used to demonstrate reading attributes with get().
html_get = """
<div id="user-profile" class="profile-card">
    <img src="avatar.jpg" alt="User Avatar" width="100" height="100">
    <a href="/users/john-doe" title="View Profile" target="_blank">John Doe</a>
    <span class="badge premium">Premium User</span>
</div>
"""

soup = BeautifulSoup(html_get, 'html.parser')

# Attributes of the wrapping <div>. The second argument to get() is the
# fallback used when the attribute is absent from the tag.
profile_div = soup.find('div', class_='profile-card')
div_id = profile_div.get('id')
div_classes = profile_div.get('class')
missing_attr = profile_div.get('data-score', 'Not available')
print(f"Div ID: {div_id}")
print(f"Div class: {div_classes}")
print(f"Non-existent attribute: {missing_attr}\n")

# Attributes of the first <img>.
image = soup.find('img')
image_src = image.get('src')
image_alt = image.get('alt')
image_width = image.get('width')
print(f"Image source: {image_src}")
print(f"Image alt text: {image_alt}")
print(f"Image width: {image_width}\n")

# Attributes of the first <a>.
link = soup.find('a')
link_href = link.get('href')
link_title = link.get('title')
link_target = link.get('target')
print(f"Link href: {link_href}")
print(f"Link title: {link_title}")
print(f"Link target: {link_target}")


Div ID: user-profile
Div class: ['profile-card']
Non-existent attribute: Not available

Image source: avatar.jpg
Image alt text: User Avatar
Image width: 100

Link href: /users/john-doe
Link title: View Profile
Link target: _blank


In [6]:
from bs4 import BeautifulSoup

# Page skeleton used to demonstrate CSS-selector queries with select().
html_select = """
<div class="content">
    <header id="main-header">
        <h1 id="title primary">Website Title</h1>
    </header>
    
    <section class="articles">
        <article class="post featured">
            <h2 class="article-title">Featured Post</h2>
            <p class="excerpt">This is a featured article</p>
        </article>
        <article class="post">
            <h2 class="article-title">Regular Post</h2>
            <p class="excerpt">This is a regular article</p>
        </article>
    </section>
    
    <footer class="site-footer">
        <p class="copyright">&copy; 2024 My Website</p>
    </footer>
</div>
"""

soup = BeautifulSoup(html_select, 'html.parser')

# Class selector: every element whose class list contains "post".
posts = soup.select('.post')
post_count = len(posts)
print(f"Select by class '.post': {post_count} elements found")
for post in posts:
    post_heading = post.find('h2')
    print(f"  - {post_heading.text}")

# ID selector: select() always returns a list, so take its first entry.
header = soup.select('#main-header')
header_title = header[0].find('h1')
print(f"\nSelect by ID '#main-header': {header_title.text}")

# Type selector: every <h2>.
all_h2 = soup.select('h2')
h2_count = len(all_h2)
print(f"\nSelect by tag 'h2': {h2_count} elements")
for h2 in all_h2:
    print(f"  - {h2.text}")

# Compound class selector: elements carrying both "post" and "featured".
featured_post = soup.select('.post.featured')
featured_count = len(featured_post)
featured_heading = featured_post[0].find('h2')
print(f"\nSelect by multiple classes '.post.featured': {featured_count} element")
print(f"  - {featured_heading.text}")

# Descendant combinator (space): .article-title at any depth below a .post.
titles = soup.select('.post .article-title')
print(f"\nSelect child elements '.post .article-title':")
for title in titles:
    print(f"  - {title.text}")

# Child combinator (>): <article> elements that are immediate children of a <section>.
direct_children = soup.select('section > article')
direct_count = len(direct_children)
print(f"\nSelect direct children 'section > article': {direct_count} elements")

# Attribute substring selector: <h1> whose id value contains "primary".
primary_title = soup.select('h1[id*="primary"]')
primary_text = primary_title[0].text
print(f"\nSelect by attribute contains 'primary': {primary_text}")


Select by class '.post': 2 elements found
  - Featured Post
  - Regular Post

Select by ID '#main-header': Website Title

Select by tag 'h2': 2 elements
  - Featured Post
  - Regular Post

Select by multiple classes '.post.featured': 1 element
  - Featured Post

Select child elements '.post .article-title':
  - Featured Post
  - Regular Post

Select direct children 'section > article': 2 elements

Select by attribute contains 'primary': Website Title


In [None]:
import requests
from bs4 import BeautifulSoup

url = "http://books.toscrape.com/"

# Send the GET request. The timeout keeps an unresponsive server from hanging
# the cell, and raise_for_status() stops here on a 4xx/5xx reply instead of
# silently parsing an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Parse the HTML from the raw response bytes.
soup = BeautifulSoup(response.content, 'html.parser')

# Find all book containers: <article class="product_pod"> elements.
books = soup.find_all('article', class_='product_pod')

print(f"Found total {len(books)} book\n\nPrinting details of first 10 books")

# Extract and show the details of the first 10 books.
for book in books[:10]:
    # Title comes from the title attribute of the <a> inside the <h3>;
    # 'No title' is the fallback when that attribute is missing.
    title = book.find('h3').find('a').get('title', 'No title')
    price = book.find('p', class_='price_color').text
    availability = book.find('p', class_='instock').text.strip()
    # The class attribute is a list; its second entry is used as the rating.
    # NOTE(review): presumably a word such as "Three" - confirm against the page markup.
    rating_class = book.find('p', class_='star-rating').get('class')[1]
    # Print every extracted field, including the title, which was previously
    # collected but left out of the output.
    print(f"Title: {title}, Price: {price}, Availability: {availability}, Rating: {rating_class} stars")


In [2]:
from selenium import webdriver
from bs4 import BeautifulSoup

# Launch Chrome, load the search results page and capture its HTML. The
# driver is always shut down afterwards, even if loading fails, so repeated
# runs of this cell do not leave browser processes behind.
driver = webdriver.Chrome()
try:
    driver.get("https://www.amazon.in/s?k=mobiles&crid=BLT35FVNXG5Y&sprefix=mobiles%2Caps%2C340&ref=nb_sb_noss_2")

    # page_source is read as soon as get() returns; no explicit wait is
    # performed here.
    html = driver.page_source
finally:
    driver.quit()

# Parse the captured page.
soup = BeautifulSoup(html, "html.parser")

# Find product containers: <div> elements tagged data-component-type="s-search-result".
products = soup.find_all("div", {"data-component-type": "s-search-result"})

# Extract name and price for each product, falling back to "N/A" when the
# expected element is missing from a result.
for product in products:
    # product name: text of the first <h2> inside the container
    name = product.h2.text.strip() if product.h2 else "N/A"

    # product price: text of the first <span class="a-price-whole">
    price_tag = product.find("span", class_="a-price-whole")
    price = price_tag.text.strip() if price_tag else "N/A"

    print(f"Name: {name}")
    print(f"Price: ₹{price}")
    print("-" * 60)


Name: Samsung Galaxy M06 5G (Sage Green, 4GB RAM, 128 GB Storage) | MediaTek Dimensity 6300 | AnTuTu Score 422K+ | 12 5G Bands| 25W Fast Charging | 4 Gen of OS Upgrades | Without Charger
Price: ₹7,999
------------------------------------------------------------
Name: iPhone 17 Pro Max 256 GB: 17.42 cm (6.9″) Display with Promotion, A19 Pro Chip, Best Battery Life in Any iPhone Ever, Pro Fusion Camera System, Center Stage Front Camera; Cosmic Orange
Price: ₹1,49,900
------------------------------------------------------------
Name: Redmi A4 5G (Starry Black, 4GB RAM, 128GB Storage) | Segment Largest 6.88in 120Hz | 50MP Dual Camera | 18W Fast Charging | Charger in The Box
Price: ₹8,349
------------------------------------------------------------
Name: Samsung Galaxy M06 5G (Blazing Black, 6GB RAM, 128 GB Storage) | MediaTek Dimensity 6300 | AnTuTu Score 422K+ | 12 5G Bands | 25W Fast Charging | 4 Gen of OS Upgrades | Without Charger
Price: ₹8,999
-----------------------------------------

In [None]:
from bs4 import BeautifulSoup

def classify_format(html: str) -> list:
    """Return the href value of every <a> tag in ``html``, in document order.

    NOTE(review): the original function had no body, which made the cell fail
    to parse. Link extraction is inferred from the sample inputs used with
    this function (anchors with href attributes, and a "No links here" case);
    confirm this is the intended behaviour.

    Args:
        html: HTML markup to scan.

    Returns:
        A list of href strings; empty when the markup has no <a> tag
        carrying an href attribute.
    """
    parsed = BeautifulSoup(html, 'html.parser')
    # href=True restricts the match to anchors that actually have an href
    # attribute, so the subscript below cannot raise KeyError.
    return [anchor['href'] for anchor in parsed.find_all('a', href=True)]

# Sample documents: one link, two links, and no links.
samples = (
    '<html><body><a href="https://example.com">Link</a></body></html>',
    '<html><body><a href="/home">Home</a><a href="/about">About</a></body></html>',
    '<html><body><p>No links here</p></body></html>',
)

# Run classify_format on each sample and print its result, with a blank
# line between consecutive results (none after the last).
for position, html in enumerate(samples):
    if position > 0:
        print()
    result = classify_format(html)
    print(result)
