# Grabbing the title of a page 

In [2]:
# using the request library to grab the page
import requests

res = requests.get('https://www.example.com/')

type(res)

requests.models.Response

In [3]:
res.text

'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-color: #fdfdff;\n        border-radius: 0.5em;\n        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        div {\n            margin: 0 auto;\n            width: auto;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n    <

In [5]:
# using beautiful soup to analyze the extracted page
import bs4

soup = bs4.BeautifulSoup(res.text, "lxml")

soup

<!DOCTYPE html>
<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples

In [9]:
print(soup.select('title'))

print(soup.select('title')[0])

print(soup.select('title')[0].get_text())

[<title>Example Domain</title>]
<title>Example Domain</title>
Example Domain


# Grabbing all elements of a class

![image.png](attachment:image.png)



In [27]:
res = requests.get("https://en.wikipedia.org/wiki/Grace_Hopper")

soup = bs4.BeautifulSoup(res.text, 'lxml')

num_text = soup.select('span')

for item in num_text:
    print(item.get_text)

<bound method PageElement.get_text of <span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span>>
<bound method PageElement.get_text of <span class="vector-dropdown-label-text">Main menu</span>>
<bound method PageElement.get_text of <span>Main page</span>>
<bound method PageElement.get_text of <span>Contents</span>>
<bound method PageElement.get_text of <span>Current events</span>>
<bound method PageElement.get_text of <span>Random article</span>>
<bound method PageElement.get_text of <span>About Wikipedia</span>>
<bound method PageElement.get_text of <span>Contact us</span>>
<bound method PageElement.get_text of <span>Help</span>>
<bound method PageElement.get_text of <span>Learn to edit</span>>
<bound method PageElement.get_text of <span>Community portal</span>>
<bound method PageElement.get_text of <span>Recent changes</span>>
<bound method PageElement.get_text of <span>Upload file</span>>
<bound method PageElement.get_text of <span class="mw-logo-container skin-inv

# Getting an image from website

In [42]:
res = requests.get("https://images.google.com/")

res.text

'<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="en-IN"><head><meta content="Google Images. The most comprehensive image search on the web." name="description"><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google Images</title><script nonce="rpadfN7NBiBSJTXs67YyVg">(function(){var _g={kEI:\'jM1zZ9CFJrqq1sQPnKjjgQQ\',kEXPI:\'0,3700257,1127,507102,31559,2872,2891,73050,6397,9708,344796,45786,9781,38675,57546,60263,7734,18673,8862,13445,286,3,28990,11106,15977,5203198,10476,582,5992271,30820714,25224045,10336,10736,84045,11643,98,1573,4,9304,15165,8182,5928,41666,1835,21667,6757,23879,9139,4599,328,4459,1766,1117,22290,6,4577,5633,687,7851,22,8393,13589,1133,213,13702,8206,7426,8139,4455,1930,3290,3491,33,1951,7090,17667,10667,5041,13463,3,2,2542,292,8341,41,5607,6275,1757,1,4875,1866,728,3377,8149,5,1,190,621,2087,981,2907,68,3596,1541,304

In [43]:
soup = bs4.BeautifulSoup(res.text, 'lxml')

soup

<!DOCTYPE html>
<html itemscope="" itemtype="http://schema.org/WebPage" lang="en-IN"><head><meta content="Google Images. The most comprehensive image search on the web." name="description"/><meta content="text/html; charset=utf-8" http-equiv="Content-Type"/><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"/><title>Google Images</title><script nonce="rpadfN7NBiBSJTXs67YyVg">(function(){var _g={kEI:'jM1zZ9CFJrqq1sQPnKjjgQQ',kEXPI:'0,3700257,1127,507102,31559,2872,2891,73050,6397,9708,344796,45786,9781,38675,57546,60263,7734,18673,8862,13445,286,3,28990,11106,15977,5203198,10476,582,5992271,30820714,25224045,10336,10736,84045,11643,98,1573,4,9304,15165,8182,5928,41666,1835,21667,6757,23879,9139,4599,328,4459,1766,1117,22290,6,4577,5633,687,7851,22,8393,13589,1133,213,13702,8206,7426,8139,4455,1930,3290,3491,33,1951,7090,17667,10667,5041,13463,3,2,2542,292,8341,41,5607,6275,1757,1,4875,1866,728,3377,8149,5,1,190,621,2087,981,2907,68,3596,1541,304

In [45]:
image_tag = soup.select("img")

image_tag

[]

# Example Project - Working with Multiple Pages and Items

In [75]:
base_url = "http://books.toscrape.com/catalogue/page-{}.html"

two_star_title = []
for i in range(1,51):
    
    page_url = base_url.format(i)
    res = requests.get(page_url)
    
    soup = bs4.BeautifulSoup(res.text, "lxml")
    books = soup.select(".product_pod")
    
    for book in books:
        if len(book.select('.star-rating.Two')) != 0:
            two_star_title.append(book.select('a')[1]['title'])
            

two_star_title

['Starving Hearts (Triangular Trade Trilogy, #1)',
 'Libertarianism for Beginners',
 "It's Only the Himalayas",
 'How Music Works',
 'Maude (1883-1993):She Grew Up with the country',
 "You can't bury them all: Poems",
 'Reasons to Stay Alive',
 'Without Borders (Wanderlove #1)',
 'Soul Reader',
 'Security',
 'Saga, Volume 5 (Saga (Collected Editions) #5)',
 'Reskilling America: Learning to Labor in the Twenty-First Century',
 'Political Suicide: Missteps, Peccadilloes, Bad Calls, Backroom Hijinx, Sordid Pasts, Rotten Breaks, and Just Plain Dumb Mistakes in the Annals of American Politics',
 'Obsidian (Lux #1)',
 'My Paris Kitchen: Recipes and Stories',
 'Masks and Shadows',
 'Lumberjanes, Vol. 2: Friendship to the Max (Lumberjanes #5-8)',
 'Lumberjanes Vol. 3: A Terrible Plan (Lumberjanes #9-12)',
 'Judo: Seven Steps to Black Belt (an Introductory Guide for Beginners)',
 'I Hate Fairyland, Vol. 1: Madly Ever After (I Hate Fairyland (Compilations) #1-5)',
 'Giant Days, Vol. 2 (Giant Day