## Grabbing a Title

In [1]:
import requests

In [2]:
import bs4 # beautiful soup

In [3]:
# Download the example page; the call returns a Response object.
result = requests.get('http://example.com/')

In [4]:
type(result)

requests.models.Response

In [5]:
result.text

'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-color: #fdfdff;\n        border-radius: 0.5em;\n        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        div {\n            margin: 0 auto;\n            width: auto;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n    <

In [6]:
# Parse the downloaded HTML text using the lxml parser.
soup = bs4.BeautifulSoup(markup=result.text, features='lxml')

In [7]:
soup

<!DOCTYPE html>
<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples

In [8]:
# Collect every <p> element matched by the CSS selector; a later cell indexes into it.
site_para = soup.select('p')

In [9]:
soup.select('title')

[<title>Example Domain</title>]

In [10]:
soup.select('p')

[<p>This domain is for use in illustrative examples in documents. You may use this
     domain in literature without prior coordination or asking for permission.</p>,
 <p><a href="https://www.iana.org/domains/example">More information...</a></p>]

In [11]:
soup.select('title')[0].getText()

'Example Domain'

In [12]:
site_para[0].getText()

'This domain is for use in illustrative examples in documents. You may use this\n    domain in literature without prior coordination or asking for permission.'

## Grabbing a Class

In [13]:
# Fetch the Jonas Salk Wikipedia article for the class-selector example.
res = requests.get('https://en.wikipedia.org/wiki/Jonas_Salk')

In [14]:
# Re-bind `soup` to the parsed Wikipedia article (lxml parser).
soup = bs4.BeautifulSoup(markup=res.text, features='lxml')

In [15]:
# soup

In [16]:
soup.select('.toctext')

[<span class="toctext">Early life and education</span>,
 <span class="toctext">Education</span>,
 <span class="toctext">Medical school</span>,
 <span class="toctext">Postgraduate research and early laboratory work</span>,
 <span class="toctext">Polio research</span>,
 <span class="toctext">Becoming a public figure</span>,
 <span class="toctext">Celebrity versus privacy</span>,
 <span class="toctext">Maintaining his individuality</span>,
 <span class="toctext">Establishing the Salk Institute</span>,
 <span class="toctext">AIDS vaccine work</span>,
 <span class="toctext">Salk's "biophilosophy"</span>,
 <span class="toctext">Personal life</span>,
 <span class="toctext">Honors and recognition</span>,
 <span class="toctext">Documentary films</span>,
 <span class="toctext">Salk's book publications</span>,
 <span class="toctext">See also</span>,
 <span class="toctext">References</span>,
 <span class="toctext">Further reading</span>,
 <span class="toctext">External links</span>]

In [17]:
 my_first_item = soup.select('.toctext')[0]

In [18]:
print(my_first_item)
    

<span class="toctext">Early life and education</span>


In [19]:
# Print the visible text of each element with class "toctext", one per line.
for item in soup.select('.toctext'):
    print(item.get_text())

Early life and education
Education
Medical school
Postgraduate research and early laboratory work
Polio research
Becoming a public figure
Celebrity versus privacy
Maintaining his individuality
Establishing the Salk Institute
AIDS vaccine work
Salk's "biophilosophy"
Personal life
Honors and recognition
Documentary films
Salk's book publications
See also
References
Further reading
External links


## Grabbing an Image

In [20]:
# Fetch the Llama Wikipedia article for the image example.
res = requests.get('https://en.wikipedia.org/wiki/Llama')

In [21]:
# Re-bind `soup` to the parsed Llama article (lxml parser).
soup = bs4.BeautifulSoup(markup=res.text, features='lxml')

In [22]:
soup.select('img')

[<img alt="Page semi-protected" data-file-height="512" data-file-width="512" decoding="async" height="20" src="//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/20px-Semi-protection-shackle.svg.png" srcset="//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/30px-Semi-protection-shackle.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/40px-Semi-protection-shackle.svg.png 2x" width="20"/>,
 <img alt="Llama lying down.jpg" data-file-height="2304" data-file-width="3456" decoding="async" height="147" src="//upload.wikimedia.org/wikipedia/commons/thumb/b/b9/Llama_lying_down.jpg/220px-Llama_lying_down.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/b9/Llama_lying_down.jpg/330px-Llama_lying_down.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/b/b9/Llama_lying_down.jpg/440px-Llama_lying_down.jpg 2x" width="220"/>,
 <img alt="edit" data-file-height="16" data-file-width="16" decoding="a

In [23]:
soup.select('.thumbimage')


[<img alt="" class="thumbimage" data-file-height="3072" data-file-width="2304" decoding="async" height="293" src="//upload.wikimedia.org/wikipedia/commons/thumb/3/37/A_Quechua_girl_and_her_Llama.jpg/220px-A_Quechua_girl_and_her_Llama.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/37/A_Quechua_girl_and_her_Llama.jpg/330px-A_Quechua_girl_and_her_Llama.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/37/A_Quechua_girl_and_her_Llama.jpg/440px-A_Quechua_girl_and_her_Llama.jpg 2x" width="220"/>,
 <img alt="" class="thumbimage" data-file-height="1944" data-file-width="2592" decoding="async" height="165" src="//upload.wikimedia.org/wikipedia/commons/thumb/b/b8/Domestic_llama_%282009-05-19%29.jpg/220px-Domestic_llama_%282009-05-19%29.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/b8/Domestic_llama_%282009-05-19%29.jpg/330px-Domestic_llama_%282009-05-19%29.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/b/b8/Domestic_llama_%282009-05-19%29.jpg/440

In [24]:
soup.select('.thumbimage')[0]

<img alt="" class="thumbimage" data-file-height="3072" data-file-width="2304" decoding="async" height="293" src="//upload.wikimedia.org/wikipedia/commons/thumb/3/37/A_Quechua_girl_and_her_Llama.jpg/220px-A_Quechua_girl_and_her_Llama.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/37/A_Quechua_girl_and_her_Llama.jpg/330px-A_Quechua_girl_and_her_Llama.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/37/A_Quechua_girl_and_her_Llama.jpg/440px-A_Quechua_girl_and_her_Llama.jpg 2x" width="220"/>

In [25]:
# Keep the first element with class "thumbimage" so its attributes can be read by key.
lama = soup.select('.thumbimage')[0]

In [26]:
lama['class']

['thumbimage']

In [27]:
lama['src']

'//upload.wikimedia.org/wikipedia/commons/thumb/3/37/A_Quechua_girl_and_her_Llama.jpg/220px-A_Quechua_girl_and_her_Llama.jpg'

<img src = '//upload.wikimedia.org/wikipedia/commons/thumb/3/37/A_Quechua_girl_and_her_Llama.jpg/220px-A_Quechua_girl_and_her_Llama.jpg'>

In [28]:
# Build the download URL from the scraped protocol-relative `src` (it starts with '//')
# rather than a hand-copied literal, and request it over HTTPS.
image_link = requests.get('https:' + lama['src'])
# Stop on an HTTP error so an error page is never saved to disk as if it were the image.
image_link.raise_for_status()

In [29]:
# image_link.content

In [30]:
pwd

'/Users/ana/Documents/udemy/Data structure basics/09_Web scraping '

In [31]:
# Save the downloaded bytes to disk; mode 'wb' = write binary.
# The context manager closes the file even if the write raises, so the handle
# cannot be left open by a failed or skipped cell.
# NOTE: the destination is an absolute, machine-specific path; adjust it before
# running this notebook elsewhere.
with open('/Users/ana/Desktop/lama.jpg', 'wb') as f:
    f.write(image_link.content)