In [52]:
from bs4 import BeautifulSoup
import requests

In [54]:
# Download the example page. The timeout stops the cell from hanging forever on
# a stalled connection, and raise_for_status() turns an HTTP error response into
# an exception instead of letting an error page be parsed as if it were the
# real document.
r = requests.get('http://www.example.com', timeout=10)
r.raise_for_status()

# Decoded response body; shown as the cell output.
ex_html = r.text
ex_html

'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 50px;\n        background-color: #fff;\n        border-radius: 1em;\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        body {\n            background-color: #fff;\n        }\n        div {\n            width: auto;\n            margin: 0 auto;\n            border-radius: 0;\n            padding: 1em;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n

In [17]:
# Build the parse tree from the downloaded markup using the 'html.parser' backend.
soup = BeautifulSoup(markup=ex_html, features='html.parser')
soup

<!DOCTYPE doctype html>

<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 50px;
        background-color: #fff;
        border-radius: 1em;
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        body {
            background-color: #fff;
        }
        div {
            width: auto;
            margin: 0 auto;
            border-radius: 0;
            padding: 1em;
        }
    }
    </style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is established to be used

### Accessing tags by name

In [18]:
# Attribute access by tag name: yields the <title> tag of the document.
soup.title

<title>Example Domain</title>

In [19]:
# The whole <body> element, including everything nested inside it.
soup.body

<body>
<div>
<h1>Example Domain</h1>
<p>This domain is established to be used for illustrative examples in documents. You may use this
    domain in examples without prior coordination or asking for permission.</p>
<p><a href="http://www.iana.org/domains/example">More information...</a></p>
</div>
</body>

In [20]:
# Tag-name attributes can be chained to drill down: the <h1> inside <body>.
soup.body.h1

<h1>Example Domain</h1>

In [21]:
# Attribute access gives a single <a> tag (not a list); use find_all() to get every match.
soup.a

<a href="http://www.iana.org/domains/example">More information...</a>

In [22]:
# Collect every <a> tag in the document; the result is list-like.
soup.find_all(name='a')

[<a href="http://www.iana.org/domains/example">More information...</a>]

### Key Beautiful Soup attributes for navigating the tree

In [27]:
# .contents
soup.title.contents  # returns a list of the tag's direct children

['Example Domain']

In [33]:
soup.title.string  # returns the tag's text content as a single string

'Example Domain'

In [30]:
# .children
# Iterate over the direct children of the <div> and print each with its index.
# enumerate() supplies the counter (starting at 0), replacing the hand-maintained
# counter that was initialised from an undefined name.
for i, child in enumerate(soup.body.div.children):
    print(i, '\t', child)

0 	 

1 	 <h1>Example Domain</h1>
2 	 

3 	 <p>This domain is established to be used for illustrative examples in documents. You may use this
    domain in examples without prior coordination or asking for permission.</p>
4 	 

5 	 <p><a href="http://www.iana.org/domains/example">More information...</a></p>
6 	 



In [32]:
# .descendants
# Walk every node nested under the <div> -- children, their children, and the
# text inside them -- printing each with its index. enumerate() supplies the
# counter, starting at 0.
for i, node in enumerate(soup.body.div.descendants):
    print(i, '\t', node)

0 	 

1 	 <h1>Example Domain</h1>
2 	 Example Domain
3 	 

4 	 <p>This domain is established to be used for illustrative examples in documents. You may use this
    domain in examples without prior coordination or asking for permission.</p>
5 	 This domain is established to be used for illustrative examples in documents. You may use this
    domain in examples without prior coordination or asking for permission.
6 	 

7 	 <p><a href="http://www.iana.org/domains/example">More information...</a></p>
8 	 <a href="http://www.iana.org/domains/example">More information...</a>
9 	 More information...
10 	 



In [35]:
# .parent
# The element that directly encloses <body> -- here the <html> tag.
soup.body.parent

<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 50px;
        background-color: #fff;
        border-radius: 1em;
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        body {
            background-color: #fff;
        }
        div {
            width: auto;
            margin: 0 auto;
            border-radius: 0;
            padding: 1em;
        }
    }
    </style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is established to be used for illustrative example

In [37]:
# .parent (again)
# Text nodes have a parent too: the title's string points back to its <title> tag.
x = soup.title.string
x.parent

<title>Example Domain</title>

### Searching in Beautiful Soup

In [41]:
# Search by tag name: every <p> element in the document.
soup.find_all(name='p')

[<p>This domain is established to be used for illustrative examples in documents. You may use this
     domain in examples without prior coordination or asking for permission.</p>,
 <p><a href="http://www.iana.org/domains/example">More information...</a></p>]

In [45]:
import re

# find_all() tests a compiled pattern against each tag name with .search(), so an
# unanchored '(p|h1)' would also match any name that merely contains "p" or "h1"
# (e.g. <span>, <pre>, <option>). Anchoring selects exactly <p> and <h1> tags.
soup.find_all(re.compile(r'^(?:p|h1)$'))

[<h1>Example Domain</h1>,
 <p>This domain is established to be used for illustrative examples in documents. You may use this
     domain in examples without prior coordination or asking for permission.</p>,
 <p><a href="http://www.iana.org/domains/example">More information...</a></p>]

In [51]:
# Keyword arguments filter on attributes: keep tags whose href matches the pattern.
href_pattern = re.compile('example')
soup.find_all(href=href_pattern)

[<a href="http://www.iana.org/domains/example">More information...</a>]