In [75]:
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<div class="brother" id="link11">BroTher</div>,
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

In [76]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_doc, 'html.parser')

In [7]:
soup.find_all("title")

[<title>The Dormouse's story</title>]

In [9]:
soup.find_all("html")

[<html><head><title>The Dormouse's story</title></head>
 <body>
 <p class="title"><b>The Dormouse's story</b></p>
 <p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>
 <p class="story">...</p>
 </body></html>]

In [10]:
soup.find_all("p", "title")

[<p class="title"><b>The Dormouse's story</b></p>]

In [11]:
soup.find_all("a")

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [12]:
soup.find_all(id="link2")

[<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>]

In [23]:
import re
soup.find(string=re.compile("Dormouse"))

"The Dormouse's story"

In [24]:
soup.find_all("a", class_="sister")

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [26]:
soup.find_all(class_=re.compile("i"))

[<p class="title"><b>The Dormouse's story</b></p>,
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [61]:
def has_five_characters(css_class):
    return css_class is not None and len(css_class) == 5

In [63]:
len(soup.find_all(class_ = has_five_characters))

3

In [64]:
css_soup = BeautifulSoup('<p class="body strikeout"></p>')

In [65]:
css_soup.find_all("p", class_="strikeout")

[<p class="body strikeout"></p>]

In [66]:
css_soup.find_all("p", class_="body strikeout")

[<p class="body strikeout"></p>]

In [68]:
css_soup.find_all("p", class_="body strikeout")

[<p class="body strikeout"></p>]

In [71]:
soup.find_all(string=["Tillie", "Elsie", "Lacie"])

['Elsie', 'Lacie', 'Tillie']

In [72]:
soup.find_all(string=re.compile("Dormouse"))

["The Dormouse's story", "The Dormouse's story"]

In [73]:
def is_the_only_string_within_a_tag(s):
    """Return True if this string is the only child of its parent tag."""
    return (s == s.parent.string)  


In [77]:
soup.find_all(string=is_the_only_string_within_a_tag)

["The Dormouse's story",
 'BroTher',
 "The Dormouse's story",
 'Elsie',
 'Lacie',
 'Tillie',
 '...']

In [78]:
soup.find_all("a", string="Elsie")

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]

In [79]:
soup.find_all("a", text="Elsie")

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]

In [81]:
soup.find_all("a")

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [82]:
soup.html.find_all("title")

[<title>The Dormouse's story</title>]

In [88]:
soup.find_all("title")

[<title>The Dormouse's story</title>]

In [89]:
soup.find_all("a")

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [94]:
soup("div")

[<div class="brother" id="link11">BroTher</div>]

In [100]:
soup.title.find_all(string=True)
soup.title(string=True)

["The Dormouse's story"]

In [101]:
soup.find_all('title', limit=1)

[<title>The Dormouse's story</title>]

In [102]:
soup.find('title')

<title>The Dormouse's story</title>

In [109]:
a_string = soup.find(string="Lacie")
a_string

'Lacie'

In [105]:
a_string.find_parents("a")

[<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>]

In [107]:
a_string.find_parent()

In [117]:
first_link = soup.a
first_link

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

In [123]:
first_link.find_next_siblings("a")

[<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [124]:
first_story_paragraph = soup.find("p", "story")

In [125]:
first_story_paragraph

<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

In [127]:
first_story_paragraph.find_next_siblings("p")

[<p class="story">...</p>]

In [128]:
first_link = soup.a

In [129]:
first_link.find_all_previous("p")

[<p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>,
 <p class="title"><b>The Dormouse's story</b></p>]

In [130]:
first_link.find_previous("title")

<title>The Dormouse's story</title>

In [131]:
soup.select("title")

[<title>The Dormouse's story</title>]

In [137]:
soup.select("p:nth-of-type(3)")

[<p class="story">...</p>]

In [138]:
soup.select("body a")

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [139]:
soup = BeautifulSoup('<b class="boldest">Extremely bold</b>')

In [140]:
tag = soup.b

In [141]:
tag

<b class="boldest">Extremely bold</b>

In [142]:
tag.name = "blockquote"
tag['class'] = 'verybold'
tag['id'] = 1
tag

<blockquote class="verybold" id="1">Extremely bold</blockquote>

In [143]:
del tag['class']
del tag['id']

In [144]:
tag

<blockquote>Extremely bold</blockquote>

In [146]:
markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'
soup = BeautifulSoup(markup)

tag = soup.a
tag.string = "New link text."
tag

<a href="http://example.com/">New link text.</a>

In [147]:
soup = BeautifulSoup("<a>Foo</a>")
soup.a.append("Bar")

In [148]:
soup

<html><body><a>FooBar</a></body></html>

In [149]:
soup.a.contents

['Foo', 'Bar']

In [154]:
li = [1,2,3,4]
li.append([11,22,33])
li

[1, 2, 3, 4, [11, 22, 33]]

In [155]:
li = [1,2,3,4]
li.extend([11,22,33])
li

[1, 2, 3, 4, 11, 22, 33]

In [156]:
soup = BeautifulSoup("<a>Soup</a>")
soup.a.extend(["'s", " ", "on"])
soup

<html><body><a>Soup's on</a></body></html>

In [157]:
soup.a.contents

['Soup', "'s", ' ', 'on']

In [158]:
markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'
soup = BeautifulSoup(markup)
tag = soup.a

tag.insert(1, "but did not endorse ")

In [159]:
tag

<a href="http://example.com/">I linked to but did not endorse <i>example.com</i></a>

In [160]:
tag.contents

['I linked to ', 'but did not endorse ', <i>example.com</i>]

In [173]:
markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'

In [177]:
soup = BeautifulSoup(markup)
tag = soup.a
tag

<a href="http://example.com/">I linked to <i>example.com</i></a>

In [178]:
tag.clear()
tag

<a href="http://example.com/"></a>

In [180]:
markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'
soup = BeautifulSoup(markup)
a_tag = soup.a
i_tag = soup.i.extract()

In [182]:
print(a_tag)
print(i_tag)

<a href="http://example.com/">I linked to </a>
<i>example.com</i>


In [183]:
markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'
soup = BeautifulSoup(markup)
a_tag = soup.a

In [184]:
soup.i.decompose()
a_tag

<a href="http://example.com/">I linked to </a>

In [186]:
soup = BeautifulSoup("<p>I wish I was Italian.</p>")
soup.p.string.wrap(soup.new_tag("i"))

<i>I wish I was Italian.</i>

In [188]:
soup.p.wrap(soup.new_tag("div"))

<div><p><i>I wish I was Italian.</i></p></div>

In [189]:
soup = BeautifulSoup("<p>A one</p>")
soup.p.append(", a two")

soup.p.contents
# [u'A one', u', a two']

print(soup.p.encode())
# <p>A one, a two</p>

print(soup.p.prettify())

b'<p>A one, a two</p>'
<p>
 A one
 , a two
</p>


In [190]:
soup.smooth()
soup.p.contents


['A one, a two']

In [192]:
print(soup.p.prettify())

<p>
 A one, a two
</p>


In [194]:
markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'
soup = BeautifulSoup(markup)
soup.prettify()
print(soup.prettify())

<html>
 <body>
  <a href="http://example.com/">
   I linked to
   <i>
    example.com
   </i>
  </a>
 </body>
</html>


In [201]:
str(soup)

'<html><body><a href="http://example.com/">I linked to <i>example.com</i></a></body></html>'

In [204]:
soup.a

<a href="http://example.com/">I linked to <i>example.com</i></a>

In [205]:
from bs4.formatter import HTMLFormatter
def uppercase(str):
    return str.upper()
formatter = HTMLFormatter(uppercase)
print(soup.prettify(formatter=formatter))

<html>
 <body>
  <a href="HTTP://EXAMPLE.COM/">
   I LINKED TO
   <i>
    EXAMPLE.COM
   </i>
  </a>
 </body>
</html>


In [211]:
attr_soup = BeautifulSoup(b'<p z="1" m="2" a="3"></p>')
print(attr_soup.p)

class UnsortedAttributes(HTMLFormatter):
    def attributes(self, tag):
        for k, v in tag.attrs.items():
            if k == 'm':
                continue
            yield k, v
class UnsortedAttributes2(HTMLFormatter):
    def attributes(self, tag):
        for k, v in tag.attrs.items():
            if k == '1':
                continue
            yield k, v

print(attr_soup.p.encode(formatter=UnsortedAttributes()))
print(attr_soup.p.encode(formatter=UnsortedAttributes2()))

<p a="3" m="2" z="1"></p>
b'<p z="1" a="3"></p>'
b'<p z="1" m="2" a="3"></p>'


In [212]:
markup = '<a href="http://example.com/">\nI linked to <i>example.com</i>\n</a>'
soup = BeautifulSoup(markup)

In [213]:
soup.get_text()

'\nI linked to example.com\n'

In [215]:
soup.a.get_text()

'\nI linked to example.com\n'

In [217]:
soup.get_text("|")

'\nI linked to |example.com|\n'

In [222]:
markup = "<h1>Sacr\xc3\xa9 bleu!</h1>"
soup = BeautifulSoup(markup)
soup.h1
soup.h1.string

'SacrÃ© bleu!'

In [223]:
print(soup.original_encoding)

None


In [226]:
markup = b"<h1>\xed\xe5\xec\xf9</h1>"
soup = BeautifulSoup(markup)
print(soup.h1)
soup.original_encoding

<h1>νεμω</h1>


'ISO-8859-7'

In [227]:
soup = BeautifulSoup(markup, exclude_encodings=["ISO-8859-7"])
print(soup.h1)
print(soup.original_encoding)

<h1>íåìù</h1>
windows-1252


In [229]:
markup = b"<p>I just \x93love\x94 Microsoft Word\x92s smart quotes</p>"
markup

b'<p>I just \x93love\x94 Microsoft Word\x92s smart quotes</p>'

In [231]:
UnicodeDammit(markup, ["windows-1252"], smart_quotefrom bs4 import UnicodeDammit
s_to="ascii").unicode_markup

'<p>I just "love" Microsoft Word\'s smart quotes</p>'

In [234]:
markup = "<p>I want <b>pizza</b> and more <b>pizza</b>!</p>"
soup = BeautifulSoup(markup, 'html.parser')
first_b, second_b = soup.find_all('b')
print(first_b)
print(second_b)
print (first_b == second_b)
print (first_b.previous_element == second_b.previous_element)
print (first_b is second_b)

<b>pizza</b>
<b>pizza</b>
True
False
False


In [235]:
import copy
p_copy = copy.copy(soup.p)
print (p_copy)

<p>I want <b>pizza</b> and more <b>pizza</b>!</p>


In [243]:
from bs4 import SoupStrainer
only_a_tags = SoupStrainer("a")
only_tags_with_id_link2 = SoupStrainer(id="link2")

def is_short_string(string):
    return len(string) < 10

only_short_strings = SoupStrainer(string=is_short_string)

In [244]:
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""
print(BeautifulSoup(html_doc, "html.parser", parse_only=only_a_tags).prettify())


<a class="sister" href="http://example.com/elsie" id="link1">
 Elsie
</a>
<a class="sister" href="http://example.com/lacie" id="link2">
 Lacie
</a>
<a class="sister" href="http://example.com/tillie" id="link3">
 Tillie
</a>


In [245]:
print(BeautifulSoup(html_doc, "html.parser", parse_only=only_tags_with_id_link2).prettify())

<a class="sister" href="http://example.com/lacie" id="link2">
 Lacie
</a>


In [252]:
print(BeautifulSoup(html_doc, "html.parser", parse_only=only_short_strings)

SyntaxError: unexpected EOF while parsing (<ipython-input-252-9239f1c12ff9>, line 1)

In [253]:
soup = BeautifulSoup(html_doc)
soup.find_all(only_short_strings)

[<html><head><title>The Dormouse's story</title></head>
 <body>
 <p class="title"><b>The Dormouse's story</b></p>
 <p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>
 <p class="story">...</p>
 </body></html>,
 <head><title>The Dormouse's story</title></head>,
 <title>The Dormouse's story</title>,
 <body>
 <p class="title"><b>The Dormouse's story</b></p>
 <p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bo

In [263]:
from bs4.diagnose import diagnose

with open("../Bulgaria3maps.html") as fp:
    data = fp.read()
print(diagnose(data))

Diagnostic running on Beautiful Soup 4.8.0
Python version 3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]
Found lxml version 4.3.4.0
Found html5lib version 1.0.1

Trying to parse your markup with html.parser
Here's what html.parser did with the markup:
<!DOCTYPE html>
<html>
 <head>
  <!-- Include Google Maps JS API -->
  <script src="https://maps.googleapis.com/maps/api/js?key=AIzaSyAP4Pk_GnfkfguMC8Y5HmkhVQfOj3OCbto&amp;sensor=false" type="text/javascript">
  </script>
  <style type="text/css">
   .mapClass { width: 450px; height: 450px; display: inline-block; }
  </style>
  <!-- Map creation is here -->
  <script type="text/javascript">
   //Defining map as a global variable to access from other functions
              var map1, map2, map3;
              //Enabling new cartography and themes
              google.maps.visualRefresh = true;

              function initMapOne() {
                    //Setting starting options of map
                    var mapOptions 