# **Quick Start**

**Import Beautiful Soup**

In [None]:
from bs4 import BeautifulSoup

Here’s an HTML document I’ll be using as an example throughout this document. It’s part of a story from Alice in Wonderland:

In [None]:
html_doc = """<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

Running the “three sisters” document through Beautiful Soup gives us a BeautifulSoup object, which represents the document as a nested data structure:

In [None]:
soup = BeautifulSoup(html_doc, 'html.parser')
print(soup.prettify())

<html>
 <head>
  <title>
   The Dormouse's story
  </title>
 </head>
 <body>
  <p class="title">
   <b>
    The Dormouse's story
   </b>
  </p>
  <p class="story">
   Once upon a time there were three little sisters; and their names were
   <a class="sister" href="http://example.com/elsie" id="link1">
    Elsie
   </a>
   ,
   <a class="sister" href="http://example.com/lacie" id="link2">
    Lacie
   </a>
   and
   <a class="sister" href="http://example.com/tillie" id="link3">
    Tillie
   </a>
   ;
and they lived at the bottom of a well.
  </p>
  <p class="story">
   ...
  </p>
 </body>
</html>


here's how to navigate through these data structure

In [None]:
soup

<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body></html>

In [None]:
soup.title

<title>The Dormouse's story</title>

In [None]:
soup.p

<p class="title"><b>The Dormouse's story</b></p>

In [None]:
soup.a

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

In [None]:
soup.title.name

'title'

In [None]:
soup.title.string

"The Dormouse's story"

In [None]:
soup.p.string

"The Dormouse's story"

In [None]:
soup.a.string

'Elsie'

In [None]:
soup.a['href']

'http://example.com/elsie'

In [None]:
soup.p['class']

['title']

In [None]:
soup.a['class']

['sister']

In [None]:
soup.find_all('p')

[<p class="title"><b>The Dormouse's story</b></p>,
 <p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>,
 <p class="story">...</p>]

In [None]:
soup.find(id = 'link2')

<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>

In [None]:
soup.find_all('a')

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

one simple task is to find the URL from a page's <a> tag

In [None]:
for link in soup.find_all('a'):
  print(link['href'])

http://example.com/elsie
http://example.com/lacie
http://example.com/tillie


In [None]:
for link in soup.find_all('a'):
  print(link.get('href'))

http://example.com/elsie
http://example.com/lacie
http://example.com/tillie


In [None]:
for link in soup.find_all('a'):
  print(link.get('id'))

link1
link2
link3


In [None]:
for link in soup.find_all('a'):
  print(link.get_text())

Elsie
Lacie
Tillie


In [None]:
for link in soup.find_all('a'):
  print(link['class'])

['sister']
['sister']
['sister']


In [None]:
soup.find_all('p')

[<p class="title"><b>The Dormouse's story</b></p>,
 <p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>,
 <p class="story">...</p>]

In [None]:
for texts in soup.find_all('p'):
  print(texts['class'])


['title']
['story']
['story']


In [None]:
for texts in soup.find_all('p'):
  print(texts.get_text())

The Dormouse's story
Once upon a time there were three little sisters; and their names were
Elsie,
Lacie and
Tillie;
and they lived at the bottom of a well.
...


In [None]:
for texts in soup.find_all('p'):
  if texts['class'] == ['title']:
    print(texts.get_text())

The Dormouse's story


In [None]:
for texts in soup.find_all('p'):
  if texts['class'] == ['story']:
    print(texts.get_text())

Once upon a time there were three little sisters; and their names were
Elsie,
Lacie and
Tillie;
and they lived at the bottom of a well.
...


Another common task is to get all the texts from a html documents

In [None]:
print(soup.get_text())

The Dormouse's story

The Dormouse's story
Once upon a time there were three little sisters; and their names were
Elsie,
Lacie and
Tillie;
and they lived at the bottom of a well.
...



parse html from a file using file handle

In [None]:
'''
with open('filename.html') as filename:
  soup =  BeautifulSoup(filename, 'html.parser')
'''

"\nwith open('filename.html') as filename:\n  soup =  BeautifulSoup(filename, 'html.parser')\n"

# **Kinds of Objects**

Beautiful soup converts a complex HTML document into a complex tree of Python objects. we have to deal with four kind of objects.


1. Tag
2. NavigableString
3. Beautiful Soup 
4. Comment

## **Tag**

A Tag object corresponds to an XML or HTML tag in the original document:

In [None]:
soup = BeautifulSoup('<b class="boldest">Extremely bold</b>', 'html.parser')
tag = soup.b

In [None]:
type(tag)

bs4.element.Tag

Tag have several attributes and methods. most important features of a tag is its name and attribute

### **Name**

In [None]:
soup

<b class="boldest">Extremely bold</b>

In [None]:
# name of the tag
tag.name

'b'

In [None]:
# we can change the name. it will reflect on the HTML document
# before name change
print(soup)

<b class="boldest">Extremely bold</b>


In [None]:
# after name change
tag.name = 'blockquote'
print(soup)

<blockquote class="boldest">Extremely bold</blockquote>


### **Attributes**

A tag may have any number of attributes. The tag <b id="boldest"> has an attribute “id” whose value is “boldest”. You can access a tag’s attributes by treating the tag like a dictionary:

In [None]:
soup = BeautifulSoup('<b id="boldest">bold</b>', 'html.parser')
tag = soup.b
print(tag['id'])
# 'boldest'

boldest


In [None]:
# You can access that dictionary directly as .attrs:
tag.attrs

{'id': 'boldest'}

In [None]:
# You can add, remove, and modify a tag’s attributes. Again, this is done by treating the tag as a dictionary:
tag['id']
tag['id'] = 'very boldest'
tag['another tag'] = 1
tag.attrs

{'id': 'very boldest', 'another tag': 1}

In [None]:
print(tag)

<b another tag="1" id="very boldest">bold</b>


In [None]:
del tag['another tag']

In [None]:
print(tag)

<b id="very boldest">bold</b>


In [None]:
del tag['id']

In [None]:
print(tag)

<b>bold</b>


In [None]:
tag['id'] = 'boldest'
print(tag)

<b id="boldest">bold</b>


####**Multi-value attributes**

The most common multi-valued attribute is class (that is, a tag can have more than one CSS class). Others include rel, rev, accept-charset, headers, and accesskey. Beautiful Soup presents the value(s) of a multi-valued attribute as a list:

In [None]:
css_soup = BeautifulSoup('<p class="body"></p>', 'html.parser')
css_soup.p['class']
# ['body']

['body']

In [None]:
css_soup = BeautifulSoup('<p class="body strikeout"></p>', 'html.parser')
css_soup.p['class']
# ['body', 'strikeout']
# class defined as a multi-value attribute

['body', 'strikeout']

If an attribute looks like it has more than one value, but it’s not a multi-valued attribute as defined by any version of the HTML standard, Beautiful Soup will leave the attribute alone:

In [None]:
id_soup = BeautifulSoup('<p id="my id"></p>', 'html.parser')
id_soup.p['id']
# 'my id'
# as id is not defined as a multi-value attributes. thats why it is showing as a single value attribute

'my id'

When you turn a tag back into a string, multiple attribute values are consolidated:

In [None]:
rel_soup = BeautifulSoup('<p>Back to the <a rel="index">homepage</a></p>', 'html.parser')
rel_soup.a['rel']
# ['index']

['index']

In [None]:
rel_soup.p

<p>Back to the <a rel="index">homepage</a></p>

In [None]:
rel_soup.a['rel'] = ['index', 'contents']
print(rel_soup.p)

<p>Back to the <a rel="index contents">homepage</a></p>


In [None]:
rel_soup.a['rel']
# rel is a multi valued attributes

['index', 'contents']

You can disable this by passing multi_valued_attributes=None as a keyword argument into the BeautifulSoup constructor:

In [None]:
'''
no_list_soup = BeautifulSoup('<p class="body strikeout"></p>', 'html.parser', multi_valued_attributes=None)
no_list_soup.p['class']
# 'body strikeout'
'''

'\nno_list_soup = BeautifulSoup(\'<p class="body strikeout"></p>\', \'html.parser\', multi_valued_attributes=None)\nno_list_soup.p[\'class\']\n# \'body strikeout\'\n'

You can use get_attribute_list to get a value that’s always a list, whether or not it’s a multi-valued atribute:

In [None]:
id_soup

<p id="my id"></p>

In [None]:
id_soup.p['class'] = 'stories'

In [None]:
id_soup

<p class="stories" id="my id"></p>

In [None]:
id_soup.p.get_attribute_list

<bound method Tag.get_attribute_list of <p class="stories" id="my id"></p>>

If you parse a document as XML, there are no multi-valued attributes:

In [None]:
xml_soup = BeautifulSoup('<p class="body strikeout"></p>', 'xml')
xml_soup.p['class']
# 'body strikeout'

'body strikeout'

## **Navigable String**

A string corresponds to a bit of text within a tag. Beautiful Soup uses the NavigableString class to contain these bits of text:

In [None]:
soup = BeautifulSoup('<b class="boldest">Extremely bold</b>', 'html.parser')
tag = soup.b
print(tag.string)
# 'Extremely bold'
type(tag.string)
# <class 'bs4.element.NavigableString'>

Extremely bold


bs4.element.NavigableString

In [None]:
# convert to string
unicode_string = str(tag.string)
print(unicode_string)
# 'Extremely bold'
type(unicode_string)
# <type 'str'>

Extremely bold


str

In [None]:
# You can’t edit a string in place, but you can replace one string with another, using replace_with():
tag.string.replace_with('Not bold anymore')
tag

<b class="boldest">Not bold anymore</b>

## **Beautiful Soup**

In [None]:
doc = BeautifulSoup("<document><content/>INSERT FOOTER HERE</document", "xml")
footer = BeautifulSoup("<footer>Here's the footer</footer>", "xml")
doc.find(text="INSERT FOOTER HERE").replace_with(footer)
# 'INSERT FOOTER HERE'
print(doc)
# <?xml version="1.0" encoding="utf-8"?>
# <document><content/><footer>Here's the footer</footer></document>

<?xml version="1.0" encoding="utf-8"?>
<document><content/><footer>Here's the footer</footer></document>


In [None]:
doc.name

'[document]'

In [None]:
footer.name

'[document]'

## **Comment**

In [None]:
markup = "<b><!--Hey, buddy. Want to buy a used parser?--></b>"
soup = BeautifulSoup(markup, 'html.parser')
comment = soup.b.string
type(comment)
# <class 'bs4.element.Comment'>

bs4.element.Comment

In [None]:
comment

'Hey, buddy. Want to buy a used parser?'

In [None]:
print(soup.b)

<b><!--Hey, buddy. Want to buy a used parser?--></b>


In [None]:
print(soup.b.prettify())

<b>
 <!--Hey, buddy. Want to buy a used parser?-->
</b>


# **Navigating the Tree**

In [None]:
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

In [None]:
soup = BeautifulSoup(html_doc, 'html.parser')

In [None]:
print(soup.prettify())

<html>
 <head>
  <title>
   The Dormouse's story
  </title>
 </head>
 <body>
  <p class="title">
   <b>
    The Dormouse's story
   </b>
  </p>
  <p class="story">
   Once upon a time there were three little sisters; and their names were
   <a class="sister" href="http://example.com/elsie" id="link1">
    Elsie
   </a>
   ,
   <a class="sister" href="http://example.com/lacie" id="link2">
    Lacie
   </a>
   and
   <a class="sister" href="http://example.com/tillie" id="link3">
    Tillie
   </a>
   ;
and they lived at the bottom of a well.
  </p>
  <p class="story">
   ...
  </p>
 </body>
</html>


In [None]:
soup


<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body></html>

Navigating using tag names

In [None]:
print(soup.head)
print(soup.title)
print(soup.body.b)
print(soup.p)
print(soup.body.p)
print(soup.a)

<head><title>The Dormouse's story</title></head>
<title>The Dormouse's story</title>
<b>The Dormouse's story</b>
<p class="title"><b>The Dormouse's story</b></p>
<p class="title"><b>The Dormouse's story</b></p>
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>


In [None]:
soup.find_all('a')

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [None]:
soup.find_all('p')

[<p class="title"><b>The Dormouse's story</b></p>,
 <p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>,
 <p class="story">...</p>]

### Contents, Children

In [None]:
soup


<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body></html>

In [None]:
head_tag = soup.head
print(head_tag)
print(head_tag.contents)
print(head_tag.contents[0])
print(head_tag.contents[0].string)
print(head_tag.contents[0].get_text())

<head><title>The Dormouse's story</title></head>
[<title>The Dormouse's story</title>]
<title>The Dormouse's story</title>
The Dormouse's story
The Dormouse's story


In [None]:
title_tag = head_tag.contents[0]
print(title_tag)
print(title_tag.contents[0])

<title>The Dormouse's story</title>
The Dormouse's story


Instead of getting them as a list, you can iterate over a tag’s children using the .children generator:

In [197]:
for child in title_tag.children:
  print(child)

The Dormouse's story


In [200]:
for child in head_tag.children:
  print(child)

<title>The Dormouse's story</title>


**.descendants**

In [206]:
# descendants
for child in head_tag.descendants:
  print(child)

<title>The Dormouse's story</title>
The Dormouse's story


The <head> tag has only one child, but it has two descendants: the <title> tag and the <title> tag’s child. The BeautifulSoup object only has one direct child (the <html> tag), but it has a whole lot of descendants:

In [218]:
# a new line '\n' is showing as a children. i dont know why
len(list(soup.children))


2

In [219]:
len(list(soup.descendants))
# a new line '\n' is showing as a children. i dont know why

27

**.string**

If a tag has only one child, and that child is a NavigableString, the child is made available as .string:

In [222]:
title_tag.string

"The Dormouse's story"

If a tag’s only child is another tag, and that tag has a .string, then the parent tag is considered to have the same .string as its child:

In [223]:
head_tag.contents

[<title>The Dormouse's story</title>]

In [224]:
head_tag.string

"The Dormouse's story"

If a tag contains more than one thing, then it’s not clear what .string should refer to, so .string is defined to be None:

In [225]:
soup.html.string

**.strings and .stripped_strings**

If there’s more than one thing inside a tag, you can still look at just the strings. Use the .strings generator:

In [238]:
for string in soup.strings:
  print(string)



The Dormouse's story




The Dormouse's story


Once upon a time there were three little sisters; and their names were

Elsie
,

Lacie
 and

Tillie
;
and they lived at the bottom of a well.


...




In [239]:
# to remove whitespace use .stripped_strings
for string in soup.stripped_strings:
  print(string)

The Dormouse's story
The Dormouse's story
Once upon a time there were three little sisters; and their names were
Elsie
,
Lacie
and
Tillie
;
and they lived at the bottom of a well.
...


**.parent and .parents**

In [240]:
title_tag.parent

<head><title>The Dormouse's story</title></head>

In [241]:
title_tag.string.parent

<title>The Dormouse's story</title>

In [257]:
head_tag.parent.name

'html'

In [259]:
soup.parent
# none

In [246]:
body_tag = soup.body
body_tag

<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body>

In [256]:
body_tag.parent.name

'html'

In [252]:
link1 = soup.a
print(link1)

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>


In [254]:
for parent in link1.parents:
  print(parent.name)

p
body
html
[document]


In [260]:
for parent in link1.parents:
  print(parent)
  print('\n \n')

<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

 

<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body>

 

<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names wer

### **Going sideways**

In [261]:
sibling_soup = BeautifulSoup("<a><b>text1</b><c>text2</c></a>", 'html.parser')
print(sibling_soup.prettify())

<a>
 <b>
  text1
 </b>
 <c>
  text2
 </c>
</a>


The < b> tag and the < c> tag are at the same level: they’re both direct children of the same tag. We call them siblings. When a document is pretty-printed, siblings show up at the same indentation level.

**.next_sibling, .previous_sibling**

In [262]:
sibling_soup

<a><b>text1</b><c>text2</c></a>

In [265]:
b = sibling_soup.b
print(b)

<b>text1</b>


In [273]:
 # c = sibling_soup.b.next_sibling
 c = b.next_sibling
 print(c)

<c>text2</c>


In [274]:
print(c.next_sibling)


None


In [275]:
print(b.previous_sibling)

None


In [276]:
soup


<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body></html>

In [278]:
link1 = soup.a
print(link1)

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>


In [280]:
comma = link1.next_sibling
print(comma)

,



In [282]:
link2 = comma.next_sibling
print(link2)

<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>


In [283]:
ands = link2.next_sibling
print(ands)

 and



**.next_siblings, .previous_siblings**

In [289]:
print(soup.a.string)
for sibling in soup.a.next_siblings:
  # print(sibling.string)
  print(sibling)

Elsie
,

<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
 and

<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
;
and they lived at the bottom of a well.


In [290]:
soup.p

<p class="title"><b>The Dormouse's story</b></p>

In [299]:
soup


<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body></html>

In [304]:
for p in soup.p.strings:
  print(p)

The Dormouse's story


### **Going back and forth**

**.next_element, .previous_element**

In [306]:
a = soup.find(id = 'link3')
print(a)
a.next_sibling

<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>


';\nand they lived at the bottom of a well.'

In [307]:
soup


<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body></html>

In [309]:
a = soup.find(id= 'link1')
print(a.next_sibling)

,



In [310]:
print(a.next_element)

Elsie


That’s because in the original markup, the word “Elsie” appeared before that comma. The parser encountered an < a> tag, then the word “Elsie”, then the closing < /a> tag, then the comma and rest of the sentence. The comma is on the same level as the < a> tag, but the word “Elsie” was encountered first.

In [315]:
n_element = a.next_element
print(n_element)

Elsie


In [316]:
n_element.next_element

',\n'

In [317]:
n_element.next_element.next_element
# lacie.previous_element

<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>

In [319]:
lacie = n_element.next_element.next_element.next_element
lacie

'Lacie'

In [313]:
type(a.string)

bs4.element.NavigableString

In [320]:
lacie.previous_element

<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>

In [321]:
for element in a.next_elements:
  print(element)

Elsie
,

<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
Lacie
 and

<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
Tillie
;
and they lived at the bottom of a well.


<p class="story">...</p>
...




In [322]:
soup


<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body></html>

In [328]:
a3 = soup.find(id = 'link3')
print(a3)

<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>


In [329]:
for element in a3.previous_elements:
  print(element)

 and

Lacie
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
,

Elsie
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
Once upon a time there were three little sisters; and their names were

<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>


The Dormouse's story
<b>The Dormouse's story</b>
<p class="title"><b>The Dormouse's story</b></p>


<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" h