# Navigating the tree

In [1]:
# Sample HTML document that the cells below navigate.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

from bs4 import BeautifulSoup
# Build the parse tree, naming the parser ('html.parser') explicitly.
soup = BeautifulSoup(html_doc, 'html.parser')

## Navigating down

In [5]:
# Accessing a tag name as an attribute returns the first tag with that name.
soup.p

<p class="title"><b>The Dormouse's story</b></p>

In [7]:
# find_all collects every matching tag into a list, not just the first one.
soup.find_all('p')

[<p class="title"><b>The Dormouse's story</b></p>,
 <p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>,
 <p class="story">...</p>]

In [8]:
soup.head

<head><title>The Dormouse's story</title></head>

In [9]:
soup.title

<title>The Dormouse's story</title>

In [12]:
soup.a.string

'Elsie'

In [13]:
soup.body.b

<b>The Dormouse's story</b>

In [14]:
# .contents holds a tag's direct children as a list.
soup.head.contents

[<title>The Dormouse's story</title>]

In [15]:
soup.head.contents[0]

<title>The Dormouse's story</title>

In [17]:
soup.contents

['\n', <html><head><title>The Dormouse's story</title></head>
 <body>
 <p class="title"><b>The Dormouse's story</b></p>
 <p class="story">Once upon a time there were three little sisters; and their names were
 <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
 and they lived at the bottom of a well.</p>
 <p class="story">...</p>
 </body></html>]

In [23]:
# Keep a handle on the first child of <head>: the <title> tag.
title_tag = soup.head.contents[0]
title_tag

<title>The Dormouse's story</title>

In [24]:
# .children lets us loop over a tag's direct children.
for node in title_tag.children:
    print(node)

The Dormouse's story


In [26]:
# .descendants walks everything nested under a tag:
# child tags as well as the strings inside them.
for node in soup.head.descendants:
    print(node)

<title>The Dormouse's story</title>
The Dormouse's story


In [27]:
# .contents is already a list, so its length can be taken directly.
len(soup.contents)

2

In [28]:
# .descendants is an iterator rather than a list, so count nodes as they are produced.
sum(1 for _ in soup.descendants)

27

In [29]:
# .string can be used when the tag has only one child.
title_tag.string

"The Dormouse's story"

In [33]:
# .strings yields every string in the document;
# repr() makes the newlines and other whitespace visible.
for text in soup.strings:
    print(repr(text))

'\n'
"The Dormouse's story"
'\n'
'\n'
"The Dormouse's story"
'\n'
'Once upon a time there were three little sisters; and their names were\n'
'Elsie'
',\n'
'Lacie'
' and\n'
'Tillie'
';\nand they lived at the bottom of a well.'
'\n'
'...'
'\n'


In [34]:
# To drop the extra whitespace, iterate over .stripped_strings instead:
# whitespace-only strings disappear and the rest are trimmed.
for text in soup.stripped_strings:
    print(text)

The Dormouse's story
The Dormouse's story
Once upon a time there were three little sisters; and their names were
Elsie
,
Lacie
and
Tillie
;
and they lived at the bottom of a well.
...


## Navigating up

In [41]:
# Every tag and every string has a parent; start from the <title> tag.
title_tag = soup.title
title_tag

<title>The Dormouse's story</title>

In [42]:
title_tag.parent

<head><title>The Dormouse's story</title></head>

In [48]:
title_tag.string.parent

<title>The Dormouse's story</title>

In [47]:
type(title_tag.string.parent)

bs4.element.Tag

In [46]:
# The parent of the top-level <html> tag is the BeautifulSoup object itself.
html_tag = soup.html
type(html_tag.parent)

bs4.BeautifulSoup

In [51]:
# .parents walks upward from a tag through each of its ancestors in turn,
# finishing with the BeautifulSoup object, whose name is '[document]'.
# The iterator stops at the top of the tree and never yields None, so no
# None check is needed inside the loop.
link = soup.a
for parent in link.parents:
    print(parent.name)

p
body
html
[document]


## Going sideways

In [52]:
# Name the parser explicitly: without it bs4 picks whichever parser happens to
# be installed and emits a warning, so the tree could differ between machines.
# 'lxml' reproduces the <html><body> wrapper shown in the recorded output
# (the built-in 'html.parser' would not add that wrapper).
# The stray closing </b> from the original markup has been dropped.
sibling_soup = BeautifulSoup('<a><b>text1</b><c>text2</c></a>', 'lxml')
print(sibling_soup.prettify())

<html>
 <body>
  <a>
   <b>
    text1
   </b>
   <c>
    text2
   </c>
  </a>
 </body>
</html>


In [53]:
# .next_sibling is the node that follows this one at the same level of the tree.
sibling_soup.b.next_sibling

<c>text2</c>

In [55]:
# .previous_sibling is the node that precedes this one at the same level of the tree.
sibling_soup.c.previous_sibling

<b>text1</b>

In [57]:
# In the real document the node right after the first <a> is the text
# between the links, not the next <a> tag.
link = soup.a
link.next_sibling

',\n'

In [59]:
link.next_sibling.next_sibling

<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>

In [60]:
# .next_siblings iterates over every sibling after the tag, strings included.
for node in soup.a.next_siblings:
    print(repr(node))

',\n'
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
' and\n'
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
';\nand they lived at the bottom of a well.'


In [61]:
# .previous_siblings iterates backwards from the tag, nearest sibling first.
third_link = soup.find('a', id='link3')
for node in third_link.previous_siblings:
    print(repr(node))

' and\n'
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
',\n'
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
'Once upon a time there were three little sisters; and their names were\n'


## Going back and forth

In [63]:
# Start from the last link; its next sibling is the text that follows the tag.
last_tag = soup.find('a', id='link3')
last_tag.next_sibling

';\nand they lived at the bottom of a well.'

In [64]:
# .next_sibling and .next_element are very different, as we can see:
# here .next_element is the link's own text, not the text that follows the tag.
last_tag.next_element

'Tillie'

In [65]:
# .previous_element is the node that comes immediately before the tag in the document.
last_tag.previous_element

' and\n'

In [66]:
# .next_elements keeps moving forward from the tag through the rest of the document.
for node in last_tag.next_elements:
    print(repr(node))

'Tillie'
';\nand they lived at the bottom of a well.'
'\n'
<p class="story">...</p>
'...'
'\n'


In [69]:
# .previous_elements walks backwards from the tag through the document.
for node in last_tag.previous_elements:
    print(node)

 and

Lacie
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
,

Elsie
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
Once upon a time there were three little sisters; and their names were

<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>


The Dormouse's story
<b>The Dormouse's story</b>
<p class="title"><b>The Dormouse's story</b></p>


<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" h