## Buscando elementos
***

In [1]:
from bs4 import BeautifulSoup

In [2]:
# Parse the sample page with the lxml parser; `soup` is the parsed
# document that the rest of the notebook queries.
with open('arquivo.html') as html_file:
    soup = BeautifulSoup(html_file, 'lxml')

***
### find
***

Busca a primeira ocorrência de um determinado elemento

In [3]:
# Search by tag name: find() returns only the first <th> in the document.
tag = soup.find('th')
print(tag)

<th>Nome</th>


In [4]:
# string= searches text nodes instead of tags; the result is the matching
# string itself, not the <td> that contains it.
text = soup.find(string="Moe")
print(text)

Moe


In [5]:
# find() returns None when nothing matches. A plain str given to string= is
# compared against whole text nodes, not substrings.
# NOTE(review): presumably "parturient" only occurs inside a longer
# paragraph of arquivo.html — confirm against the file.
text = soup.find(string="parturient")
print(text)

None


In [6]:
# Keyword arguments such as id= act as attribute filters.
tag = soup.find(id="site")
print(tag)
# Attribute values are read with dict-style indexing on the Tag.
print(tag['href'])

<a href="http://www.google.com" id="site">
<span class="no-decoration">Google</span>
</a>
http://www.google.com


In [7]:
# Same id-based lookup; this time the match is a <td> cell.
tag = soup.find(id="july")
print(tag)

<td id="july">July</td>


In [8]:
# attrs= takes a dict of attribute filters, which allows filtering on
# "class" even though `class` is a reserved word in Python.
# The first element with class "email" is the <th> header cell.
tag = soup.find(attrs={'class': 'email'})
print(tag)

<th class="email">Email</th>


In [9]:
# class_ (with a trailing underscore) is the keyword-argument form of the
# class filter; it matches the same <th> header cell.
tag = soup.find(class_="email")
print(tag)

<th class="email">Email</th>


In [10]:
# Combining a tag name with class_ narrows the match to <td> elements,
# so the <th class="email"> header is skipped.
tag = soup.find('td', class_="email")
print(tag)

<td class="email">john@example.com</td>


In [11]:
# Calling find() on soup.tbody restricts the search to that element's
# contents, which also leaves the header cell out.
tag = soup.tbody.find(class_="email")
print(tag)

<td class="email">john@example.com</td>


In [12]:
def is_mary(tag):
    """Filter predicate: tell whether ``tag`` carries ``id="mary"``.

    Returns True when the tag's ``id`` attribute equals ``'mary'`` and
    False otherwise, including when the tag has no ``id`` attribute.
    """
    # get() yields None for a missing attribute, which never equals 'mary'.
    return tag.get('id') == 'mary'

In [13]:
# A function can be passed as the filter: find() returns the first tag for
# which the function returns True.
tag_mary = soup.find(is_mary)
print(tag_mary)
# .string gives the text held by the tag.
print(tag_mary.string)

<td id="mary">Mary</td>
Mary


***
### find_all
***

Busca todas as ocorrências descendo a árvore de tags

In [14]:
# find_all() returns a list with every matching element, here all <td> cells.
tags = soup.find_all('td')
print(tags)

[<td id="john">John</td>, <td>Doe</td>, <td class="email">john@example.com</td>, <td id="mary">Mary</td>, <td>Moe</td>, <td class="email">mary@example.com</td>, <td id="july">July</td>, <td>Dooley</td>, <td class="email">july@example.com</td>, <td id="mary">Cris</td>, <td>Moe</td>, <td class="email">cris@example.com</td>]


In [15]:
# A list of names matches any of them: both <td> and <th> cells are returned.
tags = soup.find_all(['td', 'th'])
print(tags)

[<th>Nome</th>, <th>Sobrenome</th>, <th class="email">Email</th>, <td id="john">John</td>, <td>Doe</td>, <td class="email">john@example.com</td>, <td id="mary">Mary</td>, <td>Moe</td>, <td class="email">mary@example.com</td>, <td id="july">July</td>, <td>Dooley</td>, <td class="email">july@example.com</td>, <td id="mary">Cris</td>, <td>Moe</td>, <td class="email">cris@example.com</td>]


In [16]:
# limit= caps the number of results; only the first three <td> cells come back.
tags = soup.find_all('td', limit=3)
print(tags)

[<td id="john">John</td>, <td>Doe</td>, <td class="email">john@example.com</td>]


In [17]:
# Name and class_ filters are combined: only <td> elements with class "email".
tags = soup.find_all('td', class_="email")
print(tags)

[<td class="email">john@example.com</td>, <td class="email">mary@example.com</td>, <td class="email">july@example.com</td>, <td class="email">cris@example.com</td>]


In [18]:
# A list given to class_ matches elements having any of the listed classes.
# NOTE(review): the output contains only class "email" elements, so nothing
# in this document seems to use class "site" ("site" shows up as an id on
# the <a> tag) — confirm the example is intended.
tags = soup.find_all(class_=["email", "site"])
print(tags)

[<th class="email">Email</th>, <td class="email">john@example.com</td>, <td class="email">mary@example.com</td>, <td class="email">july@example.com</td>, <td class="email">cris@example.com</td>]


In [19]:
# Scoping the search to <tbody> leaves out the <th class="email"> header cell.
tags = soup.tbody.find_all(class_="email")
print(tags)

[<td class="email">john@example.com</td>, <td class="email">mary@example.com</td>, <td class="email">july@example.com</td>, <td class="email">cris@example.com</td>]


In [20]:
# With string=, find_all() returns the matching text nodes rather than tags.
texts = soup.find_all(string="Moe")
print(texts)

['Moe', 'Moe']


In [21]:
# A list of strings matches text nodes equal to any of them.
texts = soup.find_all(string=["Moe", "Mary"])
print(texts)

['Mary', 'Moe', 'Moe']


In [22]:
# string=True matches every text node in the document, including the
# whitespace-only '\n' nodes between tags (see output).
all_texts = soup.find_all(string=True)
print(all_texts)

['html', '\n', '\n', 'Exemplo Bootstrap', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n          Meu projeto em Bootstrap\n      ', '\n', '\n        Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam id ornare odio, eu tincidunt massa.\n        In eget lorem augue. Sed auctor lacinia enim eu volutpat. Etiam sed massa dapibus, egestas ligula vel,\n        vulputate ex. Sed efficitur enim ac enim pharetra consequat. Pellentesque ante lorem, tempor eu ex in,\n        tincidunt pharetra diam. Sed ut mi iaculis, condimentum neque nec, cursus massa. Mauris ligula risus\n        euismod sit amet aliquam ut, consequat sit amet orci. Suspendisse vestibulum pellentesque metus\n        ultrices aliquam orci ullamcorper eget. Suspendisse a mattis felis.\n\n        ', 'Sed', ' imperdiet metus nec dui aliquet, sed sollicitudin augue dictum. Sed et condimentum risus, id luctus eros.\n        Phasellus vehicula, enim vel posuere gravida, felis massa volutpat mi, id euismod mi l

***
### find_parent(s)
***

Buscam as ocorrências subindo a árvore de tags e fazem tudo o que find e find_all fazem

In [23]:
# Baseline: searching downwards from <tbody> finds the cell with id="john".
tag = soup.tbody.find(id="john")
print(tag)

<td id="john">John</td>


In [24]:
# find_parent() searches upwards through ancestors only, so the "john" cell
# that sits below <tbody> is not found and the result is None.
tag = soup.tbody.find_parent(id="john")
print(tag)

None


In [25]:
# Starting from the <span>, the enclosing <a id="site"> is an ancestor,
# so the upward search finds it.
tag = soup.span.find_parent(id="site")
print(tag)

<a href="http://www.google.com" id="site">
<span class="no-decoration">Google</span>
</a>


***

In [26]:
# Downward search again, as a baseline to compare with find_parents().
tags = soup.tbody.find_all(class_="email")
print(tags)

[<td class="email">john@example.com</td>, <td class="email">mary@example.com</td>, <td class="email">july@example.com</td>, <td class="email">cris@example.com</td>]


In [27]:
# find_parents() returns a list of matching ancestors; no ancestor of
# <tbody> matches class "email", so the list is empty.
tags = soup.tbody.find_parents(class_="email")
print(tags)

[]


In [28]:
# List-returning form of the upward search: every ancestor of the <span>
# with id="site".
tags = soup.span.find_parents(id="site")
print(tags)

[<a href="http://www.google.com" id="site">
<span class="no-decoration">Google</span>
</a>]


***
### find_next_sibling(s) e find_previous_sibling(s)
***

Busca pelos irmãos, ou seja, filhos da mesma tag pai

In [29]:
# Starting row for the sibling examples; the following cells read `tag`.
tag = soup.find(id='menino-john')
print(tag)

<tr id="menino-john">
<td id="john">John</td>
<td>Doe</td>
<td class="email">john@example.com</td>
</tr>


In [30]:
# With no filter, find_next_sibling() returns the next sibling that is a
# tag: the following <tr>, not the newline text between the rows.
next_sibling = tag.find_next_sibling()
print(next_sibling)

<tr>
<td id="mary">Mary</td>
<td>Moe</td>
<td class="email">mary@example.com</td>
</tr>


In [31]:
# Two hops from the starting row: the next sibling of its next sibling
# (the "July" row). The value is derived from `tag` instead of reassigning
# next_sibling from itself, so re-running this cell always yields the same
# row rather than advancing one row further on each execution.
next_sibling = tag.find_next_sibling().find_next_sibling()
print(next_sibling)

<tr>
<td id="july">July</td>
<td>Dooley</td>
<td class="email">july@example.com</td>
</tr>


In [32]:
# Step back one row from the row currently held in next_sibling.
previous_sibling = next_sibling.find_previous_sibling()
print(previous_sibling)

<tr>
<td id="mary">Mary</td>
<td>Moe</td>
<td class="email">mary@example.com</td>
</tr>


In [33]:
# All sibling tags that follow the starting row, returned as a list.
next_siblings = tag.find_next_siblings()
print(next_siblings)

[<tr>
<td id="mary">Mary</td>
<td>Moe</td>
<td class="email">mary@example.com</td>
</tr>, <tr>
<td id="july">July</td>
<td>Dooley</td>
<td class="email">july@example.com</td>
</tr>, <tr>
<td id="mary">Cris</td>
<td>Moe</td>
<td class="email">cris@example.com</td>
</tr>]


In [34]:
# All sibling tags that precede the row in next_sibling; the list starts
# with the nearest one (see output order).
previous_siblings = next_sibling.find_previous_siblings()
print(previous_siblings)

[<tr>
<td id="mary">Mary</td>
<td>Moe</td>
<td class="email">mary@example.com</td>
</tr>, <tr id="menino-john">
<td id="john">John</td>
<td>Doe</td>
<td class="email">john@example.com</td>
</tr>]


***
### find_next, find_all_next, find_previous e find_all_previous
***

Buscando o próximo elemento e o anterior (segue para o próximo elemento sem respeitar a hierarquia)

In [35]:
# Re-select the starting row for the find_next / find_all_next examples.
tag = soup.find(id='menino-john')
print(tag)

<tr id="menino-john">
<td id="john">John</td>
<td>Doe</td>
<td class="email">john@example.com</td>
</tr>


In [36]:
# find_next() follows document order rather than sibling order, so the
# first hit is the row's own first child <td>.
tag_next = tag.find_next()
print(tag_next)

<td id="john">John</td>


In [37]:
# Every tag that comes after the starting row in the document: its own
# cells first, then the following rows and their cells.
tag_nexts = tag.find_all_next()
print(tag_nexts)

[<td id="john">John</td>, <td>Doe</td>, <td class="email">john@example.com</td>, <tr>
<td id="mary">Mary</td>
<td>Moe</td>
<td class="email">mary@example.com</td>
</tr>, <td id="mary">Mary</td>, <td>Moe</td>, <td class="email">mary@example.com</td>, <tr>
<td id="july">July</td>
<td>Dooley</td>
<td class="email">july@example.com</td>
</tr>, <td id="july">July</td>, <td>Dooley</td>, <td class="email">july@example.com</td>, <tr>
<td id="mary">Cris</td>
<td>Moe</td>
<td class="email">cris@example.com</td>
</tr>, <td id="mary">Cris</td>, <td>Moe</td>, <td class="email">cris@example.com</td>]


***

In [38]:
# Re-select the starting row for the find_previous / find_all_previous examples.
tag = soup.find(id='menino-john')
print(tag)

<tr id="menino-john">
<td id="john">John</td>
<td>Doe</td>
<td class="email">john@example.com</td>
</tr>


In [39]:
# find_previous() walks backwards through the document; the first tag it
# reaches is the enclosing <tbody>.
tag_previous = tag.find_previous()
print(tag_previous)

<tbody>
<tr id="menino-john">
<td id="john">John</td>
<td>Doe</td>
<td class="email">john@example.com</td>
</tr>
<tr>
<td id="mary">Mary</td>
<td>Moe</td>
<td class="email">mary@example.com</td>
</tr>
<tr>
<td id="july">July</td>
<td>Dooley</td>
<td class="email">july@example.com</td>
</tr>
<tr>
<td id="mary">Cris</td>
<td>Moe</td>
<td class="email">cris@example.com</td>
</tr>
</tbody>


In [40]:
# All tags that come before the starting row, nearest first (see output).
# NOTE(review): tag_previous held a single Tag in the previous cell and is
# rebound to a list here.
tag_previous = tag.find_all_previous()
print(tag_previous)

[<tbody>
<tr id="menino-john">
<td id="john">John</td>
<td>Doe</td>
<td class="email">john@example.com</td>
</tr>
<tr>
<td id="mary">Mary</td>
<td>Moe</td>
<td class="email">mary@example.com</td>
</tr>
<tr>
<td id="july">July</td>
<td>Dooley</td>
<td class="email">july@example.com</td>
</tr>
<tr>
<td id="mary">Cris</td>
<td>Moe</td>
<td class="email">cris@example.com</td>
</tr>
</tbody>, <th class="email">Email</th>, <th>Sobrenome</th>, <th>Nome</th>, <tr>
<th>Nome</th>
<th>Sobrenome</th>
<th class="email">Email</th>
</tr>, <thead>
<tr>
<th>Nome</th>
<th>Sobrenome</th>
<th class="email">Email</th>
</tr>
</thead>, <table class="table table-hover">
<thead>
<tr>
<th>Nome</th>
<th>Sobrenome</th>
<th class="email">Email</th>
</tr>
</thead>
<tbody>
<tr id="menino-john">
<td id="john">John</td>
<td>Doe</td>
<td class="email">john@example.com</td>
</tr>
<tr>
<td id="mary">Mary</td>
<td>Moe</td>
<td class="email">mary@example.com</td>
</tr>
<tr>
<td id="july">July</td>
<td>Dooley</td>
<td class