# Searching and extracting: `find_all()`

In [None]:
# import libraries
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

## Get HTML into Python

In [None]:
# assign the URL of the page to scrape to variable: url
# (alternative test page: https://webscraper.io/test-sites/e-commerce/allinone/phones)
url = "https://webscraper.io/test-sites/e-commerce/allinone/computers/tablets"

# get the HTML from url and assign the response to variable: page
# a timeout is set because requests otherwise waits indefinitely on a stalled server
page = requests.get(url, timeout=10)
# stop here on a 4xx/5xx response instead of parsing an error page further down
page.raise_for_status()
page  # <Response [200]> = HTTP 200 OK, the request has succeeded

<Response [200]>

In [None]:
# take the response body as text (a string of HTML),
# parse it with the lxml parser and assign the result to variable: soup
soup = BeautifulSoup(markup=page.text, features="lxml")
soup  # returns the parsed HTML

## How do we return all prices, not just the first one?
Even though every price uses the same tag and the same class, you can use `find_all()` to return all of the prices: every `h4` with a class of `pull-right price`.

In [None]:
# returns all of the prices in a Python list
# (the class filter is passed as an attribute dictionary)
soup.find_all('h4', {'class':'pull-right price'})

In [None]:
# the same query written with the class_ keyword instead of a dictionary;
# again, this returns the same output as above ⬆️
soup.find_all('h4', class_ = 'pull-right price')

In [None]:
# return the last price using list indexing (index -1 is the final item)
soup.find_all('h4', class_ = 'pull-right price')[-1]

<h4 class="pull-right price">$603.99</h4>

In [None]:
# return the second and third prices using list slicing
# ([1:3] covers indexes 1 and 2; the stop index 3 is excluded)
soup.find_all('h4', class_ = 'pull-right price')[1:3]

[<h4 class="pull-right price">$88.99</h4>,
 <h4 class="pull-right price">$96.99</h4>]

In [None]:
# store the full list of price tags in: title_list
# (despite its name, the variable holds the price <h4> tags, not titles)
title_list = soup.find_all('h4', class_ = 'pull-right price')
# index 1 picks the second tag; .string returns the text inside it
title_list[1].string

'$88.99'

In [None]:
# rebuild the list of price tags, then keep a slice of it in: new_list
title_list = soup.find_all('h4', class_ = 'pull-right price')
# [-3:-1] takes the third-to-last and second-to-last tags (the last one is excluded)
new_list = title_list[-3:-1]
new_list

[<h4 class="pull-right price">$537.99</h4>,
 <h4 class="pull-right price">$587.99</h4>]

## Return a Python list of prices

In [None]:
# build a plain list of price strings from the price tags held in title_list
out = [elem.text for elem in title_list]
print(out)

['$69.99', '$88.99', '$96.99', '$97.99', '$99.99', '$101.99', '$102.99', '$103.99', '$107.99', '$121.99', '$130.99', '$148.99', '$172.99', '$233.99', '$251.99', '$320.99', '$399.99', '$489.99', '$537.99', '$587.99', '$603.99']


In [None]:
# slice the list of price strings: the items at indexes 2 and 3
out[2:4]

['$96.99', '$97.99']

In [None]:
# the third-to-last and second-to-last price strings (the last one is excluded)
out[-3:-1]

['$537.99', '$587.99']

## Multiple tags and Bools

In [None]:
# use a list of multiple tag names: returns every a, p and h4 tag
soup.find_all(['a','p','h4'])

In [None]:
# id = True is a keyword filter, not a comparison:
# it returns every tag that has an id attribute, whatever its value
soup.find_all(id = True)

In [None]:
# a plain string filter returns only text that is exactly 'Galaxy Note'
# (a longer name that merely starts with it is not returned)
soup.find_all(string = 'Galaxy Note')

['Galaxy Note', 'Galaxy Note']

## Compile regular expression patterns with `re.compile()`

In [None]:
# a compiled pattern returns every piece of text that contains 'Galaxy',
# not only text that is exactly equal to it
soup.find_all(string=re.compile('Galaxy'))

['Galaxy Tab 3',
 'Galaxy Tab 3',
 'Galaxy Tab 4',
 'Galaxy Tab',
 'Galaxy Note',
 'Galaxy Note',
 'Galaxy Note 10.1']

In [None]:
 # for comparison, an exact string match for 'Galaxy Tab 3' returns only two items
soup.find_all(string = 'Galaxy Tab 3')

['Galaxy Tab 3', 'Galaxy Tab 3']

In [None]:
# pass a list of strings: text that exactly equals any entry is returned
# (these are plain strings; no regular expression is compiled here)
soup.find_all(string = ['Galaxy Tab 3', 'Acer Iconia'])

['Acer Iconia', 'Galaxy Tab 3', 'Galaxy Tab 3']

In [None]:
# compiling a regular expression pattern: returns all text that contains 'Ide'
soup.find_all(string = re.compile('Ide'))

['Lenovo IdeaTab',
 'IdeaTab A3500L',
 'IdeaTab A8-50',
 'IdeaTab A3500-H',
 'IdeaTab S5000']

## Use `re.compile()` to return `h4`s with `class="pull-right price"`
As you can see, this is very useful. Instead of writing multiple lines, you can use `re.compile()` to keep your code DRY.

In [None]:
# instead of soup.find_all('h4', class_ = 'pull-right price'):
# return any tag whose class matches the pattern 'price'
soup.find_all(class_ = re.compile('price'))

In [None]:
# you can also be explicit, to filter more precisely:
# only h4 tags whose class matches the pattern 'price'
soup.find_all('h4', class_ = re.compile('price'))

In [None]:
# with no tag name and a short pattern, this filter is broader, not narrower
# why does it also return the main div? the pattern 'fa' also matches
# inside other class names, e.g. deFAult
soup.find_all(class_ = re.compile('fa'))

In [None]:
# you can also be explicit, to filter more precisely:
# only span tags whose class matches the pattern 'fa'
soup.find_all('span', class_ = re.compile('fa'))

[<span class="fa arrow"></span>, <span class="fa arrow"></span>]

In [None]:
# the same approach for p tags: only those whose class matches the pattern 'pull'
soup.find_all('p', class_ = re.compile('pull'))

## Limit your results

In [None]:
# NOTE(review): this is the same query as the cell above and passes no limit;
# presumably the unlimited baseline for this section — confirm
soup.find_all('p', class_ = re.compile('pull'))