## Request the page using requests

#### Note: the package is installed as `beautifulsoup4` (e.g. `pip install beautifulsoup4`) but imported as `bs4`

In [49]:
import requests
from bs4 import BeautifulSoup

In [59]:
# Fetch the quote page. An explicit timeout keeps this cell from hanging
# indefinitely if the server never answers (requests applies no timeout by default).
quote_page = 'https://www.bloomberg.com/quote/SPX:IND'
page = requests.get(quote_page, timeout=10)
type(page)

requests.models.Response

In [60]:
# Compare the response's status code against requests' named constant for "OK".
# (The earlier bare `type(page)` line was dropped: only the last expression of a
# cell is displayed, so its value was silently discarded.)
page.status_code == requests.codes.ok

True

In [62]:
# Show only the first 100 characters of the body as a quick sanity check.
html_preview = page.text[:100]
print(html_preview)

<!DOCTYPE html>
<html xmlns:og="http://ogp.me/ns#" data-view-uid="0"><head>
<base href='https://www.


#### Check for a bad URL

In [55]:
# Request a URL that does not exist to see how a failed request surfaces.
# The timeout prevents the cell from blocking forever on an unresponsive server.
bad_page = requests.get('https://www.bloomberg.com/somethingNotExist', timeout=10)
try:
    # raise_for_status() raises requests.exceptions.HTTPError for 4xx/5xx responses.
    bad_page.raise_for_status()
except requests.exceptions.HTTPError as exc:
    # Catch only the HTTP error we expect; unrelated bugs should still surface.
    print('There was a problem: %s' % (exc))

There was a problem: 404 Client Error: Not Found for url: https://www.bloomberg.com/somethingNotExist


## Parse the page using bs4 

In [65]:
# Parse the downloaded HTML text into a BeautifulSoup tree using the html5lib parser.
html_text = page.text
sp = BeautifulSoup(html_text, features='html5lib')
type(sp)

bs4.BeautifulSoup

In [68]:
# CSS selector: every <div> element that carries the "price" class.
price_matches = sp.select('div.price')
price_matches

[<div class="price">2,570.39</div>]

#### The select() method will return a list of Tag objects, which is how Beautiful Soup represents an HTML element. The list will contain one Tag object for every match in the BeautifulSoup object’s HTML.

In [69]:
# Inspect the container type that select() hands back.
price_matches = sp.select('div.price')
type(price_matches)

list

In [72]:
# Pull the first matching element out of the result list.
price_matches = sp.select('div.price')
first_price_tag = price_matches[0]
first_price_tag

<div class="price">2,570.39</div>

In [70]:
# Inspect the type of a single matched element.
first_price_tag = sp.select('div.price')[0]
type(first_price_tag)

bs4.element.Tag

In [71]:
# Convert the matched element to a plain string (its HTML markup).
first_price_tag = sp.select('div.price')[0]
str(first_price_tag)

'<div class="price">2,570.39</div>'

In [73]:
# Extract just the text inside the first matching element.
# get_text() is the current PEP 8-style name; getText() is the legacy alias for it.
sp.select('div.price')[0].get_text()

'2,570.39'

#### Some other ways to target the same element

In [77]:
# find() returns the first matching element; class_ (with the trailing
# underscore) is used because `class` is a reserved word in Python.
price_tag = sp.find('div', class_='price')
price_tag

<div class="price">2,570.39</div>

In [78]:
# Same lookup, this time passing the attribute filter as an explicit dict.
class_filter = {'class': 'price'}
sp.find('div', attrs=class_filter)

<div class="price">2,570.39</div>