# TOPIC: TIME SERIES 

In [2]:
import requests

# Making a GET request
r = requests.get('https://www.bls.gov/')

# print request object
print(r.url)

# print status code
print(r.status_code)


https://www.bls.gov/
200


# EXTRACTING TITLE

In [4]:
import requests
from bs4 import BeautifulSoup


# Making a GET request
r = requests.get('https://www.bls.gov/')

# Parsing the HTML
soup = BeautifulSoup(r.content, 'html.parser')

# Getting the title tag
print(soup.title)

# Getting the name of the tag
print(soup.title.name)

# Getting the name of parent tag
print(soup.title.parent.name)

# use the child attribute to get
# the name of the child tag


<title>U.S. Bureau of Labor Statistics</title>
title
meta


# EXTRACTING IMAGES

In [9]:
import requests
from bs4 import BeautifulSoup


# Making a GET request
r = requests.get('https://www.bls.gov/')

# Parsing the HTML
soup = BeautifulSoup(r.content, 'html.parser')

images_list = []

images = soup.select('img')
for image in images:
	src = image.get('src')
	alt = image.get('alt')
	images_list.append({"src": src, "alt": alt})
	
for image in images_list:
	print(image)


{'src': 'https://www.dol.gov/themes/opa_theme/img/flag-favicon-57.png', 'alt': 'U.S. flag'}
{'src': 'https://www.dol.gov/themes/opa_theme/img/logo-primary.svg', 'alt': 'Department of Labor Logo'}
{'src': 'https://www.dol.gov/themes/opa_theme/img/icon-dot-gov.svg', 'alt': 'Dot gov'}
{'src': 'https://www.dol.gov/themes/opa_theme/img/icon-https.svg', 'alt': 'Https'}
{'src': '/images/icons/twitter.png', 'alt': 'Follow BLS on Twitter'}
{'src': '/images/layout/homepage/boston.jpg', 'alt': 'boston'}
{'src': '/images/layout/homepage/newyork.jpg', 'alt': 'newyork'}
{'src': '/images/layout/homepage/philadelphia.jpg', 'alt': 'philadelphia'}
{'src': '/images/layout/homepage/atlanta.jpg', 'alt': 'atlanta'}
{'src': '/images/layout/homepage/chicago.jpg', 'alt': 'chicago'}
{'src': '/images/layout/homepage/dallas.jpg', 'alt': 'dallas'}
{'src': '/images/layout/homepage/kansascity.jpg', 'alt': 'kansascity'}
{'src': '/images/layout/homepage/sanfrancisco.jpg', 'alt': 'sanfrancisco'}
{'src': '/ooh/content/o

# EXTRACTING URLS

In [10]:
import requests
from bs4 import BeautifulSoup


# Making a GET request
r = requests.get('https://www.bls.gov/')

# Parsing the HTML
soup = BeautifulSoup(r.content, 'html.parser')

# find all the anchor tags with "href"
for link in soup.find_all('a'):
	print(link.get('href'))


#startcontent
https://www.dol.gov/
https://www.bls.gov/
http://twitter.com/BLS_gov
http://twitter.com/BLS_gov
/schedule/news_release/
https://blogs.bls.gov/blog/
https://www.bls.gov/
/bls/infohome.htm
/bls/history/home.htm
/bls/senior_staff/home.htm
/bls/bls_budget_and_performance.htm
/bls/faqs.htm
/bls/topicsaz.htm
/bls/glossary.htm
/jobs/
/bls/bls-speakers/
/errata/
/bls/contact.htm
/bls/overview.htm
/bls/demographics.htm
/bls/industry.htm
/bls/business.htm
/bls/occupation.htm
/bls/geography.htm
/audience/
/audience/business.htm
/audience/consumers.htm
/audience/developers.htm
/audience/economists.htm
/audience/investors.htm
/audience/jobseekers.htm
/audience/media.htm
/audience/policy.htm
/audience/researcher.htm
/audience/students.htm
/respondents/
/bls/proghome.htm
/bls/inflation.htm
/cpi/
/ppi/
/mxp/
/bls/escalation.htm
/pir/
/bls/wages.htm
/ncs/ect/
/ncs/
/oes/
/cps/earnings.htm#demographics
/ces/
https://www.bls.gov/cew/
/ncs/ebs/
/mwe/
/crp/
/wsp/
/ors/
/bls/unemployment.htm
/

In [1]:
import requests
from bs4 import BeautifulSoup


# Making a GET request
r = requests.get('https://data.bls.gov/cgi-bin/surveymost?bls')

# Parsing the HTML
soup = BeautifulSoup(r.content, 'html.parser')

# Getting the title tag
print(soup.title)

# Getting the name of the tag
print(soup.title.name)

# Getting the name of parent tag
print(soup.title.parent.name)

# use the child attribute to get
# the name of the child tag


<title>Top Picks (Most Requested Statistics) : U.S. Bureau of Labor Statistics</title>
title
head


In [4]:
# Define URL
url = 'https://www.bls.gov/'
# Ask hosting server to fetch url
requests.get(url)

<Response [200]>

In [7]:
# parser-lxml = Change html to Python friendly format
pages = requests.get(url)
pages.text
soup = BeautifulSoup(pages.text, 'lxml')
soup


<!DOCTYPE html>
<html lang="en">
<head>
<!-- P2 -->
<meta charset="utf-8"/><meta content="initial-scale=0.8; width=device-width" name="viewport"/><title>U.S. Bureau of Labor Statistics</title> <!-- ****************************************** Begin META TAGS********************************************* --> <link href="/stylesheets/bls_home.css" media="all" rel="Stylesheet" type="text/css"/> <!-- START include/global/head.stm -->
<script id="_fed_an_ua_tag" src="https://dap.digitalgov.gov/Universal-Federated-Analytics-Min.js?agency=DOL&amp;subagency=BLS&amp;yt=true"></script>
<script src="/javascripts/jquery-latest.js"></script>
<script src="/javascripts/bls-latest.js"></script>
<script src="/javascripts/jquery-tools.js"></script>
<script src="/javascripts/jquery-migrate-1.2.1.min.js"></script>
<link href="/assets/bootstrap/latest/bootstrap.min.css" rel="stylesheet"/>
<script src="/assets/bootstrap/latest/popper.min.js"></script>
<script src="/assets/bootstrap/latest/bootstrap.min.js"></s

In [8]:
# Access h1 tag
soup.h1

<h1 class="invisible">U.S. Bureau of Labor Statistics Home</h1>

In [9]:
# Access header tag
soup.header
# Access div tag
soup.div

<div id="usa-banner-wrapper">
<div class="bootstrap standard-width" id="usa-banner">
<div class="content">
<div class="USA-flag-link">
<img alt="U.S. flag" src="https://www.dol.gov/themes/opa_theme/img/flag-favicon-57.png"/>
<p>An official website of the United States government 
	  
	<button aria-controls="usaBanner" aria-expanded="false" data-target="#usaBanner" data-toggle="collapse" type="button">Here is how you know <span class="oi" data-glyph="chevron-bottom"></span></button>
</p>
</div>
<a class="dolHolder" href="https://www.dol.gov/" id="dolHolder" target="_blank"><img alt="Department of Labor Logo" src="https://www.dol.gov/themes/opa_theme/img/logo-primary.svg"/>  United States Department of Labor</a>
</div>
<div class="collapse" id="usaBanner">
<div class="row">
<div class="col-md-6">
<img alt="Dot gov" src="https://www.dol.gov/themes/opa_theme/img/icon-dot-gov.svg"/>
<p>
<strong>The .gov means it's official.</strong>
<br/> Federal government websites often end in .gov or .mi

In [10]:
# Access string from nested tags
soup.header.p

<p>An official website of the United States government 
	  
	<button aria-controls="usaBanner" aria-expanded="false" data-target="#usaBanner" data-toggle="collapse" type="button">Here is how you know <span class="oi" data-glyph="chevron-bottom"></span></button>
</p>

In [11]:
# Access string from nested tags
soup.header.p
soup.header.p.string

In [12]:
# Access ‘a’ tag in <header>
a_start = soup.header.a
a_start
# Access only the attributes using attrs
a_start.attrs

{'class': ['skiplink'], 'href': '#startcontent'}

In [15]:
a_start['class']

['skiplink']

In [18]:
a_start['new-attribute'] = 'This is the new attribute'
a_start.attrs
a_start

<a class="skiplink" href="#startcontent" new-attribute="This is the new attribute">Skip to Content</a>

In [20]:
# Searching specific attributes of tags
soup.find('h4', class_= 'pull-right price')

In [21]:
# Using filter to find multiple tags
soup.find_all(['h4', 'a', 'p'])
soup.find_all(['header', 'div'])
soup.find_all(id = True) # class and id are special attribute so it can be written like this
soup.find_all(class_= True)

[<a class="skiplink" href="#startcontent" new-attribute="This is the new attribute">Skip to Content</a>,
 <div class="bootstrap standard-width" id="usa-banner">
 <div class="content">
 <div class="USA-flag-link">
 <img alt="U.S. flag" src="https://www.dol.gov/themes/opa_theme/img/flag-favicon-57.png"/>
 <p>An official website of the United States government 
 	  
 	<button aria-controls="usaBanner" aria-expanded="false" data-target="#usaBanner" data-toggle="collapse" type="button">Here is how you know <span class="oi" data-glyph="chevron-bottom"></span></button>
 </p>
 </div>
 <a class="dolHolder" href="https://www.dol.gov/" id="dolHolder" target="_blank"><img alt="Department of Labor Logo" src="https://www.dol.gov/themes/opa_theme/img/logo-primary.svg"/>  United States Department of Labor</a>
 </div>
 <div class="collapse" id="usaBanner">
 <div class="row">
 <div class="col-md-6">
 <img alt="Dot gov" src="https://www.dol.gov/themes/opa_theme/img/icon-dot-gov.svg"/>
 <p>
 <strong>The .