In [1]:
from bs4 import BeautifulSoup
import requests

#### Open a file, or a web link

In [None]:
# Parse an HTML file from the working directory: the open file handle is
# handed straight to BeautifulSoup, which builds the tree with the lxml parser.
with open('file_name') as local_page:
    soup = BeautifulSoup(local_page, features='lxml')

In [None]:
# Download the journal page and parse it with the lxml parser.
# NOTE(review): verify=False disables TLS certificate verification. It is kept
# so the cell behaves as before; drop it unless the site's certificate is
# known to be broken.
response = requests.get(
    "http://www.timeslikethese.ca/journal/",
    verify=False,
    timeout=10,  # without a timeout the call can hang indefinitely on a stalled server
)
response.raise_for_status()  # fail here on 4xx/5xx instead of parsing an error page
html_file = response.text
soup = BeautifulSoup(html_file, 'lxml')

In [None]:
# prettify() formats the parsed document with proper indentation for viewing
pretty_html = soup.prettify()
print(pretty_html)

#### Use dot notation to navigate down the hierarchy of the tags

In [None]:
# Dot notation with a tag name: soup.title returns the title tag <title></title>
match = soup.title  # Returns the title tag <title></title>
# As the last expression in the cell, the tag's text is displayed
match.text

In [None]:
# Same dot notation with a different tag name; note this rebinds `match`
match = soup.div   # Returns the first div tag <div></div>, and all of its children

#### Use the find method to look for the first tag with a specific name and class

In [None]:
# Two alternative ways to look up a 'div' tag whose class is 'content-inner'.
# 1) the class_ keyword argument
content = soup.find('div', class_='content-inner')
# 2) a dictionary of attribute: value pairs passed as attrs
class_filter = {'class': 'content-inner'}
content = soup.find('div', attrs=class_filter)

In [None]:
# Navigate down the tag hierarchy one level at a time, using dot notation
# on the tag names: content -> section -> article -> header
section_tag = content.section
article_tag = section_tag.article
article = article_tag.header

#### find_all method returns a list of tags matching the criteria

In [None]:
# find_all collects every 'div' with class 'entry-title-wrapper' into a list
articles = soup.find_all('div', class_='entry-title-wrapper')
# Show the first match as the cell output
first_wrapper = articles[0]
first_wrapper

In [None]:
# Text of the <a> tag reached by dot notation from the first matched wrapper
first_link = articles[0].a
first_link.text

In [None]:
# Print the link text of every matched wrapper, one per line
for wrapper in articles:
    link_text = wrapper.a.text
    print(link_text)

#### Write data to csv file

In [None]:
import csv

# newline='' is what the csv module asks for when opening its output file:
# the writer emits its own '\r\n' row endings, and without it platforms that
# translate '\n' on write (Windows) produce an extra '\r' on every row.
# encoding='utf-8' keeps non-ASCII scraped text writable regardless of the
# platform's default encoding.
csv_file = open('filename.csv', 'w', newline='', encoding='utf-8')
# The file stays open across cells on purpose; a later cell calls csv_file.close()
csv_writer = csv.writer(csv_file)

In [None]:
# One CSV row per matched wrapper: its link text, then the text of its <time> tag
csv_writer.writerows([entry.a.text, entry.time.text] for entry in articles)

In [None]:
# Close the file opened earlier so the written rows are flushed to disk
csv_file.close()

#### Compose API url using the urllib.parse.urlencode method

In [None]:
import urllib.parse

# Base endpoint; it ends in '?' so an encoded query string can be appended directly.
# The host must be written with dots (maps.googleapis.com) to be a valid hostname.
main_api = 'http://maps.googleapis.com/maps/api/geocode/json?'
address = 'lhr'
# urlencode turns the dictionary into the query string 'address=lhr&id=5'
query_string = urllib.parse.urlencode({'address': address, 'id': '5'})
url = main_api + query_string

# Alternatively, let requests.get build the query string from a params dictionary.
# timeout keeps the cell from hanging indefinitely if the server never answers.
r = requests.get(main_api, params={'address': address, 'id': 5}, timeout=10)

#### Make API calls

In [None]:
# Call the posts endpoint; `r` is reused by the following cells.
url = 'https://api.dailysmarty.com/posts'
r = requests.get(url, timeout=10)  # timeout: do not wait forever on a stalled server
r.raise_for_status()  # surface 4xx/5xx here rather than as a confusing .json() failure later

In [None]:
# Decode the response body as JSON; as the last expression in the cell it is displayed
r.json()

In [None]:
%pprint   #Note: turn on pprint to make json file more readable

#### Navigate the json using bracket notation

In [None]:
# Bracket notation, step by step: 'posts' key -> first item -> its 'title'
payload = r.json()
first_post = payload['posts'][0]
first_post['title']