In [13]:
#8. Quiz. Extracting Data

#!/usr/bin/env python
# Your task here is to extract data from xml on authors of an article
# and add it to a list, one item for an author.
# See the provided data structure for the expected format.
# The tags for first name, surname and email should map directly
# to the dictionary keys
import xml.etree.ElementTree as ET
import pprint

# XML article file that test() and the module-level code in this cell parse.
article_file = "exampleResearchArticle.xml"


def get_root(fname):
    """Parse the XML file at ``fname`` and return the root element of its tree."""
    return ET.parse(fname).getroot()


def get_authors(root):
    """Collect one dictionary per author element found under ``./fm/bibl/aug/au``.

    Every dictionary has the keys "fnm", "snm" and "email". Each key holds the
    text of the author's child element with the same tag, or None when the
    author has no such child element.
    """
    def child_text(element, tag):
        # Text of the first child with this tag; None when there is no such child.
        child = element.find(tag)
        return None if child is None else child.text

    return [
        {
            "fnm": child_text(author, "fnm"),
            "snm": child_text(author, "snm"),
            "email": child_text(author, "email"),
        }
        for author in root.findall('./fm/bibl/aug/au')
    ]


def test():
    """Parse ``article_file`` and spot-check the first two extracted authors."""
    expected = [
        {'fnm': 'Omer', 'snm': 'Mei-Dan', 'email': 'omer@extremegate.com'},
        {'fnm': 'Mike', 'snm': 'Carmont', 'email': 'mcarmont@hotmail.com'},
        {'fnm': 'Lior', 'snm': 'Laver', 'email': 'laver17@gmail.com'},
        {'fnm': 'Meir', 'snm': 'Nyska', 'email': 'nyska@internet-zahav.net'},
        {'fnm': 'Hagay', 'snm': 'Kammar', 'email': 'kammarh@gmail.com'},
        {'fnm': 'Gideon', 'snm': 'Mann', 'email': 'gideon.mann.md@gmail.com'},
        {'fnm': 'Barnaby', 'snm': 'Clarck', 'email': 'barns.nz@gmail.com'},
        {'fnm': 'Eugene', 'snm': 'Kots', 'email': 'eukots@gmail.com'},
    ]

    authors = get_authors(get_root(article_file))

    # Only the first author is compared in full; the second by first name alone.
    assert authors[0] == expected[0]
    assert authors[1]["fnm"] == expected[1]["fnm"]


# Run the self-check first; a failed assertion stops the cell here.
test()

# Parse the article again at module level and pretty-print every author record.
root = get_root(article_file)
data = get_authors(root)

pprint.pprint(data)

[{'email': 'omer@extremegate.com', 'fnm': 'Omer', 'snm': 'Mei-Dan'},
 {'email': 'mcarmont@hotmail.com', 'fnm': 'Mike', 'snm': 'Carmont'},
 {'email': 'laver17@gmail.com', 'fnm': 'Lior', 'snm': 'Laver'},
 {'email': 'nyska@internet-zahav.net', 'fnm': 'Meir', 'snm': 'Nyska'},
 {'email': 'kammarh@gmail.com', 'fnm': 'Hagay', 'snm': 'Kammar'},
 {'email': 'gideon.mann.md@gmail.com', 'fnm': 'Gideon', 'snm': 'Mann'},
 {'email': 'barns.nz@gmail.com', 'fnm': 'Barnaby', 'snm': 'Clarck'},
 {'email': 'eukots@gmail.com', 'fnm': 'Eugene', 'snm': 'Kots'}]


In [14]:
#9. Quiz: Handling Attributes

#!/usr/bin/env python
# Your task here is to extract data from xml on authors of an article
# and add it to a list, one item for an author.
# See the provided data structure for the expected format.
# The tags for first name, surname and email should map directly
# to the dictionary keys, but you have to extract the attributes from the "insr" tag
# and add them to the list for the dictionary key "insr"
import xml.etree.ElementTree as ET

# XML article file that test() in this cell parses.
article_file = "exampleResearchArticle.xml"


def get_root(fname):
    """Return the root element of the XML document stored in the file ``fname``."""
    document = ET.parse(fname)
    root_element = document.getroot()
    return root_element


def get_authors(root):
    """Collect one dictionary per author element found under ``./fm/bibl/aug/au``.

    Every dictionary has the keys:
      "fnm", "snm", "email" -- text of the author's child element with the
                               same tag, or None when there is no such child.
      "insr"                -- list of the "iid" attribute values of the
                               author's "insr" child elements, in document
                               order; "insr" elements that carry no "iid"
                               attribute are skipped.
    """
    authors = []
    for author in root.findall('./fm/bibl/aug/au'):
        data = {
                "fnm": None,
                "snm": None,
                "email": None,
                "insr": []
        }

        # A missing child element keeps the None default instead of raising
        # AttributeError from ``None.text``.
        for key in ("fnm", "snm", "email"):
            element = author.find(key)
            if element is not None:
                data[key] = element.text

        # Collect the institution ids; an "insr" without "iid" contributes nothing
        # rather than raising KeyError.
        data["insr"] = [insr.attrib["iid"]
                        for insr in author.findall("insr")
                        if "iid" in insr.attrib]
        authors.append(data)

    return authors


def test():
    """Parse ``article_file`` and spot-check the first two extracted authors."""
    expected = [
        {'insr': ['I1'], 'fnm': 'Omer', 'snm': 'Mei-Dan', 'email': 'omer@extremegate.com'},
        {'insr': ['I2'], 'fnm': 'Mike', 'snm': 'Carmont', 'email': 'mcarmont@hotmail.com'},
        {'insr': ['I3', 'I4'], 'fnm': 'Lior', 'snm': 'Laver', 'email': 'laver17@gmail.com'},
        {'insr': ['I3'], 'fnm': 'Meir', 'snm': 'Nyska', 'email': 'nyska@internet-zahav.net'},
        {'insr': ['I8'], 'fnm': 'Hagay', 'snm': 'Kammar', 'email': 'kammarh@gmail.com'},
        {'insr': ['I3', 'I5'], 'fnm': 'Gideon', 'snm': 'Mann', 'email': 'gideon.mann.md@gmail.com'},
        {'insr': ['I6'], 'fnm': 'Barnaby', 'snm': 'Clarck', 'email': 'barns.nz@gmail.com'},
        {'insr': ['I7'], 'fnm': 'Eugene', 'snm': 'Kots', 'email': 'eukots@gmail.com'},
    ]

    authors = get_authors(get_root(article_file))

    # The first author is compared in full; the second only by its "insr" list.
    assert authors[0] == expected[0]
    assert authors[1]["insr"] == expected[1]["insr"]

# Run the self-check for this cell; a failed assertion raises AssertionError.
test()
#pprint.pprint(get_authors(get_root(article_file)))

In [17]:
#19. Quiz Beautiful Soup

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Please note that the function 'make_request' is provided for your reference only.
# You will not be able to actually use it from within the Udacity web UI.
# Your task is to process the HTML using BeautifulSoup, extract the hidden
# form field values for "__EVENTVALIDATION" and "__VIEWSTATE" and set the appropriate
# values in the data dictionary.
# All your changes should be in the 'extract_data' function
from bs4 import BeautifulSoup
import requests
import json

# HTML file that test() passes to extract_data.
html_page = "DataElements.htm"


def extract_data(page):
    """Read the hidden form field values out of the HTML file ``page``.

    Returns a dictionary with two keys:
      "eventvalidation" -- the ``value`` attribute of the element whose id
                           is "__EVENTVALIDATION"
      "viewstate"       -- the ``value`` attribute of the element whose id
                           is "__VIEWSTATE"
    """
    data = {"eventvalidation": "",
            "viewstate": ""}

    # Parse the handle managed by ``with`` so the file is closed on exit;
    # opening the path a second time would leave that extra handle unclosed.
    with open(page, "r") as html:
        soup = BeautifulSoup(html, "html.parser")

    # long version
    eventvalidation = soup.find(id='__EVENTVALIDATION')
    data["eventvalidation"] = eventvalidation['value']

    # concise version
    data["viewstate"] = soup.find(id='__VIEWSTATE')['value']

    return data


def make_request(data):
    """POST the airport/carrier form and return the response body as text.

    ``data`` must provide the keys "eventvalidation" and "viewstate"; their
    values are sent as the "__EVENTVALIDATION" and "__VIEWSTATE" form fields.
    The airport ("BOS") and carrier ("VX") are fixed in this function.
    """
    form_fields = {
        'AirportList': "BOS",
        'CarrierList': "VX",
        'Submit': 'Submit',
        "__EVENTTARGET": "",
        "__EVENTARGUMENT": "",
        "__EVENTVALIDATION": data["eventvalidation"],
        "__VIEWSTATE": data["viewstate"],
    }

    response = requests.post(
        "http://www.transtats.bts.gov/Data_Elements.aspx?Data=2",
        data=form_fields)
    return response.text


def test():
    """Check the hidden field values extracted from ``html_page``."""
    fields = extract_data(html_page)

    event_validation = fields["eventvalidation"]
    assert event_validation != ""
    assert event_validation.startswith("/wEWjAkCoIj1ng0")

    view_state = fields["viewstate"]
    assert view_state.startswith("/wEPDwUKLTI")

    
#test()
#test doesn't pass because original html page changed over time

u'/wEPDwULLTE3NTUzNjYzMDUPFg4eB3N0ckNvbm4FWlByb3ZpZGVyPS5ORVQgRnJhbWV3b3JrIERhdGEgUHJvdmlkZXIgZm9yIE9EQkM7RFNOPUVuZGVhdm91cjt1aWQ9d2VidXNlcjtwd2Q9IVdlYnVzZXIxMjM0Ox4FTUxpc3QFswEnQVRMJywnQldJJywnQk9TJywnQ0xUJywnTURXJywnT1JEJywnREFMJywnREZXJywnREVOJywnRFRXJywnRkxMJywnSUFIJywnTEFTJywnTEFYJywnTUlBJywnTVNQJywnSkZLJywnTEdBJywnRVdSJywnTUNPJywnUEhMJywnUEhYJywnUERYJywnU0xDJywnU0FOJywnU0ZPJywnU0VBJywnVFBBJywnRENBJywnSUFEJx4MQWlycG9ydF9OYW1lBQ1BbGwgQWlycG9ydHMgHgZIZWFkZXIFB0ZsaWdodHMeCVNlbGVjdGlvbgUcQWxsIENhcnJpZXJzIC0gQWxsIEFpcnBvcnRzIB4EVW5pdGUeBlNvdXJjZQVDPEJSPlNPVVJDRTogQnVyZWF1IG9mIFRyYW5zcG9ydGF0aW9uIFN0YXRpc3RpY3MgVC0xMDAgU2VnbWVudCBkYXRhLhYGAgEPDxYCHgRUZXh0BQdGbGlnaHRzZGQCAg8PFgIfBwUcQWxsIENhcnJpZXJzIC0gQWxsIEFpcnBvcnRzIGRkAgMPZBYUAgEPEA8WAh4UQXBwZW5kRGF0YUJvdW5kSXRlbXNnZA8WEmYCAQICAgMCBAIFAgYCBwIIAgkCCgILAgwCDQIOAg8CEAIRFhIQBR1BbGwgVS5TLiBhbmQgRm9yZWlnbiBDYXJyaWVycwUDQWxsZxAFEUFsbCBVLlMuIENhcnJpZXJzBQVBbGxVU2cQBRRBbGwgRm9yZWlnbiBDYXJyaWVycwUKQWxsRm9yZWlnbmcQBRBBbGFza2EgQWlybGluZXMgBQJBU2