# Getting Data

In [1]:
import requests # much better than urllib2, but not in standard lib

response = requests.get('https://en.wikipedia.org/wiki/Nobel_Prize')
print(dir(response))
print(response.status_code)
print(response.headers)

['__attrs__', '__bool__', '__class__', '__delattr__', '__dict__', '__doc__', '__enter__', '__exit__', '__format__', '__getattribute__', '__getstate__', '__hash__', '__init__', '__iter__', '__module__', '__new__', '__nonzero__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_content', '_content_consumed', '_next', 'apparent_encoding', 'close', 'connection', 'content', 'cookies', 'elapsed', 'encoding', 'headers', 'history', 'is_permanent_redirect', 'is_redirect', 'iter_content', 'iter_lines', 'json', 'links', 'next', 'ok', 'raise_for_status', 'raw', 'reason', 'request', 'status_code', 'text', 'url']
200
{'Content-Length': '71356', 'Content-language': 'en', 'X-Powered-By': 'HHVM/3.18.2', 'X-Cache-Status': 'hit', 'Last-Modified': 'Sat, 19 Aug 2017 00:20:34 GMT', 'X-Client-IP': '75.147.128.193', 'Link': '</static/images/project-logos/enwiki.png>;rel=preload;as=image;media=not all and (min-resolution: 1.

## Getting ready-to-use files off internet using requests

In [2]:
import requests

response = requests.get('https://chhs.data.ca.gov/api/views/pbxw-hhq8/rows.json?accessType=DOWNLOAD')
print(response.status_code)
if response.status_code == 200:
    data = response.json() # convenience method from requests returns a dict
    print(data.keys())

404


## Getting data from REST apis using requests

In [3]:
OECD_ROOT_URL = 'http://stats.oecd.org/sdmx-json/data'

def make_OECD_request(dsname, dimensions, params=None, root_dir=OECD_ROOT_URL):
    '''Make a URL for OECD API and return a response'''
    if not params:
        params = {} # don't use {} or [] as a default arg ever
    dim_args = ['+'.join(d) for d in dimensions]
    dim_str = '.'.join(dim_args)
    url = root_dir + '/' + dsname + dim_str + '/all'
    print('Requesting URL: '+ url)
    return requests.get(url, params=params)

response = make_OECD_request('QNA',
            (('USA', 'AUS'), ('GDP', 'B1-GE'), ('CUR', 'VOBARSA'), ('Q')),
            {'startTime': '2009-Q1', 'endTime':'2010-Q1'})
if response.status_code == 200:
    json = respons.json()
    print(json.keys())
else:
    print('Request returned a {}'.format(response.status_code))

Requesting URL: http://stats.oecd.org/sdmx-json/data/QNAUSA+AUS.GDP+B1-GE.CUR+VOBARSA.Q/all
Request returned a 400


In [4]:
REST_EU_ROOT_URL = 'http://restcountries.eu/rest/v1'

def REST_country_request(field='all', name=None, params=None):
    headers = {'User-Agent': 'Mozilla/5.0'}
    if not params:
        params =  []
        
    if field == 'all':
        return requests.get(REST_EU_ROOT_URL + '/all', headers=headers)
    
    url = '{}/{}/{}'.format(REST_EU_ROOT_URL, field, name)
    print('Requesting URL: {}'.format(url))
    response = requests.get(url, params=params, headers=headers)
    
    if not response.status_code == 200:
        raise Exception('Request failed with status code {}'.format(response.status_code))
        
    return response

response = REST_country_request('currency', 'usd') # get countries that use us dollar
first_three_items = response.json()[:2]
print(first_three_items)
    

Requesting URL: http://restcountries.eu/rest/v1/currency/usd
[{u'timezones': [u'UTC-11:00'], u'demonym': u'American Samoan', u'currencies': [u'USD'], u'alpha2Code': u'AS', u'alpha3Code': u'ASM', u'area': 199.0, u'languages': [u'en', u'sm'], u'capital': u'Pago Pago', u'relevance': u'0.5', u'borders': [], u'altSpellings': [u'AS', u'Amerika S\u0101moa', u'Amelika S\u0101moa', u'S\u0101moa Amelika'], u'gini': None, u'translations': {u'fr': u'Samoa am\xe9ricaines', u'de': u'Amerikanisch-Samoa', u'ja': u'\u30a2\u30e1\u30ea\u30ab\u9818\u30b5\u30e2\u30a2', u'es': u'Samoa Americana', u'it': u'Samoa Americane'}, u'nativeName': u'American Samoa', u'topLevelDomain': [u'.as'], u'numericCode': u'016', u'population': 55519, u'callingCodes': [u'1684'], u'name': u'American Samoa', u'region': u'Oceania', u'subregion': u'Polynesia', u'latlng': [-14.33333333, -170.0]}, {u'timezones': [u'UTC-04:00'], u'demonym': u'Dutch', u'currencies': [u'USD'], u'alpha2Code': u'BQ', u'alpha3Code': u'BES', u'area': 294.0,

The entire countries data isn't that big, so we will download it all and store it in a mongodb

In [5]:
## REPEAT FROM STORING_DATA.IPYNB
from pymongo import MongoClient

def get_mongo_database(db_name, host='localhost', port=27017, username=None, password=None):
    '''Get named database from MongoDB with/out authentication'''
    if username and password:
        mongo_uri = 'mongodb://{}:{}@{}/{}'.format(username, password, host, db_name)
        conn = MongoClient(mongo_uri)
    else:
        conn = MongoClient(host, port)
        
    return conn[db_name]

In [6]:
db_nobel = get_mongo_database('nobel_prize')
coll = db_nobel['country_data']

# Get ALL the country data
response = REST_country_request()
# Insert JSON objects straight into our collection
coll.insert_many(response.json())

<pymongo.results.InsertManyResult at 0x7fb33ef92320>

In [7]:
# Now we can query our mongodb
res = coll.find({'currencies': {'$in':['USD']}})
first_three_items = list(res)[:2]
print(first_three_items)

[{u'timezones': [u'UTC-11:00'], u'demonym': u'American Samoan', u'subregion': u'Polynesia', u'alpha2Code': u'AS', u'gini': None, u'alpha3Code': u'ASM', u'area': 199.0, u'languages': [u'en', u'sm'], u'capital': u'Pago Pago', u'relevance': u'0.5', u'borders': [], u'altSpellings': [u'AS', u'Amerika S\u0101moa', u'Amelika S\u0101moa', u'S\u0101moa Amelika'], u'currencies': [u'USD'], u'translations': {u'fr': u'Samoa am\xe9ricaines', u'de': u'Amerikanisch-Samoa', u'ja': u'\u30a2\u30e1\u30ea\u30ab\u9818\u30b5\u30e2\u30a2', u'es': u'Samoa Americana', u'it': u'Samoa Americane'}, u'nativeName': u'American Samoa', u'topLevelDomain': [u'.as'], u'numericCode': u'016', u'population': 55519, u'callingCodes': [u'1684'], u'name': u'American Samoa', u'region': u'Oceania', u'_id': ObjectId('599d3048a2c4bb2189ca8ae0'), u'latlng': [-14.33333333, -170.0]}, {u'timezones': [u'UTC-04:00'], u'demonym': u'Dutch', u'subregion': u'Caribbean', u'alpha2Code': u'BQ', u'gini': None, u'alpha3Code': u'BES', u'area': 294