In [1]:
import requests
import pymongo
import secrets
from json import JSONDecoder
from time import sleep

In [2]:
# NOTE(review): `secrets` has to resolve to a project-local module exposing
# `mongo_user` / `mongo_password`; the standard-library `secrets` module has
# no such attributes -- confirm the local file shadows it on sys.path.
mongo_client = pymongo.MongoClient(
    username=secrets.mongo_user,
    password=secrets.mongo_password,
    host='mongo',  # Docker hostname
    port=27017,
    connect=True,
)

In [3]:
# list_database_names() replaces database_names(), which was deprecated in
# PyMongo 3.6 and removed in 4.0; it returns the same list of name strings.
mongo_client.list_database_names()

['admin', 'config', 'historical_events', 'local', 'pair_db']

In [4]:
def add_events_to_list(pairs):
    """Build a dict from JSON key/value pairs, gathering repeated 'event' keys.

    Intended as an ``object_pairs_hook``: the API response is malformed in
    that one object carries many ``"event"`` keys, which a plain dict would
    collapse down to the last one.

    Args:
        pairs: iterable of ``(key, value)`` tuples for a single JSON object.

    Returns:
        A dict holding every non-``'event'`` pair (a later duplicate key
        overwrites an earlier one). When at least one ``'event'`` pair was
        seen, their values are stored, in encounter order, as a list under
        the key ``'events'``.
    """
    merged, collected = {}, []
    for name, value in pairs:
        if name != 'event':
            merged[name] = value
            continue
        collected.append(value)
    # Only attach the list when something was collected, so objects without
    # any 'event' key come back unchanged.
    if collected:
        merged['events'] = collected
    return merged

# Shared decoder: every JSON object is assembled by add_events_to_list, so
# repeated 'event' keys end up in one 'events' list instead of the last
# occurrence silently winning.
decoder = JSONDecoder(
    object_pairs_hook=add_events_to_list,
)

def decorate_document(document):
    """Add an integer ``'year'`` field derived from the document's date.

    The year is whatever precedes the first ``'/'`` in ``document['date']``
    (the whole string when there is no slash), converted with ``int``.

    Args:
        document: dict with a string ``'date'`` entry; modified in place.

    Returns:
        The same dict object, now carrying ``'year'``.
    """
    year_text, _, _ = document['date'].partition('/')
    document['year'] = int(year_text)
    return document

def get_data_for_year(year, timeout=30):
    """Fetch the English-language events for one year from the Vizgr API.

    Args:
        year: year to query; interpolated into the ``begin_date`` and
            ``end_date`` request parameters.
        timeout: seconds to wait for the server, passed to ``requests.get``.
            Without it a stalled connection would block the caller forever.

    Returns:
        A list of event dicts, each with an integer ``'year'`` field added
        by ``decorate_document``. An empty list when the response body
        contains the API's "No events found for this query." message.

    Raises:
        ValueError, KeyError, TypeError: the body could not be decoded into
            the expected ``result -> events`` structure and is not the
            "no events" message. The raw body is printed first to help
            diagnosis.
        requests.exceptions.RequestException: the HTTP request itself failed
            or timed out.
    """
    url = 'http://www.vizgr.org/historical-events/search.php'
    params = {
        'format': 'json',
        'begin_date': '{}0000'.format(year),
        'end_date': '{}1231'.format(year),
        'lang': 'en'
    }
    response = requests.get(url, params=params, timeout=timeout)
    try:
        documents = decoder.decode(response.text)['result']['events']
        return [decorate_document(doc) for doc in documents]
    # Only decode/shape failures are handled here (JSONDecodeError is a
    # ValueError). A bare `except:` would also intercept KeyboardInterrupt
    # and SystemExit.
    except (ValueError, KeyError, TypeError):
        print(response.text)
        if 'No events found for this query.' in response.text:
            return []
        raise

In [5]:
# Spot-check the fetcher on a single year before running the bulk load.
get_data_for_year(year=1932)

No events found for this query.


[]

In [6]:
# Subscript access returns the same Database handle as get_database(name).
db = mongo_client['historical_events']

In [7]:
# Subscript access returns the same Collection handle as get_collection(name).
collection = db['event_descriptions']

In [8]:
# The events provided by this API only go up to the end of 2012
for year in range(1900, 2013):
    # Carriage return keeps the progress counter on a single output line.
    print(year, end='\r')
    # A year that already has a stored document is skipped, so the cell can
    # be re-run after an interruption without duplicating inserts.
    if collection.find_one({'year': year}) is not None:
        continue
    # Pause one second before each API request.
    sleep(1)
    docs = get_data_for_year(year)
    # Only insert when the year actually returned documents.
    if docs:
        collection.insert_many(docs)

No events found for this query.
No events found for this query.
No events found for this query.
No events found for this query.
No events found for this query.
No events found for this query.


In [9]:
# count_documents({}) replaces Collection.count(), which is deprecated and
# was removed in PyMongo 4.0. The empty filter counts every document.
# Requires PyMongo >= 3.7.
collection.count_documents({})

15433

In [11]:
# Show one stored document from the last loaded year to check its shape.
collection.find_one(filter={'year': 2012})

{'_id': ObjectId('5b074b0b21ef7f0013bf86fc'),
 'date': '2012/01/23',
 'description': ' Iran–European Union relations: The European Union adopts an embargo against Iran in protest of that nation\'s continued effort to enrich uranium.{{cite news|author=Jonathan Marcus |url=<a href="http://www.bbc.co.uk/news/world-europe-16674660">http://www.bbc.co.uk/news/world-europe-16674660</a> |title=\'amp#39BBC News\'amp#39 |publisher=Bbc.co.uk |date=2012-01-23 |accessdate=2012-05-06}}',
 'lang': 'en',
 'category1': 'January',
 'granularity': 'year',
 'year': 2012}