In [1]:
import requests
import pymongo
import secrets
from json import JSONDecoder
from time import sleep

In [2]:
# Open the MongoDB connection used by the rest of the notebook.
# Credentials are read from the imported `secrets` module.
# NOTE(review): the standard library also has a module named `secrets`, which
# has no mongo_* attributes, so this presumably relies on a project-local
# secrets.py being found first on the import path - confirm.
mongo_client = pymongo.MongoClient(
    username=secrets.mongo_user,
    password=secrets.mongo_password,
    host='mongo',  # Docker hostname
    port=27017,
    connect=True,
)

In [3]:
# Show the databases available on the server.
# `database_names()` was deprecated in PyMongo 3.6 and removed in 4.0;
# `list_database_names()` is its replacement and returns the same list of names.
mongo_client.list_database_names()

['admin', 'config', 'historical_events', 'local', 'pair_db']

In [4]:
def add_events_to_list(pairs):
    """Build a dict from JSON key/value pairs, gathering repeated 'event' keys.

    The API's JSON is malformed: one object repeats the key 'event' once per
    event, so a plain dict would keep only the last one. Used as an
    ``object_pairs_hook``, this function collects those values into a list.

    Args:
        pairs: Iterable of ``(key, value)`` tuples for a single JSON object,
            in document order.

    Returns:
        A dict holding every non-'event' pair (the last value wins for a
        repeated key). If at least one 'event' pair was seen, the dict also
        has an 'events' key holding all event values in order.
    """
    merged = {}
    collected = []
    for key, val in pairs:
        if key != 'event':
            merged[key] = val
            continue
        collected.append(val)
    # Only add 'events' when something was collected, so objects without
    # events come back unchanged.
    if collected:
        merged['events'] = collected
    return merged

# Decoder that passes every decoded JSON object through add_events_to_list,
# so repeated 'event' keys are collected into a list rather than overwriting
# one another.
decoder = JSONDecoder(object_pairs_hook=add_events_to_list)

def decorate_document(document):
    """Add an integer 'year' field taken from the document's 'date' string.

    The year is whatever comes before the first '/' in ``document['date']``.
    The document is modified in place and also returned.

    Args:
        document: Mapping with a 'date' string such as '1810/01/04'.

    Returns:
        The same ``document`` object, now carrying a 'year' key.
    """
    year_text, *_ = document['date'].split('/')
    document['year'] = int(year_text)
    return document

def get_data_for_year(year, timeout=30):
    """Fetch the historical events of one year from the vizgr.org search API.

    Args:
        year: Year to query; it is interpolated into the ``begin_date`` and
            ``end_date`` request parameters.
        timeout: Seconds handed to ``requests.get`` as its timeout, so that a
            stalled connection raises instead of hanging the notebook.

    Returns:
        A list of event documents, each passed through ``decorate_document``.
        An empty list when the response contains the API's
        "No events found for this query." message.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        Exception: Whatever decoding or decorating raised, when the response
            could not be processed and is not the "no events" message.
    """
    url = 'http://www.vizgr.org/historical-events/search.php'
    params = {
        'format': 'json',
        'begin_date': '{}0000'.format(year),
        'end_date': '{}1231'.format(year),
        'lang': 'en'
    }
    response = requests.get(url, params=params, timeout=timeout)
    try:
        documents = decoder.decode(response.text)['result']['events']
        return [decorate_document(doc) for doc in documents]
    except Exception:
        # `except Exception` rather than a bare `except`: KeyboardInterrupt and
        # SystemExit must propagate and never be turned into an empty result
        # by the "no events" branch below.
        print(response.text)
        if 'No events found for this query.' in response.text:
            return []
        raise

In [5]:
# Try the fetcher on a single year; in the recorded run the API reported no
# events for 1932, so the call printed that message and returned [].
get_data_for_year(1932)

No events found for this query.


[]

In [6]:
# Handle to the database that stores the scraped events
# (subscript access is equivalent to get_database() with default options).
db = mongo_client['historical_events']

In [7]:
# Collection that receives one document per event
# (subscript access is equivalent to get_collection() with default options).
collection = db['event_descriptions']

In [11]:
# The events provided by this API only go up to the end of 2012, hence the
# exclusive upper bound of 2013.
# Years that already have a document in the collection are skipped, so the
# cell can be re-run to resume. Each fetch is preceded by a one-second pause.
for year in range(1600, 2013):
    # Progress indicator: the carriage return lets the next year overwrite it.
    print(year, end='\r')
    if collection.find_one({'year': year}) is not None:
        continue
    sleep(1)
    docs = get_data_for_year(year)
    if docs:
        collection.insert_many(docs)
    else:
        # Leave a permanent line for each year that yielded no documents.
        print(year)

No events found for this query.
1831
No events found for this query.
1932
2012

In [12]:
# Total number of stored event documents.
# `Collection.count()` was deprecated in PyMongo 3.7 and removed in 4.0;
# `count_documents({})` with an empty filter counts every document.
collection.count_documents({})

23911

In [9]:
# Peek at up to ten events from 1810. The limit is applied on the cursor so
# only those documents are fetched, instead of loading every match and slicing.
list(collection.find({'year': 1810}).limit(10))

[{'_id': ObjectId('5b0dc6468384e30063f8a688'),
  'date': '1810/01/04',
  'description': ' Australian seal hunter Frederick Hasselborough discovers Campbell Island in the subantarctic.{{cite book|last=Mills|first=William James|title=Exploring polar frontiers: a historical encyclopedia|publisher=ABC-CLIO|year=2003|location=Santa Barbara|url=<a href="http://books.google.com/books?id=PYdBH4dOOM4Campprintsec=frontcoverampdq=Exploring+polar+frontiersamphl=enampei=I21UTKSmJYWcsQOa2YDbAgampsa=Xampoi=book_resultampct=resultampresnum=1ampved=0CCgQ6AEwAA#v=onepageampqampf=false}}">http://books.google.com/books?id=PYdBH4dOOM4Campprintsec=frontcoverampdq=Exploring+polar+frontiersamphl=enampei=I21UTKSmJYWcsQOa2YDbAgampsa=Xampoi=book_resultampct=resultampresnum=1ampved=0CCgQ6AEwAA#v=onepageampqampf=false}}</a>',
  'lang': 'en',
  'category1': 'January&amp;amp;amp;ndash;March',
  'granularity': 'year',
  'year': 1810},
 {'_id': ObjectId('5b0dc6468384e30063f8a689'),
  'date': '1810/01/12',
  'descripti