In [8]:
"""
Earth Observatory Natural Event Tracker.
"""
import time
import sched
import pandas as pd
import json
import logging
import requests
import numpy as np
from io import StringIO

import utils
#from database import upsert_bpa


DIS_SOURCE = "https://eonet.sci.gsfc.nasa.gov/api/v2.1/events"
MAX_DOWNLOAD_ATTEMPT = 5
DOWNLOAD_PERIOD = 10         # second
logger = logging.Logger(__name__)
utils.setup_logger(logger, 'data.log')

In [9]:
def download_disaster(url=DIS_SOURCE, retries=MAX_DOWNLOAD_ATTEMPT, limit = 10, days = 2):
    """Returns disaster information text from `DIS_SOURCE` that includes disaster information
    Returns None if network failed
    """
    js = None
    for i in range(retries):
        try:
            req = requests.get(f"{url}?limit={limit}&days={days}", timeout=1.0)
            req.raise_for_status()
            text = req.text
            js = json.loads(text)
        except requests.exceptions.HTTPError as e:
            logger.warning("Retry on HTTP Error: {}".format(e))
    if js is None:
        logger.error('download_dis too many FAILED attempts')
    return js

In [10]:
def filter_dis(js):
    """Converts `json` to `DataFrame`
    """
    data = []
    filter_tits = ["Wildfires", "Severe_Storms", "Sea_and_Lake_Ice"]
    for x in js["events"]:
        tit = x["categories"][0]["title"].replace(" ","_")
        if tit not in filter_tits:
            continue
        g = x["geometries"]
        for gg in g:
            dt, geo = pd.to_datetime(gg["date"]), gg['coordinates']
            singled = [tit, dt, geo[0], geo[1]]
            data.append(singled)
    data = np.array(data)
    df = pd.DataFrame(data, columns = ["title", "datetime", "geo1", "geo2"])
    return df

In [24]:
js = download_disaster(url=DIS_SOURCE, retries=MAX_DOWNLOAD_ATTEMPT, limit = 100, days = 100)
df = filter_dis(js)

In [12]:
def update_once():
    t = download_disaster(limit = 10, days = 10)
    df = filter_dis(t)
    #upsert_bpa(df)

In [13]:
def main_loop(timeout=DOWNLOAD_PERIOD):
    scheduler = sched.scheduler(time.time, time.sleep)

    def _worker():
        try:
            update_once()
        except Exception as e:
            logger.warning("main loop worker ignores exception and continues: {}".format(e))
        scheduler.enter(timeout, 1, _worker)    # schedule the next event

    scheduler.enter(0, 1, _worker)              # start the first event
    scheduler.run(blocking=True)

In [14]:
main_loop()

KeyboardInterrupt: 

In [20]:
import logging
import pymongo
import pandas as pd
import expiringdict

import utils

client = pymongo.MongoClient()
logger = logging.Logger(__name__)
utils.setup_logger(logger, 'db.log')
RESULT_CACHE_EXPIRATION = 10             # seconds

In [49]:
def upsert_dis(df):
    """
    Update MongoDB database `disaster` and collection `disasters` with the given `DataFrame`.
    """
    db = client.get_database("disaster")
    collection = db.get_collection("disasters")
    update_count = 0
    for record in df.to_dict('records'):
        print(record)
        result = collection.replace_one(
            filter=record,    # locate the document if exists
            replacement=record,                         # latest document
            upsert=True)                                # update if exists, insert if not
        if result.matched_count > 0:
            update_count += 1
    logger.info("rows={}, update={}, ".format(df.shape[0], update_count) +
                "insert={}".format(df.shape[0]-update_count))


In [50]:
upsert_dis(df)

{'title': 'Wildfires', 'datetime': Timestamp('2019-12-05 15:52:00+0000', tz='UTC'), 'geo1': -71.68626, 'geo2': -33.39903}


KeyboardInterrupt: 

In [26]:
df

Unnamed: 0,title,datetime,geo1,geo2
0,Wildfires,2019-12-05 15:52:00+00:00,-71.6863,-33.399
1,Severe_Storms,2019-12-05 00:00:00+00:00,63.6,-7.8
2,Severe_Storms,2019-12-05 06:00:00+00:00,63.2,-8.6
3,Severe_Storms,2019-12-05 12:00:00+00:00,62.7,-9.2
4,Severe_Storms,2019-12-05 18:00:00+00:00,62.4,-9.9
...,...,...,...,...
460,Sea_and_Lake_Ice,2019-09-13 00:00:00+00:00,-40.25,-76.23
461,Sea_and_Lake_Ice,2019-10-10 00:00:00+00:00,-39.15,-75.78
462,Sea_and_Lake_Ice,2019-10-18 00:00:00+00:00,-37.61,-75.38
463,Sea_and_Lake_Ice,2019-10-25 00:00:00+00:00,-38.96,-75.81
