In [1]:
import pandas as pd
import requests
import numpy as np
import aiohttp
import asyncio
from datetime import datetime, timedelta
from time import perf_counter
from urllib.parse import urlencode

# Number of seconds to wait after a failed download before trying again.
WAITSECS = 3

# Maximum number of events ComCat will return in one search.
SEARCH_LIMIT = 20_000

# Minimum seconds between requests: 500 requests are allowed per 5 minutes
# (300 seconds), i.e. one request every 0.6 seconds.
API_THROTTLE_LIMIT = 300 / 500

# Value handed to each HTTP GET as its `timeout` argument.
TIMEOUT = 60

# strftime pattern used to render the start/end times placed in query URLs.
TIMEFMT = "%Y-%m-%dT%H:%M:%S"


In [2]:
def get_time_segments(starttime, endtime, minmag):
    """Split a search window into consecutive segments sized for SEARCH_LIMIT.

    The number of segments is estimated from a table of typical daily
    earthquake counts per whole-number minimum magnitude, so that the
    estimated event count of each segment does not exceed SEARCH_LIMIT.

    Args:
        starttime (datetime): Start of the overall search window.
        endtime (datetime): End of the overall search window.
        minmag (float): Minimum magnitude of the search. Values outside the
            table's 0-9 range are clamped to the nearest table entry.

    Returns:
        list: (segment_start, segment_end) datetime tuples in chronological
        order. Empty when endtime is earlier than starttime.
    """
    # earthquake frequency table: minmag:earthquakes per day
    freq_table = {
        0: 10000 / 7,
        1: 3500 / 14,
        2: 3000 / 18,
        3: 4000 / 59,
        4: 9000 / 151,
        5: 3000 / 365,
        6: 210 / 365,
        7: 20 / 365,
        8: 5 / 365,
        9: 0.05 / 365,
    }

    # An inverted window has nothing to search. Without this guard a window
    # inverted by less than a few days yields zero segments and the
    # days-per-segment division below raises ZeroDivisionError.
    if endtime < starttime:
        return []

    # Clamp to the table's key range so negative magnitudes or magnitudes of
    # 10 and above use the nearest entry instead of raising KeyError.
    floormag = int(np.floor(minmag))
    floormag = max(min(freq_table), min(floormag, max(freq_table)))

    ndays = (endtime - starttime).days + 1
    freq = freq_table[floormag]
    nsegments = int(np.ceil((freq * ndays) / SEARCH_LIMIT))
    days_per_segment = int(np.ceil(ndays / nsegments))

    step = timedelta(days=days_per_segment)
    gap = timedelta(microseconds=1)
    segments = []
    startseg = starttime
    while startseg <= endtime:
        # Clip the last segment so it never extends past the requested window.
        endseg = min(endtime, startseg + step)
        segments.append((startseg, endseg))
        # The next segment starts one microsecond after this segment's
        # unclipped end so adjacent segments do not share an instant. The extra
        # microsecond is carried forward, so later boundaries sit a few
        # microseconds past whole-day offsets from starttime.
        startseg += step + gap
    return segments
    
async def _extract_payload(response, detail):
    """Check an HTTP response's status and return its decoded JSON content.

    Returns the whole JSON document when detail is true, otherwise the value
    stored under its "features" key. Raises whatever response.raise_for_status()
    or response.json() raise, or KeyError if "features" is absent.
    """
    response.raise_for_status()
    jdict = await response.json()
    return jdict if detail else jdict["features"]


async def search(url, session, detail=False):
    """Download one query URL and return its events (or its full JSON document).

    A response with HTTP status 503 is retried once after WAITSECS seconds.
    The previous implementation tried to detect this by catching
    requests.HTTPError, which the aiohttp-style session used here never
    raises, so the retry never ran; the status code is now inspected directly.

    Args:
        url (str): Fully-formed request URL.
        session: Client session whose ``get(url, timeout=...)`` returns an
            async context manager yielding a response with ``status``,
            ``raise_for_status()`` and an awaitable ``json()``.
        detail (bool): When true, return the whole decoded JSON document and
            skip the throttling pause; otherwise return its "features" list.

    Returns:
        The decoded JSON document (detail=True) or the list stored under
        "features" (detail=False).

    Raises:
        ConnectionError: If the request, the status check, or JSON decoding
            fails. The message is prefixed with "503" when the failure
            happened on the retry that follows a 503 response.
    """
    result = []
    needs_retry = False
    tstart = perf_counter()
    try:
        async with session.get(url, timeout=TIMEOUT) as response:
            if response.status == 503:
                # Service temporarily unavailable: retry once after leaving
                # this response's context.
                needs_retry = True
            else:
                result = await _extract_payload(response, detail)
    except Exception as msg:
        fmt = 'Error downloading data from url %s. "%s".'
        raise ConnectionError(fmt % (url, msg)) from msg

    if needs_retry:
        try:
            await asyncio.sleep(WAITSECS)
            async with session.get(url, timeout=TIMEOUT) as response:
                # A second 503 is reported through raise_for_status().
                result = await _extract_payload(response, detail)
        except Exception as msg:
            fmt = '503 Error downloading data from url %s. "%s".'
            raise ConnectionError(fmt % (url, msg)) from msg

    if detail:
        return result  # circumvents api throttle

    # there is now a throttle on the number of API requests that can be made (500 in 5 minutes.)
    # To avoid running into the error generated by the server, we're intentionally slowing down the rate of API calls here.
    tend = perf_counter()
    dt = tend - tstart
    if dt < API_THROTTLE_LIMIT:
        await asyncio.sleep(API_THROTTLE_LIMIT)

    return result
    
async def get_data(start_time, end_time, minmag=0):
    """Download earthquake summaries for a time window, one query per segment.

    The window is divided with get_time_segments, a query URL is built for
    each segment, and all queries are awaited together through search.

    Args:
        start_time (datetime): Start of the search window.
        end_time (datetime): End of the search window.
        minmag (float): Minimum magnitude, used both to size the segments and
            as the query's ``minmagnitude``. Defaults to 0.

    Returns:
        list: One entry per time segment, in segment order; each entry is the
        value search returned for that segment's query.

    Raises:
        ConnectionError: Propagated from search when a download fails.
    """
    start = perf_counter()
    segments = get_time_segments(start_time, end_time, minmag)
    async with aiohttp.ClientSession() as session:
        tasks = []
        for iseg, (stime, etime) in enumerate(segments, start=1):
            fmt = "Searching time segment %i: %s to %s\n"
            print(fmt % (iseg, stime, etime))
            parameters = {
                "format": "geojson",
                "starttime": stime.strftime(TIMEFMT),
                "endtime": etime.strftime(TIMEFMT),
                # Same limit and magnitude that sized the segments, so the
                # query and the segmentation cannot drift apart.
                "limit": SEARCH_LIMIT,
                "minmagnitude": minmag,
                "orderby": "time-asc",
                "eventtype": "earthquake",
            }
            url = "https://earthquake.usgs.gov/fdsnws/event/1/query?" + urlencode(parameters)
            tasks.append(search(url, session))
        # gather returns results in the order the tasks were supplied.
        results = await asyncio.gather(*tasks)
        print(f"summary timer {perf_counter()-start}")
        # No per-event detail requests are issued, so this total covers the
        # same work as the summary timer above.
        print(f"all timer {perf_counter()-start}")
        return results
if __name__ == '__main__':
    data = await get_data(datetime(2020, 1, 1), datetime(2025, 1, 1))
    

Searching time segment 1: 2020-01-01 00:00:00 to 2020-01-15 00:00:00

Searching time segment 2: 2020-01-15 00:00:00.000001 to 2020-01-29 00:00:00.000001

Searching time segment 3: 2020-01-29 00:00:00.000002 to 2020-02-12 00:00:00.000002

Searching time segment 4: 2020-02-12 00:00:00.000003 to 2020-02-26 00:00:00.000003

Searching time segment 5: 2020-02-26 00:00:00.000004 to 2020-03-11 00:00:00.000004

Searching time segment 6: 2020-03-11 00:00:00.000005 to 2020-03-25 00:00:00.000005

Searching time segment 7: 2020-03-25 00:00:00.000006 to 2020-04-08 00:00:00.000006

Searching time segment 8: 2020-04-08 00:00:00.000007 to 2020-04-22 00:00:00.000007

Searching time segment 9: 2020-04-22 00:00:00.000008 to 2020-05-06 00:00:00.000008

Searching time segment 10: 2020-05-06 00:00:00.000009 to 2020-05-20 00:00:00.000009

Searching time segment 11: 2020-05-20 00:00:00.000010 to 2020-06-03 00:00:00.000010

Searching time segment 12: 2020-06-03 00:00:00.000011 to 2020-06-17 00:00:00.000011

Sea

NameError: name 'detail_results' is not defined