In [1]:
import pandas as pd
import requests
import datetime

# Disabled cleaning steps, kept as a bare (unassigned) triple-quoted string so
# that none of it executes; the cell only evaluates the literal and discards it.
# The text operates on a frame named `df`, which this cell itself never defines.
# NOTE(review): the text references "locationSource" and "magSource" columns,
# which are not among the columns built by getDataFrame in this notebook --
# confirm the intended data source before re-enabling any of it.
'''df.drop_duplicates(inplace=True) # dates do overlap although miniscule
df.drop("type", axis=1, inplace=True) # redundant
df.drop("updated", axis=1, inplace=True) # no valuable information
df.drop("place", axis=1, inplace=True) # no need to have a reference point when long and lat are provided
df.dropna(inplace=True) # drop all rows with a missing value

for i, r in df.iterrows(): # accurate reviewed data
    if r["status"] == "automatic" or r["status"] == "deleted":
        df.drop(index=i, inplace=True)
df.drop("status", axis=1, inplace=True) # redundant
df["time"] = pd.to_datetime(df["time"]) # convert object to datetime

netlocmag_identical = True
for i, r in df.iterrows(): # check for redundancy in columns
    if r["locationSource"] != r["magSource"] or r["locationSource"] != r["net"]:
        netlocmag_identical = False
if netlocmag_identical: # rename the column to combine and drop the others
    df.rename(columns={"net": "netlocmagSource"}, inplace=True) 
    df.drop("magSource", axis=1, inplace=True)
    df.drop("locationSource", axis=1, inplace=True)
df.reset_index(drop=True, inplace = True)'''
0

0

In [2]:
def getDataFrame(parameters):
    """Fetch one batch of USGS earthquake events and return it as a DataFrame.

    Parameters
    ----------
    parameters : dict
        Query-string parameters forwarded unchanged to the USGS
        ``fdsnws/event/1/query`` endpoint. The response is read as GeoJSON,
        so callers are expected to pass ``'format': 'geojson'``.

    Returns
    -------
    pandas.DataFrame
        One row per GeoJSON feature, in the order the API returned them.
        The 26 ``properties`` fields come first, followed by the geometry
        ``type``, the geometry ``coordinates`` (kept as a list) and the
        feature ``id``. An empty feature list yields an empty frame that
        still has all 29 columns.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If a feature lacks one of the expected fields.
    """
    propertyKeys = ['mag','place','time','updated','tz','url','detail','felt','cdi',
                    'mmi','alert','status','tsunami','sig','net','code','ids','sources',
                    'types','nst','dmin','rms','gap','magType','type','title']
    # NOTE: the label 'type' occurs twice (event type from the properties and
    # geometry type). It is kept as-is so existing consumers that rely on
    # these column names keep working; be aware that df['type'] selects both.
    columns = propertyKeys + ['type','coordinates','id']

    response = requests.get("https://earthquake.usgs.gov/fdsnws/event/1/query",
                            params=parameters, timeout=60)
    # Fail loudly on an HTTP error instead of letting .json() choke on an
    # error page with a misleading decode error.
    response.raise_for_status()
    features = response.json()['features']

    # Collect all rows first and build the frame once; assigning df.loc[i]
    # row by row re-allocates the frame on every insertion.
    rows = [
        [feature['properties'][key] for key in propertyKeys]
        + [feature['geometry']['type'], feature['geometry']['coordinates'], feature['id']]
        for feature in features
    ]
    return pd.DataFrame(rows, columns=columns)

def getData(startDate=datetime.date(2014, 1, 1), endDate=datetime.date(2023, 8, 1), maxEvents=20000):
    """Download every event between two dates, batching to respect the API limit.

    The end of the current window is pushed forward one day at a time while
    the USGS ``count`` endpoint reports at most ``maxEvents`` events. As soon
    as the count exceeds the limit, the last window that still fitted is
    downloaded with ``getDataFrame`` and the next window starts at that
    window's end date.

    Parameters
    ----------
    startDate : datetime.date, optional
        First day of the range (default 2014-01-01).
    endDate : datetime.date, optional
        Last day of the range (default 2023-08-01).
    maxEvents : int, optional
        Largest event count accepted for a single download (default 20000).

    Returns
    -------
    pandas.DataFrame
        All downloaded batches concatenated with a fresh 0..n-1 index. The
        most recently downloaded batch comes first, as before.

    Raises
    ------
    requests.HTTPError
        If a count request answers with a 4xx/5xx status.
    RuntimeError
        If a window that cannot be shortened any further (one day or less)
        still exceeds ``maxEvents``; previously this case looped forever.

    Notes
    -----
    NOTE(review): consecutive windows share their boundary date, so an event
    stamped exactly on a boundary may show up in two batches if the API
    treats both bounds as inclusive -- confirm, and de-duplicate on ``id``
    downstream if needed.
    """
    delta = datetime.timedelta(days=1)
    # both ends slide forward until the end date is reached
    windowStart = startDate
    windowEnd = startDate
    batches = []

    while windowEnd <= endDate:
        countParameters = {'format':'geojson', 'starttime': windowStart, 'endtime': windowEnd}
        response = requests.get("https://earthquake.usgs.gov/fdsnws/event/1/count",
                                params=countParameters, timeout=60)
        response.raise_for_status()
        count = response.json()['count']
        print("Start Date: " + str(windowStart) + " End Date: " + str(windowEnd) + " Count: " + str(count))
        if maxEvents < count:
            # step back to the last end date that was still within the limit
            lastValidEnd = windowEnd - delta
            if lastValidEnd <= windowStart:
                # The window cannot shrink any further; without this guard the
                # loop would keep re-requesting the same window forever.
                raise RuntimeError(
                    "Window starting " + str(windowStart) + " and ending " + str(windowEnd)
                    + " holds " + str(count) + " events, more than the limit of "
                    + str(maxEvents) + ", and cannot be split into smaller day-sized windows"
                )
            parameters = {'format':'geojson', 'starttime': windowStart, 'endtime': lastValidEnd}
            batches.append(getDataFrame(parameters))
            windowStart = lastValidEnd
        else:
            # still under the limit: widen the window by one day
            windowEnd += delta

    # the loop overshoots by one day; fetch the final, under-limit window
    windowEnd -= delta
    parameters = {'format':'geojson', 'starttime': windowStart, 'endtime': windowEnd}
    batches.append(getDataFrame(parameters))

    # Newest batch first matches the original "prepend" order; concatenating
    # once avoids re-copying the accumulated frame for every batch.
    return pd.concat(batches[::-1], ignore_index=True)

# Run the full download. getData issues one count request per one-day step of
# its window plus one data request per batch, so expect this cell to take a while.
df = getData()
# Bare expression: shows the resulting frame as the cell output.
df

Start Date: 2014-01-01 End Date: 2014-01-01 Count: 0
Start Date: 2014-01-01 End Date: 2014-01-02 Count: 325
Start Date: 2014-01-01 End Date: 2014-01-03 Count: 707
Start Date: 2014-01-01 End Date: 2014-01-04 Count: 1038
Start Date: 2014-01-01 End Date: 2014-01-05 Count: 1288
Start Date: 2014-01-01 End Date: 2014-01-06 Count: 1565
Start Date: 2014-01-01 End Date: 2014-01-07 Count: 1885
Start Date: 2014-01-01 End Date: 2014-01-08 Count: 2273
Start Date: 2014-01-01 End Date: 2014-01-09 Count: 2604
Start Date: 2014-01-01 End Date: 2014-01-10 Count: 2919
Start Date: 2014-01-01 End Date: 2014-01-11 Count: 3378
Start Date: 2014-01-01 End Date: 2014-01-12 Count: 3721
Start Date: 2014-01-01 End Date: 2014-01-13 Count: 4229
Start Date: 2014-01-01 End Date: 2014-01-14 Count: 4809
Start Date: 2014-01-01 End Date: 2014-01-15 Count: 5301
Start Date: 2014-01-01 End Date: 2014-01-16 Count: 5687
Start Date: 2014-01-01 End Date: 2014-01-17 Count: 6121
Start Date: 2014-01-01 End Date: 2014-01-18 Count: 65