# Global Earthquake B-Value Report Generator
## COMP41680/COMP47670 Assignment 1 - Task 1: Data Collection

In [1]:
from datetime import datetime, timedelta
import urllib.request, json
from pathlib import Path

### Step 1: Ask the user how many days in the past they want

In [2]:
duration = 35

### Step 2: Calculate duration dictionary, prepare for generate endpoint url
We need to split the request into small pieces. Having 20,000 results in one response can cause a server (503) or resource (400) error. Generally, there are fewer than 20,000 earthquakes in 30 days, but in some cases, there may be more than 20,000 earthquakes even in 15 days. Therefore, we select 10 days as the maximum duration for one request.

In [3]:
duration_dict = {}

def generate_duration_dict(end_date, duration, days=10):
    end_date = datetime.strptime(end_date, "%Y-%m-%d")
    ten_days_ago = end_date - timedelta(days=days)
    start_date = ten_days_ago.strftime("%Y-%m-%d")
    duration_dict[end_date.strftime("%Y-%m-%d")] = start_date
    if duration > 10:
        return generate_duration_dict(start_date, duration - 10)
    elif duration > 0:
        return generate_duration_dict(start_date, 0)
    else:
        return

today = datetime.now()
end_date = today.strftime("%Y-%m-%d")
generate_duration_dict(end_date, duration)

print(duration_dict)

{'2024-02-18': '2024-02-08', '2024-02-08': '2024-01-29', '2024-01-29': '2024-01-19', '2024-01-19': '2024-01-09', '2024-01-09': '2023-12-30'}


### Step 3: Create raw data directory if it does not already exist, or delete previous data

In [4]:
dir_path = Path("data/raw_data")

if dir_path.exists():
    for item in dir_path.iterdir():
        item.unlink()
    print(f"Deleted resources under `{dir_path}`")
else:
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"mkdir `{dir_path}`")

Deleted resources under `data/raw_data`


### Step 4: Collect earthquake data from USGS API

In [5]:
%%time

for endtime in duration_dict:
    starttime = duration_dict[endtime]
    url = f"https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime={starttime}&endtime={endtime}"
    print("GET " + url)
    response = urllib.request.urlopen(url)
    raw_json = response.read().decode("utf-8")
    data = json.loads(raw_json)

    fname = "data/raw_data/%s_%s.json" % (starttime, endtime)
    with open(fname, "w") as json_file:
        json.dump(data, json_file)

print("Data saved in `./data/raw_data` directory.\n")

GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-02-08&endtime=2024-02-18
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-29&endtime=2024-02-08
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-19&endtime=2024-01-29
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-09&endtime=2024-01-19
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2023-12-30&endtime=2024-01-09
Data saved in `./data/raw_data` directory.

CPU times: user 790 ms, sys: 135 ms, total: 926 ms
Wall time: 12.3 s
