# Earthquake B-Value Report Generator
## COMP41680/COMP47670 Assignment 1 - Task 1: Data Collection

In [1]:
from datetime import datetime, timedelta
import urllib.request, json
from pathlib import Path
import tkinter as tk

### Step 1: Ask the user how many days in the past they want

In [2]:
# Default look-back window in days; replaced when the user confirms a choice.
duration = 1


def get_duration():
    """Store the selected radio-button value in `duration` and close the window.

    Reads the current value of the module-level `var`, writes it to the
    module-level `duration`, echoes it, and destroys the `root` window.
    """
    global duration
    selected_days = var.get()
    duration = selected_days
    print("User selected: ", selected_days)
    root.destroy()

# Build a small window that lets the user pick the look-back period.
root = tk.Tk()
root.title("Please Select")
root.geometry("250x200")

prompt = tk.Label(root, text="How many days in the past do you want?", font=("Helvetica", 12))
prompt.pack(pady=10)

# Shared variable behind the radio buttons, pre-selected to the current `duration`.
var = tk.IntVar(value=duration)

numbers = [1, 7, 30, 90, 365]

# One radio button per selectable number of days.
for days_option in numbers:
    choice = tk.Radiobutton(root, text=str(days_option), variable=var, value=days_option)
    choice.pack(anchor=tk.W, padx=80)

confirm_button = tk.Button(root, text="Confirm", command=get_duration)
confirm_button.pack()

# Run the Tk event loop; get_duration() calls root.destroy() on confirm.
root.mainloop()

User selected:  30


### Step 2: Build the duration dictionary used to generate the endpoint URLs
We need to split the request into small pieces. Having 20,000 results in one response can cause a server (503) or resource (400) error. Generally, there are fewer than 20,000 earthquakes in 30 days, but in some cases, there may be more than 20,000 earthquakes even in 15 days. Therefore, we select 10 days as the maximum duration for one request.

Each request returns events from `starttime` onwards; `endtime` is exclusive.

In [3]:
# Maps each request's end date to its start date (both "YYYY-MM-DD" strings).
duration_dict = {}


def calculate_start_date(end_date, days=10):
    """Return the date that lies `days` days before `end_date`.

    Args:
        end_date: Date string in "%Y-%m-%d" format.
        days: Number of days to go back (default 10).

    Returns:
        The earlier date as a "%Y-%m-%d" string.

    Raises:
        ValueError: If `end_date` does not match the "%Y-%m-%d" format.
    """
    date_format = "%Y-%m-%d"
    parsed_end = datetime.strptime(end_date, date_format)
    return (parsed_end - timedelta(days=days)).strftime(date_format)

# Walk backwards from today in windows of at most 10 days until the
# requested number of days is covered; record each window as end -> start.
today = datetime.now()
end_date = today.strftime("%Y-%m-%d")
days = duration

while days > 0:
    chunk = min(days, 10)
    start_date = calculate_start_date(end_date, chunk)
    days -= chunk
    duration_dict[end_date] = start_date
    # The next (older) window ends where this one starts.
    end_date = start_date

print(duration_dict)

{'2024-02-19': '2024-02-09', '2024-02-09': '2024-01-30', '2024-01-30': '2024-01-20'}


### Step 3: Create raw data directory if it does not already exist, or delete previous data

In [4]:
# Directory that holds the raw JSON responses. Files left over from a
# previous run are removed so they are not mixed with the new download.
dir_path = Path("data/raw_data")

if dir_path.exists():
    for item in dir_path.iterdir():
        # Path.unlink() raises on a directory (e.g. a `.ipynb_checkpoints`
        # folder), which would abort the cell half-way through the cleanup,
        # so only files and symlinks are removed.
        if item.is_file() or item.is_symlink():
            item.unlink()
    print(f"Deleted resources under `{dir_path}`")
else:
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"mkdir `{dir_path}`")

Deleted resources under `data/raw_data`


### Step 4: Collect earthquake data from USGS API, save as JSON

In [5]:
%%time

# Download one GeoJSON document per date window and store it on disk as
# `<starttime>_<endtime>.json` inside `dir_path`.
for endtime, starttime in duration_dict.items():
    url = f"https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime={starttime}&endtime={endtime}"
    print("GET " + url)
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url) as response:
        raw_json = response.read().decode("utf-8")
    # Parse before saving so that a malformed response fails here rather than
    # producing an invalid file.
    data = json.loads(raw_json)

    # Build the path with pathlib rather than mixing f-string and %-formatting,
    # which would break if `dir_path` ever contained a `%` character.
    fname = dir_path / f"{starttime}_{endtime}.json"
    with open(fname, "w") as json_file:
        json.dump(data, json_file)

print(f"Data saved in `./{dir_path}` directory.\n")

GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-02-09&endtime=2024-02-19
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-30&endtime=2024-02-09
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-20&endtime=2024-01-30
Data saved in `./data/raw_data` directory.

CPU times: user 483 ms, sys: 96.6 ms, total: 580 ms
Wall time: 5.39 s
