# Earthquake B-Value Report Generator
## COMP41680/COMP47670 Assignment 1 - Task 1: Data Collection

In [1]:
import json, re, sys, urllib.request

from datetime import datetime, timedelta
from pathlib import Path
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QLabel, QPushButton, QLineEdit, QMessageBox

### Step 1: Ask the user how many days in the past they want

In [2]:
# Default look-back window in days; InputDaysWindow.get_days overwrites it via `global days`.
days = 1

class InputDaysWindow(QWidget):
    """Small window that asks how many days of history should be collected.

    A valid entry is stored in the module-level ``days`` variable and closes
    the window. An invalid entry shows a warning box, keeps the window open
    and leaves ``days`` unchanged.
    """

    # Accepted range for the number of days (both bounds inclusive).
    MIN_DAYS = 1
    MAX_DAYS = 1000

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Build the layout: prompt label, text box and Confirm button."""
        self.setWindowTitle("Please Input")

        layout = QVBoxLayout()

        label = QLabel("How many days in the past do you want?")
        layout.addWidget(label)

        self.input_box = QLineEdit()
        layout.addWidget(self.input_box)

        confirm_button = QPushButton("Confirm")
        confirm_button.clicked.connect(self.get_days)
        layout.addWidget(confirm_button)

        self.setLayout(layout)

    def get_days(self):
        """Validate the text box content and publish it as the global ``days``.

        The value is parsed and range-checked in a local variable first, so a
        rejected entry can never leak into ``days``.
        """
        global days # declare global variable
        days_txt = self.input_box.text()
        try:
            # Raw string: "\d" inside a normal string literal is an invalid escape sequence.
            if re.fullmatch(r"[+-]?\d+", days_txt) is None:
                raise ValueError("Please input an Integer")

            requested_days = int(days_txt)
            if requested_days < self.MIN_DAYS:
                raise ValueError("At least 1 day")
            if requested_days > self.MAX_DAYS:
                raise ValueError("Too many days")
        except ValueError as e:
            QMessageBox.warning(self, "Invalid Input", str(e))
            return

        # Only a fully validated value reaches the global.
        days = requested_days
        print("User input:", days)
        self.close()

# Qt allows only one QApplication per process, so reuse the existing one when
# this cell is re-run in the same kernel instead of constructing a second.
app = QApplication.instance()
if app is None:
    app = QApplication(sys.argv)
window = InputDaysWindow()
window.show()
_ = app.exec_() # run the event loop; assign the exit code so Jupyter does not echo it as cell output

User input: 1000


### Step 2: Build the duration dictionary used to generate the endpoint URLs
We need to split the request into small pieces. From experience, a single response containing 20,000 results can trigger a server (503) or resource (400) error on the USGS side. There are generally fewer than 20,000 earthquakes in 30 days, but in some cases there can be more than 20,000 earthquakes even within 15 days. Therefore, we use 10 days as the maximum duration of a single request.

Each requested interval starts at `starttime` and excludes `endtime`.

In [3]:
# Maps the end date of each request window to its start date (both "%Y-%m-%d" strings).
duration_dict = {}

def calculate_start_date(end_date, days=10):
    """Return the date lying `days` days before `end_date`.

    Both the `end_date` argument and the returned value are strings in
    "%Y-%m-%d" format.
    """
    date_format = "%Y-%m-%d"
    end_day = datetime.strptime(end_date, date_format)
    return (end_day - timedelta(days=days)).strftime(date_format)

# Walk backwards from today, cutting the requested period into windows of at
# most 10 days each (the per-request limit chosen for this notebook).
today = datetime.now()
end_date = today.strftime("%Y-%m-%d")

# Consume a local counter rather than `days` itself, so the user's choice is
# preserved and re-running this cell rebuilds the same windows.
remaining_days = days
while remaining_days > 0:
    window_days = min(remaining_days, 10)
    start_date = calculate_start_date(end_date, window_days)
    duration_dict[end_date] = start_date
    # The next (older) window ends where this one starts.
    end_date = start_date
    remaining_days -= window_days

print(duration_dict)

{'2024-03-10': '2024-02-29', '2024-02-29': '2024-02-19', '2024-02-19': '2024-02-09', '2024-02-09': '2024-01-30', '2024-01-30': '2024-01-20', '2024-01-20': '2024-01-10', '2024-01-10': '2023-12-31', '2023-12-31': '2023-12-21', '2023-12-21': '2023-12-11', '2023-12-11': '2023-12-01', '2023-12-01': '2023-11-21', '2023-11-21': '2023-11-11', '2023-11-11': '2023-11-01', '2023-11-01': '2023-10-22', '2023-10-22': '2023-10-12', '2023-10-12': '2023-10-02', '2023-10-02': '2023-09-22', '2023-09-22': '2023-09-12', '2023-09-12': '2023-09-02', '2023-09-02': '2023-08-23', '2023-08-23': '2023-08-13', '2023-08-13': '2023-08-03', '2023-08-03': '2023-07-24', '2023-07-24': '2023-07-14', '2023-07-14': '2023-07-04', '2023-07-04': '2023-06-24', '2023-06-24': '2023-06-14', '2023-06-14': '2023-06-04', '2023-06-04': '2023-05-25', '2023-05-25': '2023-05-15', '2023-05-15': '2023-05-05', '2023-05-05': '2023-04-25', '2023-04-25': '2023-04-15', '2023-04-15': '2023-04-05', '2023-04-05': '2023-03-26', '2023-03-26': '2023

### Step 3: Record the duration

In [4]:
# duration_dict keeps insertion order: the newest window was added first and
# the oldest one last.
first_key = next(iter(duration_dict.keys())) # end
last_value = list(duration_dict.values())[-1] # start

end_date = first_key
start_date = last_value

# `data/` is otherwise only created by the later raw-data step, so on a fresh
# checkout it may not exist yet; create it before writing the file.
duration_file = Path("data/duration.txt")
duration_file.parent.mkdir(parents=True, exist_ok=True)

# File layout: first line is the overall start date, second line the end date.
with open(duration_file, 'w') as f:
    f.write(f"{start_date}\n")
    f.write(f"{end_date}\n")

print("Start:\t", start_date)
print("End:\t", end_date)

Start:	 2021-06-14
End:	 2024-03-10


### Step 4: Create raw data directory if it does not already exist, or delete previous data

In [5]:
# Directory that receives one JSON file per request window.
dir_path = Path("data/raw_data")

if dir_path.exists():
    for item in dir_path.iterdir():
        # unlink() cannot remove directories (e.g. a `.ipynb_checkpoints`
        # folder), so only files and symlinks are deleted.
        if item.is_file() or item.is_symlink():
            item.unlink()
    print(f"Deleted resources under `{dir_path}`")
else:
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"mkdir `{dir_path}`")

Deleted resources under `data/raw_data`


### Step 5: Collect earthquake data from USGS API, save as JSON

In [6]:
%%time

# Download one GeoJSON document per request window and store it as
# `<starttime>_<endtime>.json` inside `dir_path`.
for endtime, starttime in duration_dict.items():
    url = f"https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime={starttime}&endtime={endtime}"
    print("GET " + url)
    # The context manager closes the HTTP response even if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        raw_json = response.read().decode("utf-8")
    # Parsing before saving ensures only well-formed JSON is written to disk.
    data = json.loads(raw_json)

    fname = dir_path / f"{starttime}_{endtime}.json"
    with open(fname, "w") as json_file:
        json.dump(data, json_file)

print(f"Data saved in `./{dir_path}` directory.\n")

GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-02-29&endtime=2024-03-10
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-02-19&endtime=2024-02-29
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-02-09&endtime=2024-02-19
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-30&endtime=2024-02-09
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-20&endtime=2024-01-30
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-10&endtime=2024-01-20
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2023-12-31&endtime=2024-01-10
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2023-12-21&endtime=2023-12-31
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2023-12-11&endtime=2023-12-21
GET https://earthquake.usgs.

GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2022-01-30&endtime=2022-02-09
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2022-01-20&endtime=2022-01-30
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2022-01-10&endtime=2022-01-20
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-12-31&endtime=2022-01-10
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-12-21&endtime=2021-12-31
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-12-11&endtime=2021-12-21
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-12-01&endtime=2021-12-11
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-11-21&endtime=2021-12-01
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-11-11&endtime=2021-11-21
GET https://earthquake.usgs.