# Explore Earthquake: World, US, Alaska and California
## COMP41680/COMP47670 Assignment 1 - Task 1: Data Collection

In [1]:
import json
import re
import shutil
import sys
import urllib.request

from datetime import datetime, timedelta
from pathlib import Path

# conda install pyqt qtpy
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QLabel, QPushButton, QLineEdit, QMessageBox

### Step 1: Ask the user how many days in the past they want
We use PyQt5 instead of tkinter because, in my macOS Jupyter environment, PyQt5 is much more stable than tkinter.

In [2]:
class InputDaysWindow(QWidget):
    """Small window that asks the user how many past days of data to collect.

    When the user confirms a valid value (an integer between 1 and 1000), it is
    stored in the module-level variable ``days`` and the window closes.
    Invalid input shows a warning box and leaves ``days`` untouched.
    """

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Build the window: a prompt label, a text box and a confirm button."""
        self.setWindowTitle("Please Input") # Window title

        layout = QVBoxLayout()  # Layout manager that arranges the widgets vertically

        label = QLabel("How many days in the past do you want?") # Prompt text
        layout.addWidget(label)

        self.input_box = QLineEdit()  # Kept on `self` because get_days reads it later
        layout.addWidget(self.input_box)

        confirm_button = QPushButton("Confirm")  # Confirm button
        confirm_button.clicked.connect(self.get_days)  # Validate the input when clicked
        layout.addWidget(confirm_button)

        self.setLayout(layout)

    def get_days(self):
        """Validate the text box content and, if valid, publish it as global ``days``.

        The value must be an optionally signed integer in the range 1..1000.
        On failure a warning dialog with the reason is shown and the window
        stays open so the user can try again.
        """
        global days  # Declare global variable
        days_str = self.input_box.text().strip()  # Read from input box, ignoring surrounding spaces
        try:
            # Raw string avoids the invalid "\d" escape; fullmatch requires the whole text to be an integer
            if re.fullmatch(r"[+-]?\d+", days_str) is None:
                raise ValueError("Please input an Integer")

            requested_days = int(days_str)  # Convert str to int
            if requested_days < 1:
                raise ValueError("At least 1 day")
            if requested_days > 1000:
                raise ValueError("Too many days, at most 1000")
        except ValueError as e:
            QMessageBox.warning(self, "Invalid Input", str(e))
            return

        # Only assign the global after every check has passed, so a rejected
        # value can never leak into later cells.
        days = requested_days
        print("User input:", days)
        self.close()

# Reuse an already existing QApplication if there is one, so re-running this
# cell in the same kernel does not create a second application object.
app = QApplication.instance() or QApplication(sys.argv)
window = InputDaysWindow()
window.show()
_ = app.exec_()  # Execute the app; assign the result to `_` so Jupyter does not print it

User input: 1000


### Step 2: Calculate the duration dictionary to prepare for generating the endpoint URLs
We need to split the request into small pieces. From experience, having 20,000 results in one response can cause a server (503) or resource (400) error on USGS side. Generally, there are fewer than 20,000 earthquakes in 30 days, but in some cases, there may be more than 20,000 earthquakes even in 15 days. Therefore, we choose 10 days as the maximum duration for one request.

Each request's date range includes starttime and excludes endtime, e.g., <span style="color:green">2024-03-08 - 2024-03-12</span> includes 2024-03-08 but excludes 2024-03-12.

In [3]:
# Maps the end date of each request window to its start date; filled in below.
duration_dict = dict()

def calculate_start_date(end_date, days=10):
    """
    Return the date that lies `days` days before `end_date`.

    Parameters:
        end_date (str): The end date, formatted as 'YYYY-MM-DD'.
        days (int, optional): How many days to go back from the end date. Defaults to 10.

    Returns:
        str: The resulting start date, formatted as 'YYYY-MM-DD'.
    """
    date_format = "%Y-%m-%d"
    window_end = datetime.strptime(end_date, date_format)  # Parse the string into a datetime
    window_start = window_end - timedelta(days=days)  # Step back the requested number of days
    return window_start.strftime(date_format)  # Render the result as a string again

today = datetime.now()
end_date = today.strftime("%Y-%m-%d")

# Calculate endpoint date pairs and store them in a dictionary.
# Count down on a local copy so the user's `days` value is not destroyed:
# re-running this cell then rebuilds the same windows instead of an empty dict.
remaining_days = days
while remaining_days > 0:
    chunk_days = min(remaining_days, 10)  # Each request covers at most 10 days
    start_date = calculate_start_date(end_date, chunk_days)
    duration_dict[end_date] = start_date  # Store the enddate-startdate pair in the dictionary
    end_date = start_date  # In the next pair, end date is the previous start date
    remaining_days -= chunk_days

print(duration_dict)

{'2024-03-12': '2024-03-02', '2024-03-02': '2024-02-21', '2024-02-21': '2024-02-11', '2024-02-11': '2024-02-01', '2024-02-01': '2024-01-22', '2024-01-22': '2024-01-12', '2024-01-12': '2024-01-02', '2024-01-02': '2023-12-23', '2023-12-23': '2023-12-13', '2023-12-13': '2023-12-03', '2023-12-03': '2023-11-23', '2023-11-23': '2023-11-13', '2023-11-13': '2023-11-03', '2023-11-03': '2023-10-24', '2023-10-24': '2023-10-14', '2023-10-14': '2023-10-04', '2023-10-04': '2023-09-24', '2023-09-24': '2023-09-14', '2023-09-14': '2023-09-04', '2023-09-04': '2023-08-25', '2023-08-25': '2023-08-15', '2023-08-15': '2023-08-05', '2023-08-05': '2023-07-26', '2023-07-26': '2023-07-16', '2023-07-16': '2023-07-06', '2023-07-06': '2023-06-26', '2023-06-26': '2023-06-16', '2023-06-16': '2023-06-06', '2023-06-06': '2023-05-27', '2023-05-27': '2023-05-17', '2023-05-17': '2023-05-07', '2023-05-07': '2023-04-27', '2023-04-27': '2023-04-17', '2023-04-17': '2023-04-07', '2023-04-07': '2023-03-28', '2023-03-28': '2023

### Step 3: Record the duration

In [4]:
# Fail with a clear message instead of a bare StopIteration when no window was computed
if not duration_dict:
    raise ValueError("duration_dict is empty: enter the number of days and build the date windows first")

# The dictionary keeps insertion order: the first key is the end date of the
# first window added and the last value is the start date of the last window added.
first_key = next(iter(duration_dict.keys())) # end date
last_value = list(duration_dict.values())[-1] # start date

end_date = first_key
start_date = last_value

# Make sure the `data` directory exists before writing into it
Path("data").mkdir(parents=True, exist_ok=True)

with open('data/duration.txt', 'w') as f:  # Record the start and end date, part 2 will display it
    f.write(f"{start_date}\n")
    f.write(f"{end_date}\n")

print("Start:\t", start_date)
print("End:\t", end_date)

Start:	 2021-06-16
End:	 2024-03-12


### Step 4: Create raw data directory if it does not already exist, or delete previous data

In [5]:
dir_path = Path("data/raw_data")

if dir_path.exists():  # Remove everything left over from a previous run
    for item in dir_path.iterdir():
        if item.is_dir() and not item.is_symlink():
            # unlink() cannot remove directories, so delete them recursively
            shutil.rmtree(item)
        else:
            item.unlink()
    print(f"Deleted resources under `{dir_path}`")
else:                  # Create directory (including the parent `data` directory)
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"mkdir `{dir_path}`")

Deleted resources under `data/raw_data`


### Step 5: Collect earthquake data from USGS API, save as JSON

In [6]:
%%time

for endtime in duration_dict:
    starttime = duration_dict[endtime]
    url = f"https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime={starttime}&endtime={endtime}"
    print("GET", url)
    response = urllib.request.urlopen(url)
    raw_json = response.read().decode("utf-8")
    data = json.loads(raw_json)

    fname = f"{dir_path}/%s_%s.json" % (starttime, endtime)  # e.g. 2024-03-02_2024-03-12.json
    with open(fname, "w") as json_file:
        json.dump(data, json_file)

print(f"Data saved in `./{dir_path}` directory.\n")

GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-03-02&endtime=2024-03-12
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-02-21&endtime=2024-03-02
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-02-11&endtime=2024-02-21
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-02-01&endtime=2024-02-11
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-22&endtime=2024-02-01
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-12&endtime=2024-01-22
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-01-02&endtime=2024-01-12
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2023-12-23&endtime=2024-01-02
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2023-12-13&endtime=2023-12-23
GET https://earthquake.usgs.

GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2022-02-01&endtime=2022-02-11
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2022-01-22&endtime=2022-02-01
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2022-01-12&endtime=2022-01-22
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2022-01-02&endtime=2022-01-12
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-12-23&endtime=2022-01-02
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-12-13&endtime=2021-12-23
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-12-03&endtime=2021-12-13
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-11-23&endtime=2021-12-03
GET https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2021-11-13&endtime=2021-11-23
GET https://earthquake.usgs.