In [0]:
# Mount ADLS Gen2
# Required each time the cluster is restarted, which should only be the first notebook as they run in order

tiers = ["bronze", "silver", "gold"]
adls_paths = {tier: f"abfss://{tier}@jf803djeg67q36.dfs.core.windows.net/" for tier in tiers}

# Accessing paths

bronze_adls = adls_paths["bronze"]
silver_adls = adls_paths["silver"]
gold_adls = adls_paths["gold"]

dbutils.fs.ls(bronze_adls)
dbutils.fs.ls(silver_adls)
dbutils.fs.ls(gold_adls)

[]

In [0]:
import requests
import json
from datetime import date, timedelta

In [0]:
start_date = date.today() - timedelta(1)
end_date = date.today()

In [0]:
# Construct the API URL with start and end dates provided by data factory, formatted for geojson output

url = f"https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime={start_date}&endtime={end_date}"

try:
    # Make the GET request to fetch data
    response = requests.get(url)

    # Check if request was successful
    response.raise_for_status() # Raise HTTPError bad responses
    data = response.json().get('features', [])

    if not data:
        print("No data returned for the specified date range.")
    else:
        # Specify ADLS Path
        file_path = f"{bronze_adls}/{start_date}_earthquakes_data.json"

        # Save the json data
        json_data = json.dumps(data, indent = 4)
        dbutils.fs.put(file_path, json_data, overwrite = True)
        print(f"Data successfully saved to {file_path}")
    
except requests.exceptions.RequestException as e:
    print(f"Error fetching data from API: {e}") 



Wrote 271577 bytes.
Data successfully saved to abfss://bronze@jf803djeg67q36.dfs.core.windows.net//2025-03-08_earthquakes_data.json


In [0]:
# Define variables in a dictionary

output_data = {
"start_date": start_date.isoformat(),
"end_date": end_date.isoformat(),
"bronze_adls": bronze_adls,
"silver_adls": silver_adls,
"gold_adls": gold_adls
}

# Return the dictionary directly
dbutils.jobs.taskValues.set(key = "bronze_output", value = output_data)