In [None]:
# Import standard libraries
from pathlib import Path
from datetime import datetime

# Import third-party libraries
import numpy as np
import pandas as pd
import scipy as sp
import xarray as xr


In [None]:
# Notebook-wide constants

# Input folder, resolved against the directory the kernel was started in
DATA_DIR = Path.cwd() / "data/"

# Standard depth levels; spacing widens from 10 near the surface to 100 at the
# deep end (units are not stated here -- presumably metres, TODO confirm)
STANDARD_DEPTHS = np.array(
    [0, 10, 20, 30, 50, 75, 100, 125, 150, 200, 250, 300, 400, 500]
)

In [None]:
def concat_data_array(param_name: str, sdate: str, edate: str) -> np.ndarray:
    """
    Concatenate one column from the monthly JSON files covering a date range.

    Files are looked up as ``DATA_DIR / "sooList_<YYYY><MM>.json"``. Only the
    months from the month of ``sdate`` through the month of ``edate``
    (both inclusive) are read; months without a file are skipped silently.
    Because the files are monthly, the range is applied at month granularity:
    individual rows are not filtered by day.

    Parameters
    ----------
    param_name : str
        Name of the column to extract from each JSON file
        (e.g., 'wtr_tmp', 'sal').
    sdate : str
        Start date in the format 'YYYY-MM-DD'.
    edate : str
        End date in the format 'YYYY-MM-DD'.

    Returns
    -------
    np.ndarray
        Values of ``param_name`` from every file found, concatenated in
        chronological file order, with empty strings and ``None`` replaced by
        NaN. An empty array is returned when no file exists in the range
        (including when ``sdate`` is later than ``edate``).

    Raises
    ------
    ValueError
        If ``sdate`` or ``edate`` does not match 'YYYY-MM-DD'.
    KeyError
        If a file that exists does not contain the ``param_name`` column.
    """
    start = datetime.strptime(sdate, "%Y-%m-%d")
    end = datetime.strptime(edate, "%Y-%m-%d")

    chunks = []
    for year in range(start.year, end.year + 1):
        # Clamp the month window in the first and last year so that months
        # outside the requested range are not loaded.
        first_month = start.month if year == start.year else 1
        last_month = end.month if year == end.year else 12
        for month in range(first_month, last_month + 1):
            path = DATA_DIR / f"sooList_{year}{month:02d}.json"
            if not path.is_file():
                continue
            frame = pd.read_json(path)
            # Treat empty strings and nulls as missing values
            column = frame[param_name].replace(["", None], np.nan)
            chunks.append(column.values)

    if chunks:
        return np.concatenate(chunks)
    return np.array([])


In [None]:
# Date range to load (inclusive), as 'YYYY-MM-DD' strings
sdate, edate = "1968-01-01", "2024-12-31"

# JSON field names to extract, in the order they are loaded
params = [
    "wtr_tmp",  # water temperature
    "wtr_dep",  # water depth
    "obs_dtm",  # observation date and time
    "lon",  # longitude
    "lat",  # latitude
    "sal",  # salinity
    "dox",  # dissolved oxygen
]


In [None]:
# Load every requested parameter once, keyed by its JSON field name
raw_arrays = {}
for param in params:
    data = concat_data_array(param, sdate, edate)
    raw_arrays[param] = data

# Observation timestamps: parse to datetimes (unparseable entries become NaT),
# then drop the NaT entries and keep the underlying NumPy array.
# NOTE(review): only obs_time is filtered here; the arrays below keep all
# rows, so obs_time can be shorter than them if any timestamp fails to parse.
# Confirm that later cells expect this.
obs_time = pd.to_datetime(raw_arrays["obs_dtm"], errors="coerce").dropna().values

# Remaining parameters are used as loaded, under descriptive names
temperature = raw_arrays["wtr_tmp"]
depth = raw_arrays["wtr_dep"]
longitude = raw_arrays["lon"]
latitude = raw_arrays["lat"]
salinity = raw_arrays["sal"]
dissolved_oxygen = raw_arrays["dox"]