In [22]:
import pandas as pd
import onc
import os
import json
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

load_dotenv()
token = os.getenv("ONC_TOKEN")

# Create ONC client using the SDK
my_onc = onc.ONC(token)

In [23]:
""" get Scalar sample output"""

def get_property(start: str, end: str, locationCode: str, deviceCategoryCode: str, sensorCategoryCode: str) -> pd.DataFrame:

    params = {
        "locationCode": locationCode,
        "deviceCategoryCode": deviceCategoryCode,
        "sensorCategoryCodes": sensorCategoryCode,
        "dateFrom": start,
        "dateTo" : end
    }

    # JSON response from ONC
    result = my_onc.getScalardata(params)
    formatted = json.dumps(result, indent=4)
    print(formatted)

In [24]:
dateFrom = "2023-07-11T17:00:00.000Z"
dateTo = "2023-07-11T22:30:00.000Z"

df = get_property(dateFrom, dateTo, "CF341", "CTD", "temperature, depth") # populate dataframe of entire day


{
    "citations": [],
    "messages": [],
    "next": null,
    "parameters": {
        "dateFrom": "2023-07-11T17:00:00.000Z",
        "dateTo": "2023-07-11T22:30:00.000Z",
        "deviceCategoryCode": "CTD",
        "fillGaps": true,
        "getLatest": false,
        "locationCode": "CF341",
        "metaData": "Minimum",
        "method": "getByLocation",
        "outputFormat": "Array",
        "propertyCode": null,
        "qualityControl": "clean",
        "resamplePeriod": null,
        "resampleType": null,
        "rowLimit": 100000,
        "sensorBase": null,
        "sensorCategoryCodes": [
            "temperature",
            "depth"
        ],
        "sensorsToInclude": "original",
        "token": "d008594a-922f-441d-87e4-be3e06faaf15"
    },
    "queryUrl": "https://data.oceannetworks.ca/api/scalardata?locationCode=CF341&deviceCategoryCode=CTD&sensorCategoryCodes=temperature%2C+depth&dateFrom=2023-07-11T17%3A00%3A00.000Z&dateTo=2023-07-11T22%3A30%3A00.000Z&method

In [25]:
"""
Calculates the duration during which CTD depth measurements exceeded a given threshold
within a specified time window.

Parameters:
    df (pd.DataFrame): DataFrame with 'timestamp' (datetime) and 'depth' columns.
    threshold (float): Depth threshold in meters (default = 23).
    dateFrom (str): Optional ISO 8601 UTC start time (e.g., "2023-07-11T17:00:00.000Z").
    dateTo (str): Optional ISO 8601 UTC end time (e.g., "2023-07-11T22:30:00.000Z").

Returns:
    pd.Timedelta or None: Duration between first and last timestamps where depth > threshold,
                          or None if no such data exists.
"""
def get_duration_below_depth(df, threshold=23, dateFrom=None, dateTo=None) -> pd.Timedelta:
    # Convert dateFrom and dateTo to datetime, if provided
    if dateFrom:
        dateFrom = pd.to_datetime(dateFrom)
        df = df[df["timestamp"] >= dateFrom]
    if dateTo:
        dateTo = pd.to_datetime(dateTo)
        df = df[df["timestamp"] <= dateTo]

    # Filter for depths greater than the threshold
    deep_df = df[df["depth"] > threshold]

    if deep_df.empty:
        print(f"No data where depth > {threshold} m in the specified time window.")
        return None

    # Compute duration from first to last timestamp exceeding the threshold
    duration = deep_df["timestamp"].iloc[-1] - deep_df["timestamp"].iloc[0]

    print(f"Duration with depth > {threshold} m: {duration}")
    print(f"From {deep_df['timestamp'].iloc[0]} to {deep_df['timestamp'].iloc[-1]}")

    return duration


In [26]:
"""
Fetches a list of dictionaries where each dictionary represents a single property, and contains metadata that 
describes how that property is defined, measured, and whether data is available at that location.
"""

def find_properties_by_location(locationCode: str):

    params = {
        "locationCode": locationCode,
        #"deviceCategoryCode" : "CTD" # only consider CTD data properties
    }

    result = my_onc.getProperties(params)
    extracted = []

    for entry in result:
        # Defensive check: make sure these keys exist
        name = entry.get("propertyName", "")
        code = entry.get("propertyCode", "")
        has_data = entry.get("hasDeviceData", False)

        # Optionally: filter out properties that aren't actually measured
        if name and code:
            extracted.append({
                "propertyName": name,
                "propertyCode": code,
                "hasDeviceData": has_data
            })
    
    df = pd.DataFrame(extracted)
    print(df)


In [None]:
def detect_cast_intervals(df: pd.DataFrame, gap_threshold_minutes: int = 10) -> List[Tuple[pd.Timedelta, pd.Timedelta]]:
    """
    Detects individual CTD cast intervals based on time gaps in the data.

    Parameters:
        df (pd.DataFrame): DataFrame with a 'timestamp' column (datetime format).
        gap_threshold_minutes (int): Time gap threshold to detect breaks between casts.

    Returns:
        List[Tuple[pd.Timedelta, pd.Timedelta]]: List of (dateFrom, dateTo) pairs in ISO 8601 UTC format.
    """
    if df.empty or "timestamp" not in df.columns:
        return []

    # Sort and reset index
    df = df.sort_values("timestamp").reset_index(drop=True)

    # Calculate time gaps between consecutive timestamps
    gaps = df["timestamp"].diff().fillna(pd.Timedelta(seconds=0))
    new_cast_starts = df.index[gaps > pd.Timedelta(minutes=gap_threshold_minutes)].tolist()

    # Start of first cast is always 0
    cast_starts = [0] + new_cast_starts
    cast_ends = new_cast_starts + [len(df)]

    # Format intervals as ISO 8601 with milliseconds and Z (UTC)
    intervals = []
    for start_idx, end_idx in zip(cast_starts, cast_ends):
        start = df["timestamp"].iloc[start_idx].strftime("%Y-%m-%dT%H:%M:%S.000Z")
        end = df["timestamp"].iloc[end_idx - 1].strftime("%Y-%m-%dT%H:%M:%S.000Z")
        intervals.append((start,end))

    return intervals


In [None]:
def detect_deep_intervals(df: pd.DataFrame, depth_threshold: float, gap_threshold_seconds: int = 60) -> list[tuple[str, str]]:
    """
    Detects continuous time intervals where CTD depth exceeds a threshold.

    Parameters:
        df (pd.DataFrame): DataFrame with 'timestamp' and 'depth' columns.
        depth_threshold (float): Minimum depth to consider.
        gap_threshold_seconds (int): Max time gap (in seconds) to consider part of same interval.

    Returns:
        List[Tuple[str, str]]: List of (start, end) ISO8601 strings for each deep interval.
    """


    df_deep = df[df["depth"] > depth_threshold].copy()

    if df_deep.empty:
        return []

    df_deep = df_deep.sort_values("timestamp").reset_index(drop=True)
    df_deep["delta"] = df_deep["timestamp"].diff().dt.total_seconds()

    intervals = []
    start_time = df_deep.loc[0, "timestamp"]

    for i in range(1, len(df_deep)):
        if df_deep.loc[i, "delta"] > gap_threshold_seconds:
            end_time = df_deep.loc[i - 1, "timestamp"]
            intervals.append((
                start_time.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
                end_time.strftime("%Y-%m-%dT%H:%M:%S.000Z")
            ))
            start_time = df_deep.loc[i, "timestamp"]

    # Append the final interval
    intervals.append((
        start_time.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
        df_deep.iloc[-1]["timestamp"].strftime("%Y-%m-%dT%H:%M:%S.000Z")
    ))

    return intervals
