# How to retrieve ShorelineMonitor SDS series for a region?

## Instructions

1. Use the DynamicMap: By default, the map is set to Namibia. Select your area of interest on the DynamicMap. After selecting the area, proceed to the next step to store the spatial extent in variables and retrieve the data from cloud storage. 

2. Optional: Create Visualization Panels: Run the subsequent cells to create a dashboard app that shows the clean series alongside the raw data. By default, these panels open in a new browser tab for stability. If the plot does not display correctly, refresh the tab several times (up to 10 times may be needed).

In [None]:
import sys

import dask

# Disable dask's dataframe query-planning backend. This statement is placed
# between the imports on purpose: it runs before the remaining third-party
# imports below (dask_geopandas among them).
# NOTE(review): presumably required for dask_geopandas compatibility, and the
# setting is expected to be applied before `dask.dataframe` is first imported
# - confirm against the installed dask / dask_geopandas versions.
dask.config.set({"dataframe.query-planning": False})

import logging
import os
import pathlib

import coastpy
import colorcet as cc
import dask_geopandas
import duckdb
import geopandas as gpd
import geoviews as gv
import holoviews as hv
import hvplot.pandas
import numpy as np
import pandas as pd
import panel as pn
import pystac
import shapely
from dotenv import load_dotenv

from coastmonitor.shorelines.intersection import (
    add_transect_statistics,
    find_primary_signal_per_transect_group,
)

# Load variables from a local .env file; override=True lets values from that
# file replace variables that are already set in the environment.
load_dotenv(override=True)

# NOTE: access tokens to the data are available upon request.
account_name = "coclico"
sas_token = os.environ.get("AZURE_STORAGE_SAS_TOKEN")  # None when the variable is unset
storage_options = dict(account_name=account_name, credential=sas_token)

# URL of the STAC catalog that we use to efficiently index the data
COCLICO_STAC_URL = "https://coclico.blob.core.windows.net/stac/v1/catalog.json"

# Global Coastal Transect System (publicly available and in review)
GCTS_COLLECTION_NAME = "gcts"

# Global Coastal Transect Repository (unreleased; access keys provided upon
# request). This dataset consists of GCTS plus several other characteristics,
# such as the intersection distance to the nearest coastline.
GCTR_COLLECTION_NAME = "gctr"

# ShorelineMonitor Raw Series (unreleased; access keys provided upon request).
# This dataset consists of ShorelineMonitor shorelines that are mapped onto the
# Global Coastal Transect System (Raw Series), together with a wide range of
# additional statistics used to filter out the primary, high-quality
# observations.
SM_COLLECTION_NAME = "shorelinemonitor-raw-series"

# Transect columns required for the analysis (passed as `columns=` when the
# transects are queried further down).
TRANSECT_COLUMNS = [
    "transect_id", "lon", "lat", "bearing", "geometry",
    "osm_coastline_is_closed", "osm_coastline_length",
    "utm_epsg", "bbox", "quadkey",
    "country", "common_country_name",
    "dist_b0", "dist_b30", "dist_b330",
]

# Activate the bokeh plotting backend for HoloViews and initialise Panel.
hv.extension("bokeh")
pn.extension()

## Read the STAC collections

In [None]:
# Open the root CoCliCo STAC catalog and look up the two child collections
# that the rest of the notebook queries.
coclico_catalog = pystac.Catalog.from_file(COCLICO_STAC_URL)
sm_collection = coclico_catalog.get_child(SM_COLLECTION_NAME)
# NOTE(review): despite its name, `gcts_collection` is resolved from
# GCTR_COLLECTION_NAME. TRANSECT_COLUMNS requests `dist_b*` columns, which look
# like the extra GCTR characteristics, so this is presumably intentional -
# confirm before renaming or switching to GCTS_COLLECTION_NAME.
gcts_collection = coclico_catalog.get_child(GCTR_COLLECTION_NAME)

# `Catalog.get_child` returns None when no child with the given id exists.
# Fail here with a clear message instead of a confusing error in a later cell.
for _collection_id, _collection in (
    (SM_COLLECTION_NAME, sm_collection),
    (GCTR_COLLECTION_NAME, gcts_collection),
):
    if _collection is None:
        raise ValueError(
            f"STAC collection {_collection_id!r} was not found in {COCLICO_STAC_URL}"
        )

## Show the spatial extents of both collections

In [None]:
# Read the item extents of both collections (ShorelineMonitor series first,
# then the transects), using the same storage credentials for each.
sm_extents, gcts_extents = (
    coastpy.io.utils.read_items_extent(collection, storage_options=storage_options)
    for collection in (sm_collection, gcts_collection)
)

# Show the ShorelineMonitor item footprints on an interactive map.
sm_extents[["geometry"]].explore()

## Create an interactive map that we use to define our region of interest

In [None]:
from ipyleaflet import Map, basemaps

# Satellite-imagery map used to pick the region of interest. The view that is
# visible when the next cell runs becomes the bounding box for all queries.
# The default centre is on the Namibian coast; ipyleaflet takes it as (lat, lon).
m = Map(
    basemap=basemaps.Esri.WorldImagery,
    center=(-22.946301, 14.410124),
    zoom=12,
    scroll_wheel_zoom=True,
)
m.layout.height = "800px"
m

In [None]:
# NOTE: these coordinates are extracted from the interactive map above
minx, miny, maxx, maxy = m.west, m.south, m.east, m.north

# The map bounds are reported back by the browser once the widget has been
# rendered. If this cell runs before the map is displayed (e.g. "Run All"),
# the bounds are still at their defaults and describe an empty box. Stop here
# with a clear message rather than querying cloud storage with a degenerate
# region of interest.
if not (minx < maxx and miny < maxy):
    raise ValueError(
        "The map bounds are empty "
        f"(west={minx}, south={miny}, east={maxx}, north={maxy}). "
        "Make sure the interactive map above is displayed, select your area "
        "of interest, and run this cell again."
    )

# Region of interest as a single-row GeoDataFrame in WGS84 (EPSG:4326).
roi = gpd.GeoDataFrame(geometry=[shapely.box(minx, miny, maxx, maxy)], crs=4326)

## Create a DuckDB query engine to retrieve data from cloud storage

In [None]:
from coastmonitor.shorelines.intersection import (
    clean_raw_series,
    compute_diffs,
    compute_ols_trend,
)

# Bounding box of the region of interest, in the argument order that
# `get_data_within_bbox` is called with throughout this notebook.
roi_bounds = (minx, miny, maxx, maxy)

# Raw ShorelineMonitor series inside the region of interest.
sds_ts_engine = coastpy.io.STACQueryEngine(stac_collection=sm_collection, storage_backend="azure")
sds_ts = sds_ts_engine.get_data_within_bbox(*roi_bounds)

# Transects inside the same region, restricted to the columns the analysis needs.
transects_engine = coastpy.io.STACQueryEngine(
    stac_collection=gcts_collection,
    storage_backend="azure",
    columns=TRANSECT_COLUMNS,
)
transects = transects_engine.get_data_within_bbox(*roi_bounds)

# Clean the raw series against the transects with the thresholds used in this
# notebook.
sds_ts_clean = clean_raw_series(
    sds_ts,
    transects,
    method="offshore",
    multi_obs_threshold=17.5,
    max_step_change=150,
    relative_importance_threshold=0.6,
)

# Keep only the primary observations to get cleaner data, and preview them.
is_primary = sds_ts_clean["obs_is_primary"]
sds_ts_clean[is_primary].head()

### Optional: visualize the data in a small app

In [None]:
from coastmonitor.visualization.apptools import SpatialDataFrameApp

# Fit an OLS trend of shoreline position over time, using the primary
# observations only.
primary_obs = sds_ts_clean[sds_ts_clean["obs_is_primary"]]
sds_ts_clean_ac = compute_ols_trend(primary_obs, transects, x="time", y="shoreline_position")

# Build the dashboard from the trend results, the transects and the full
# cleaned series (primary and non-primary observations), then show it.
app = SpatialDataFrameApp(sds_ts_clean_ac, transects, sds_ts_clean)
app.create_view()
app.view.show()