# Extracting coastlines from the Global Coastal Transect System (GCTS) and the Satellite-Derived Shorelines System (SDSS)

Run the first few cells to load the required functions, then jump to the section you're interested in.

In [None]:
import sys

# sys.path.insert(0, "..\src")
sys.path.insert(0, "../src")

import dask
# NOTE: query planning is not implemented in dask_geopandas yet, so we have to set 
# it to False before we do any dask_geopandas import 
dask.config.set({"dataframe.query-planning": False})

from coastlines4shorelines.utils import transect_origins_to_coastline,retrieve_transects_by_roi,shoreline_intersections_to_coastline,filter_sp

import logging
import os
import pathlib


from dask.dataframe.utils import make_meta

import dask_geopandas
import duckdb
import geopandas as gpd
import hvplot.pandas
import pandas as pd
import pystac
import shapely
from dotenv import load_dotenv
from ipyleaflet import Map, basemaps

from coastmonitor.geo.geometries import geo_bbox

# Load settings through python-dotenv (called with override=True).
load_dotenv(override=True)

# Only show warnings and above from the "azure" logger.
logging.getLogger("azure").setLevel(logging.WARNING)

# Azure storage credentials, read from the environment; both values are None
# when the corresponding variable is not set.
account_name = os.getenv("AZURE_STORAGE_ACCOUNT_NAME")
sas_token = os.getenv("AZURE_STORAGE_SAS_TOKEN")
storage_options = dict(account_name=account_name, credential=sas_token)

## Load from STAC catalog

Load the transects from our CoCliCo STAC catalog. 

In [None]:
# Open the CoCliCo STAC catalog from its catalog.json URL.
coclico_catalog = pystac.Catalog.from_file(
    "https://coclico.blob.core.windows.net/stac/v1/catalog.json"
)

In [None]:
# Display the catalog object as the cell output.
coclico_catalog

In [None]:
# Materialise everything returned by get_all_collections() so the cell shows it.
list(coclico_catalog.get_all_collections())

In [None]:
# Fetch the catalog child with id "gcts" and display it.
gcts = coclico_catalog.get_child("gcts")
gcts

### Define a region of interest (ROI) based on a KML file

In [None]:
import fiona

# Mark KML as a read/write ("rw") driver in fiona's supported-driver table
# before reading the file with driver="KML".
fiona.drvsupport.supported_drivers["KML"] = "rw"
# NOTE(review): absolute path on a local Windows drive; this cell only runs on
# a machine that has this file. Point it at your own ROI file.
kml_fp = pathlib.Path(r"d:\FHICS\ShorelineS\ROIs\North_Carolina_Virginia.kml")
roi = gpd.read_file(kml_fp, driver="KML")

### Get all transects for this region of interest

In [None]:
# Retrieve the transects for the ROI; the helper is given the Azure
# storage_options built in the setup cell.
transects_roi = retrieve_transects_by_roi(roi, storage_options=storage_options)

In [None]:
# Preview the first rows of the retrieved transects.
transects_roi.head()

In [None]:
# presumably a (Geo)DataFrame, in which case this lists its column names — confirm
list(transects_roi)

### Apply transect_origins_to_coastline and explode to get one LineString per row

In [None]:
# Build the base coastline table, one geometry per row:
#   1. group the transects by coastline_id,
#   2. run transect_origins_to_coastline on every group,
#   3. explode the per-group results into separate rows,
#   4. move the index into columns, naming the value column "geometry",
#   5. drop the leftover "level_1" index column.
coastline = (
    transects_roi.groupby("coastline_id")
    .apply(transect_origins_to_coastline)
    .explode()
    .reset_index(name="geometry")
    .drop(columns=["level_1"])
)
coastline.head()

In [None]:
# Wrap the table as a GeoDataFrame in EPSG:4326, intersect it with the ROI
# geometry, and split multi-part results into single-part rows.
coastline = gpd.overlay(
    gpd.GeoDataFrame(coastline, crs=4326),
    roi[["geometry"]],
    how="intersection",
).explode(index_parts=False)
coastline.head()

### Explore the ROI polygon and the cleaned up base coastline

In [None]:
# Draw the ROI on an interactive map `m`, then add the coastline in red to
# that same map.
m = roi.explore()
gpd.GeoDataFrame(coastline, crs=4326).explore(color="red", m=m)

### Load ShorelineMonitor SDS series 

In [None]:
# Read the SDS parquet file from Azure blob storage and materialise it in memory.
sdss = dask_geopandas.read_parquet(
    "az://shorelinemonitor-raw-series/release/2024-04-15/sp_NC.parquet",
    storage_options=storage_options,
).compute()

# Rewrite `time` as "YYYY-MM-DD" strings.
sdss["time"] = pd.to_datetime(sdss["time"]).dt.strftime("%Y-%m-%d")

### Apply the filtering function to the raw time series

In [None]:
# Run the project filter on the raw series, then show the result of list()
# on the filtered table (presumably its column names — confirm).
sdss_clean = filter_sp(sdss)
list(sdss_clean)

### Sort by transect ID and split the transect ID into coastline_id, segment_id and transect_dist

In [None]:
    # NOTE(review): this whole cell is indented one level; plain Python would
    # reject that, so it presumably relies on the notebook dedenting cell
    # input — confirm, or dedent the cell.
    # Order the cleaned series by transect_id (rebinds `sdss`).
    sdss = sdss_clean.sort_values("transect_id")
    # Split ids of the form "cl<digits>s<digits>tr<digits>" into three new
    # columns, one per captured group.
    sdss[["coastline_id", "segment_id", "transect_dist"]] = (
        sdss.transect_id.str.extract(r"cl(\d+)s(\d+)tr(\d+)")
    )
    # The extracted groups are strings; cast them to integers.
    sdss = sdss.astype(
        {"coastline_id": int, "segment_id": int, "transect_dist": int}
    )
    sdss.head()

### Create subsets of sdss for 2010 and 2020

In [None]:
import pandas as pd

# Parse `time` into datetimes so it can be compared with a Timestamp.
sdss['time'] = pd.to_datetime(sdss['time'])

# Snapshot date to extract.
specific_date = pd.Timestamp('2010-01-01')

# Keep the rows whose time equals that date exactly, and start a new
# interactive map `m` from their geometries.
sdss_2010 = sdss[sdss['time'].eq(specific_date)]
m = sdss_2010.geometry.explore()

In [None]:
# Display the 2010 subset.
sdss_2010

### Try merging the smoothed OSM coastline with the `sdss` points for specific years

In [None]:
# Left-join the 2010 shoreline points onto every ROI transect by transect_id;
# transects without a 2010 point are kept with missing values.
merge_2010 = transects_roi[["transect_id", "lon", "lat"]].merge(
    sdss_2010,
    how="left",
    on="transect_id",
)

In [None]:
# Left-join the full shoreline series onto every ROI transect by transect_id.
merge = transects_roi[["transect_id", "lon", "lat"]].merge(
    sdss,
    how="left",
    on="transect_id",
)

In [None]:
# Display the merged table.
merge

In [None]:
# NOTE(review): netCDF4 is not referenced directly in this cell; presumably it
# is imported so that a missing NetCDF backend fails early — confirm.
import netCDF4
# xarray is otherwise only imported in later cells; importing it here keeps
# this cell working when the notebook is executed top to bottom.
import xarray as xr

# Work on the merged transect / shoreline table with `time` as datetimes.
gdf = gpd.GeoDataFrame(merge)
gdf['time'] = pd.to_datetime(gdf['time'])

# Pivot to wide form: one row per time, one column per transect_id.
# `lon_y` / `lat_y` are the right-hand (sdss) columns, suffixed by the merge.
lon_pivot = gdf.pivot(index='time', columns='transect_id', values='lon_y')
lat_pivot = gdf.pivot(index='time', columns='transect_id', values='lat_y')

# Convert the pivot tables to xarray DataArrays (dimensions: time, transect_id).
lon_xr = xr.DataArray(lon_pivot)
lat_xr = xr.DataArray(lat_pivot)

# Combine both arrays into one Dataset, write it to disk and print a summary.
ds = xr.Dataset({'lon': lon_xr, 'lat': lat_xr})
ds.to_netcdf('test.nc')
print(ds)

In [None]:
# Plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

fig, ax = plt.subplots()

# Define the colormap
cmap = plt.cm.get_cmap('rainbow', len(ds.time))

# Extract the unique years
years = pd.to_datetime(ds.time.values)
unique_years = sorted(set(years))

for i, year in enumerate(unique_years):
    # Select data for the specific year
    year_data = ds.sel(time=year)
    plt.plot(year_data.lon,year_data.lat)
    gdf_year=gpd.GeoDataFrame({"lon":year_data.lon,"lat":year_data.lat})
    # Convert lon and lat to geometry
    gdf_year['geometry'] = gpd.points_from_xy(gdf_year.lon, gdf_year.lat)
    gdf_year = gdf_year.set_crs(epsg=4326)  # WGS 84

plt.xlim(-75.6, -75.4)
plt.ylim(35,36
         )
    

In [None]:
# Show the points held in `gdf_year` on an interactive map.
gdf_year.explore()

In [None]:
# NOTE(review): `g` is not defined anywhere in the visible notebook, so this
# cell would raise NameError — looks like a leftover; confirm and remove.
g

In [None]:
# Display the sorted unique time stamps built in the plotting cell.
unique_years

In [None]:
# Reduce the time stamps in ds.time to their calendar year; this rebinds
# `years` from the plotting cell.
# NOTE(review): .round(0) on integer years looks like a no-op — confirm.
years = pd.to_datetime(ds.time.values).year.round(0)
years

In [None]:
# Unique `time` values of the merged table, displayed in the order given by
# argsort().
times = merge["time"].unique()
indx = times.argsort()
times[indx]

In [None]:
# Display the time coordinate of the dataset.
ds.time

In [None]:
import xarray as xr

# Convert the 2010 merge table into an xarray object.
xrds =merge_2010.to_xarray()

# Yearly time stamps from 2000 to 2022 (freq "YS" = year start).
time = pd.date_range(start="2000", end="2022", freq="YS")
# Add a new dimension named `timey` holding those time stamps.
# NOTE(review): the name `timey` may be deliberate, to avoid clashing with a
# `time` variable coming from the merge — confirm.
xrds_t = xrds.expand_dims(timey=time)
xrds_t.dims

In [None]:
# Display the expanded dataset.
xrds_t

In [None]:
import matplotlib.pyplot as plt
# Draw two curves in one call: (lon_x, lat_x) and (lon_y, lat_y), i.e. the
# left-hand and right-hand coordinate columns of the merge.
plt.plot(xrds["lon_x"],xrds["lat_x"],xrds["lon_y"],xrds["lat_y"])
# Restrict the view to a fixed lon/lat window.
plt.xlim([-77,-75])
plt.ylim([34,37])

In [None]:
# Display the dimensions of the unexpanded dataset.
xrds.dims

In [None]:
# Turn the 2010 merge table into a point GeoDataFrame (EPSG:4326) built from
# the right-hand coordinates, and expose those coordinates as plain lon / lat.
c_2010 = gpd.GeoDataFrame(
    merge_2010,
    geometry=gpd.GeoSeries.from_xy(merge_2010.lon_y, merge_2010.lat_y, crs=4326),
).rename(columns={"lon_y": "lon", "lat_y": "lat"})

# Connect the points into coastline geometries and show them on a map.
coast_2010 = shoreline_intersections_to_coastline(c_2010)
gpd.GeoDataFrame(geometry=coast_2010, crs=4326).explore()

### Show both datasets; notice they are points

In [None]:
import pandas as pd

# Parse `time` into datetimes so it can be compared with a Timestamp.
sdss['time'] = pd.to_datetime(sdss['time'])

# Snapshot date to extract.
specific_date = pd.Timestamp('2020-01-01')

# Keep the rows whose time equals that date exactly and add them in red to
# the existing map `m`.
sdss_2020 = sdss[sdss['time'].eq(specific_date)]
sdss_2020.geometry.explore(color="red", m=m)

In [None]:
import pandas as pd

# Parse `time` into datetimes so it can be compared with a Timestamp.
sdss['time'] = pd.to_datetime(sdss['time'])

# Snapshot date to extract.
specific_date = pd.Timestamp('2022-01-01')

# Keep the rows whose time equals that date exactly and add them in green to
# the existing map `m`.
sdss_2022 = sdss[sdss['time'].eq(specific_date)]
sdss_2022.geometry.explore(color="green", m=m)

In [None]:
#list(sdss_2010)

### Now connect the dots

In [None]:
# Pass each yearly point subset through shoreline_intersections_to_coastline;
# the results are wrapped as geometries in the next cell.
coastline_2010 = shoreline_intersections_to_coastline(sdss_2010)
coastline_2020 = shoreline_intersections_to_coastline(sdss_2020)
#list(coastline_2020)

### Turn the LineStrings into a GeoDataFrame

In [None]:
# Wrap each result as the geometry column of a GeoDataFrame in EPSG:4326
# (rebinds both names; the frames are built from geometry only).
coastline_2010 = gpd.GeoDataFrame(geometry=coastline_2010,crs=4326)
coastline_2020 = gpd.GeoDataFrame(geometry=coastline_2020,crs=4326)

### Now explore both coastlines

In [None]:
# Show the 2010 coastline on a new map, then add the 2020 coastline in red to
# that same map. The former `columnn="time"` argument is dropped: it was a
# misspelling of `column`, and these frames are built from geometry only, so
# there is no `time` column to colour by.
m = coastline_2010.explore()
coastline_2020.explore(color="red", m=m)

In [None]:
import geopandas as gpd
import pandas as pd
import xarray as xr

# Stand-alone demo of the pivot -> xarray conversion on a tiny hand-made
# table: two transects ("A", "B") observed on two dates.
# Replace `data` with an actual GeoDataFrame to use it for real.
data = dict(
    transect_name=['A', 'B', 'A', 'B'],
    lon=[-123.3656, -123.3657, -123.3656, -123.3657],
    lat=[48.4284, 48.4285, 48.4284, 48.4285],
    time=['2021-01-01', '2021-01-01', '2021-01-02', '2021-01-02'],
)
gdf = gpd.GeoDataFrame(data)
gdf['time'] = pd.to_datetime(gdf['time'])

# Wide tables: one row per time, one column per transect.
lon_pivot = gdf.pivot(columns='transect_name', index='time', values='lon')
lat_pivot = gdf.pivot(columns='transect_name', index='time', values='lat')

# Each wide table becomes a 2-D DataArray (time x transect_name).
lon_xr = xr.DataArray(lon_pivot)
lat_xr = xr.DataArray(lat_pivot)

# Bundle both arrays into one Dataset and print its summary.
ds = xr.Dataset(dict(lon=lon_xr, lat=lat_xr))

print(ds)

In [None]:
# Display the first element (position 0, 0) of the `lon` variable.
ds.lon[0,0]