In [None]:
from rasterstats import zonal_stats  # https://pythonhosted.org/rasterstats/manual.html#zonal-statistics
import os
import glob
import pandas as pd
import geopandas as gpd
import rasterio as rio
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
sns.set_theme()

In [None]:
# Shoreline buffer polygons: the zones passed to the zonal statistics for every classified mosaic.
# NOTE(review): absolute, user-specific path — adjust when running on another machine.
shore_buffer_path = '/Users/arbailey/natcap/idb/data/work/sargassum/shore_segments'
shore_buffer_file = 'shoreQR_segments_20210331_buffer100m.shp'
# Full path to the shapefile, used by classed_to_gdf()
shore_buffer = os.path.join(shore_buffer_path, shore_buffer_file)

## Classified Image Mosaics to GeoDataFrames of Presence/Absence/No Data Zonal Stats by Shoreline Segment (100m buffer)

In [None]:
# Zonal stats count the pixels of each class (Sargassum present, absent, no data) per polygon.
# cmap renames the raster's numeric class values to short column codes:
#   1 -> 'pr' (present), 0 -> 'ab' (absent), -1 -> 'nd' (no data),
#   -9999 -> 'ab2' (a second "absent" code that sargzs_gdf() later folds into 'ab')
cmap = {1: 'pr', -1: 'nd', 0: 'ab', -9999: 'ab2'}  # convert numeric values to present/absent/no data 2-letter codes

def sarg_segments(shore_polys, sarg_raster, affine=None):
    """Run categorical zonal statistics of a classified raster over shoreline polygons.

    Parameters
    ----------
    shore_polys : vector source accepted by ``zonal_stats`` (here a shapefile path)
    sarg_raster : raster source accepted by ``zonal_stats`` (here a mosaic path)
    affine : optional affine transform, forwarded unchanged to ``zonal_stats``

    Returns
    -------
    The ``zonal_stats`` result requested with ``geojson_out=True``: the input
    features carrying the total pixel ``count`` plus one count per class,
    with class values renamed through the module-level ``cmap``.
    """
    return zonal_stats(
        shore_polys,
        sarg_raster,
        affine=affine,
        stats="count",
        categorical=True,
        category_map=cmap,
        all_touched=True,  # count every pixel the polygon touches, not only pixel centres
        geojson_out=True,
    )

# Convert Zonal Stats GeoJSON result to Geodataframe for some field manipulations and joining
def sargzs_gdf(zs_geojson, imgdate=None, imgtime=None):
    """Turn a zonal-stats GeoJSON feature list into a flat per-segment table.

    Parameters
    ----------
    zs_geojson : iterable of GeoJSON features (output of ``sarg_segments``)
    imgdate : optional value stored in a new ``imgdate`` column (skipped if falsy)
    imgtime : optional value stored in a new ``imgtime`` column (skipped if falsy)

    Returns
    -------
    The feature properties as a frame, without the geometry and descriptive
    shoreline attributes, and with any ``ab2`` counts merged into ``ab``.
    """
    zs_gdf = gpd.GeoDataFrame.from_features(zs_geojson)
    # Kept in place so the returned object is the same one the original code returned.
    zs_gdf.drop(columns=['geometry','fid', 'type_geomo','length_km', 'shore_desc', 'desc_abbre'], inplace=True, errors='ignore')
    # Assign no data pixels to absent (because they are pre-masked as non-sargassum areas )
    if 'ab2' in zs_gdf.columns:
        # A class missing from a segment shows up as NaN (presumably because the
        # categorical stats only report classes that occur — verify). Treat a
        # missing count as 0 so that NaN + n does not wipe out the real count.
        ab2_counts = zs_gdf['ab2'].fillna(0)
        if 'ab' in zs_gdf.columns:
            zs_gdf['ab'] = zs_gdf['ab'].fillna(0) + ab2_counts
        else:
            # No segment had any 'ab' pixels at all: 'ab' is just the 'ab2' counts.
            zs_gdf['ab'] = ab2_counts
        zs_gdf.drop(columns=['ab2'], inplace=True, errors='ignore')
    if imgdate:
        zs_gdf['imgdate'] = imgdate
    if imgtime:
        zs_gdf['imgtime'] = imgtime
    return zs_gdf

def classed_to_gdf(raster_path):
    """Compute per-segment zonal stats for every classified mosaic in a directory.

    Every ``*_mosaic_nd0.vrt`` file directly inside ``raster_path`` is processed
    in sorted filename order. Characters 0-7 of the filename become ``imgdate``
    and characters 9-14 become ``imgtime``.

    Parameters
    ----------
    raster_path : str
        Directory containing the mosaic VRT files.

    Returns
    -------
    pandas.DataFrame
        One row per (mosaic, shoreline segment), missing counts filled with 0,
        plus a ``Date`` column parsed from ``imgdate``.

    Raises
    ------
    FileNotFoundError
        If ``raster_path`` is not an existing directory.
    ValueError
        If the directory contains no matching mosaics.
    """
    if not os.path.isdir(raster_path):
        raise FileNotFoundError('Raster directory not found: ' + str(raster_path))

    # Glob with full paths instead of os.chdir(): changing the working directory
    # would silently affect every later cell in the notebook.
    pattern = os.path.join(glob.escape(raster_path), '*_mosaic_nd0.vrt')
    mosaics = sorted(glob.glob(pattern))
    if not mosaics:
        raise ValueError('No *_mosaic_nd0.vrt mosaics found in ' + str(raster_path))

    dfs = []
    print('Calculating TOA Sargassum and Nearshore Zonal Stats......')
    for mosaic in mosaics:
        # Date/time are encoded at fixed positions in the file name, not the full path.
        mosaic_name = os.path.basename(mosaic)
        image_date = mosaic_name[0:8]
        image_time = mosaic_name[9:15]
        zs = sarg_segments(shore_buffer, mosaic)
        shore_df = sargzs_gdf(zs, image_date, image_time)
        # Classes absent from a segment come back as NaN; report them as 0 pixels.
        shore_df = shore_df.fillna(0)
        dfs.append(shore_df)

    shore_df = pd.concat(dfs, ignore_index=True)
    shore_df['Date'] = pd.to_datetime(shore_df['imgdate'])
    return shore_df


In [None]:
# TOA v1 -- Calculate Zonal Stats & create stacked DF
# NOTE: `raster_path` is rebound by later cells; it only identifies the v1 directory right after this cell runs.
raster_path = '/Users/arbailey/Google Drive/My Drive/sargassum/s2toa_classified_v1'
shore_df_toaV1 = classed_to_gdf(raster_path)
shore_df_toaV1

In [None]:
# # TOA v2 -- Calculate Zonal Stats & create stacked DF
# raster_path = '/Users/arbailey/Google Drive/My Drive/sargassum/s2toa_classified_v2'
# shore_df_toaV2 = classed_to_gdf(raster_path)
# shore_df_toaV2

In [None]:
# TOA v3 -- Calculate Zonal Stats & create stacked DF
# NOTE: rebinds `raster_path` to the v3 directory.
raster_path = '/Users/arbailey/Google Drive/My Drive/sargassum/s2toa_classified_v3'
shore_df_toaV3 = classed_to_gdf(raster_path)
shore_df_toaV3

In [None]:
# # SR v1
# raster_path = '/Users/arbailey/Google Drive/My Drive/sargassum/s2sr_classified'
# shore_df_srV1 = classed_to_gdf(raster_path)
# shore_df_srV1

In [None]:
# Combine individual results from different training sets
# toaV1: keep the segment id, date and pixel counts, add the no-data proportion,
# and suffix the columns so they stay distinguishable after merging with other sources.
# Built as one chain so the new column is never assigned onto a column-subset
# slice (which triggers pandas' SettingWithCopyWarning).
shore_df_toaV1_output = (
    shore_df_toaV1[['seg_id', 'Date', 'pr', 'nd', 'count']]
    .assign(nd_prop_toa1=lambda d: d['nd'] / d['count'])
    .rename(columns={'Date': 'image_date', 'pr': 'pr_pxl_toa1', 'nd': 'nd_pxl_toa1', 'count': 'cnt_pxl_toa1'})
)
shore_df_toaV1_output


In [None]:
# toaV3: keep the segment id, date and pixel counts, add the no-data proportion,
# and suffix the columns so they stay distinguishable after merging with other sources.
# Built as one chain so the new column is never assigned onto a column-subset
# slice (which triggers pandas' SettingWithCopyWarning).
shore_df_toaV3_output = (
    shore_df_toaV3[['seg_id', 'Date', 'pr', 'nd', 'count']]
    .assign(nd_prop_toa3=lambda d: d['nd'] / d['count'])
    .rename(columns={'Date': 'image_date', 'pr': 'pr_pxl_toa3', 'nd': 'nd_pxl_toa3', 'count': 'cnt_pxl_toa3'})
)
shore_df_toaV3_output

In [None]:
# Inner-join the two sources on their shared columns (segment id and image date);
# the remaining columns carry a per-source suffix and sit side by side.
shore_df = shore_df_toaV1_output.merge(
    shore_df_toaV3_output,
    how='inner',
    on=['seg_id', 'image_date'],
)
shore_df

In [None]:
#### Output
# Write the merged per-segment table to CSV (row index omitted)
output_path = '/Users/arbailey/natcap/idb/data/work/sargassum/ip'
output_csv = os.path.join(output_path, 'shore100m_sargassum_stats.csv')
print(f'output dataframe to {output_csv}')
shore_df.to_csv(output_csv, index=False)

## Sum pixels (area) by date for each source

In [None]:
def groupbydate(df, source):
    """Sum the per-segment pixel counts into one row per image.

    Parameters
    ----------
    df : DataFrame with ``Date``, ``imgdate``, ``imgtime`` and the count
        columns ``pr``, ``ab``, ``nd`` and ``count``.
    source : label written to every row of the ``source`` column.

    Returns
    -------
    DataFrame indexed by ``Date`` with columns ``imgdate``, ``imgtime``,
    ``pr_sum``, ``ab_sum``, ``nd_sum``, ``count_sum`` and ``source``.
    """
    image_keys = ['Date', 'imgdate', 'imgtime']
    totals = df.groupby(image_keys).agg(
        pr_sum=('pr', 'sum'),
        ab_sum=('ab', 'sum'),
        nd_sum=('nd', 'sum'),
        count_sum=('count', 'sum'),
    )
    # Move the grouping keys back to columns, then index by Date only.
    by_date = totals.reset_index().set_index('Date')
    by_date['source'] = source
    return by_date
# shore_grouped_df = shore_df.groupby(['Date','imgdate', 'imgtime']).agg({'pr':['sum'] ,'ab':['sum'],'nd':['sum'],'count':['sum']}).reset_index()
# shore_grouped_df

In [None]:
# Per-image pixel totals for the toaV1 classification, labelled with source 'toaV1'
shore_grouped_toaV1 = groupbydate(shore_df_toaV1,'toaV1')
shore_grouped_toaV1

In [None]:
# shore_grouped_toaV2 = groupbydate(shore_df_toaV2, 'toaV2')
# shore_grouped_toaV2

In [None]:
# Per-image pixel totals for the toaV3 classification, labelled with source 'toaV3'
shore_grouped_toaV3 = groupbydate(shore_df_toaV3, 'toaV3')
shore_grouped_toaV3

In [None]:
# shore_grouped_srV1 = groupbydate(shore_df_srV1, 'srV1')
# shore_grouped_srV1

## Visualize Time series of total Sargassum Area - Across Study Area

In [None]:
# Line plot of the summed 'present' pixel count over time for toaV3 (the frame is indexed by Date)
shore_grouped_toaV3['pr_sum'].plot(figsize=(20, 10))
# shore_grouped_toaV3.set_index('Date')['pr_sum'].plot.bar(figsize=(20, 10))

In [None]:
# Stack the per-source totals into one long table; reset_index() turns the
# Date index into a regular column. (srV1 and toaV2 are currently not included.)
sources_by_date = [shore_grouped_toaV1, shore_grouped_toaV3]
shore_grouped_all = pd.concat(sources_by_date).reset_index()
shore_grouped_all

In [None]:
# Sum of the three class totals, stored next to 'count_sum'.
# NOTE(review): presumably a cross-check that the classes add up to the total pixel count — confirm.
# This adds a column to shore_grouped_all in place, so later cells see it.
shore_grouped_all['sum2'] = shore_grouped_all['pr_sum'] + shore_grouped_all['ab_sum'] + shore_grouped_all['nd_sum']
shore_grouped_all

In [None]:
# Show the images whose total pixel count falls below 1401942.
# NOTE(review): 1401942 is presumably the full pixel count over all segments — confirm and name the constant.
shore_grouped_all[shore_grouped_all['count_sum'] < 1401942]

In [None]:
# Global seaborn theme for the plots that follow (replaces the default theme set in the import cell)
sns.set_theme(style='darkgrid', palette='Set3', font='Arial', font_scale=1.2)
# sns.relplot(data=shore_grouped_all, x="Date", y="pr_sum", hue="source", kind="line", height=10, aspect=2)
# sns.relplot(data=shore_grouped_all[shore_grouped_all['Date'] > '2019-01-01'], x="Date", y="pr_sum", hue="source", kind="line", height=10, aspect=2)

In [None]:
## Full Time Series. -- Sargassum Area 
# Figure 1: 'present' pixel totals for the whole record, one line per source
f,ax = plt.subplots(figsize=(20, 10))
sns.lineplot(data=shore_grouped_all, x="Date", y="pr_sum", hue="source", ax=ax)
ax.set(xlabel='', ylabel = 'Sargassum Pixels')
# Major ticks every 3 months.
fmt_quarter = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter)


# Figure 2: same series restricted to dates after 2019-01-01 (`f` and `ax` are rebound to the new figure)
f,ax = plt.subplots(figsize=(20, 10))
sns.lineplot(data=shore_grouped_all[shore_grouped_all['Date'] > '2019-01-01'], x="Date", y="pr_sum", hue="source", ax=ax)
ax.set(xlabel='', ylabel = 'Sargassum Pixels')
# fmt_month = mdates.MonthLocator()
# ax.xaxis.set_major_locator(fmt_month)

In [None]:
# Time series of the 'no data' pixel totals, one line per source
f,ax = plt.subplots(figsize=(20, 10))
# sns.lineplot(data=shore_grouped_all, x="Date", y="pr_sum", hue="source", ax=ax)
sns.lineplot(data=shore_grouped_all, x="Date", y="nd_sum", hue="source", ax=ax)
ax.set(xlabel='', ylabel = 'No Data (clouds) Pixels')

In [None]:
# Time series of the 'absent' pixel totals, one line per source
# sns.relplot(data=shore_grouped_all, x="Date", y="ab_sum", hue="source", kind="line", height=10, aspect=2)
f,ax = plt.subplots(figsize=(20, 10))
sns.lineplot(data=shore_grouped_all, x="Date", y="ab_sum", hue="source", ax=ax)

In [None]:
# Test Dates only
test_dates = ['2019-02-26', '2019-04-02', '2019-05-07', '2019-06-26', '2019-09-14', '2019-11-18', '2019-12-03']
# Cast the strings to datetimes before isin(): matching a datetime64 column against
# raw strings is deprecated in recent pandas and will stop matching.
is_test_date = shore_grouped_all['Date'].isin(pd.to_datetime(test_dates))
# .copy() gives an independent frame, so adding a column below does not write onto
# a slice of shore_grouped_all (SettingWithCopyWarning).
shore_grouped_testdates = shore_grouped_all[is_test_date].copy()
# Pixel count -> hectares. NOTE(review): assumes 10 m pixels (100 px = 1 ha) — confirm.
shore_grouped_testdates['sarg_ha'] = shore_grouped_testdates["pr_sum"] / 100
f,ax = plt.subplots(figsize=(20, 10))
sns.lineplot(data=shore_grouped_testdates, x="Date", y="sarg_ha", hue="source", ax=ax)
ax.set(xlabel='', ylabel = 'Sargassum Area (ha)', yticks=[0,50,100,150,200,250,300,350,400,450,500,550,600])
# https://matplotlib.org/stable/gallery/text_labels_and_annotations/date.html
# major ticks every month.
fmt_month = mdates.MonthLocator()
ax.xaxis.set_major_locator(fmt_month)

# # Text in the x axis will be displayed in 'YYYY-mm' format.
# ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

In [None]:
# Stacked bar chart of the present / absent / no-data pixel totals per image date (toaV3)
# sns.set_theme(style='darkgrid', palette='deep', font='Arial', font_scale=1.2)
sns.set(palette='deep')
shore_grouped_toaV3.set_index('imgdate')[['pr_sum','ab_sum','nd_sum']].plot(kind='bar', stacked=True,figsize=(20, 10))

# # Just add a title and rotate the x-axis labels to be horizontal.
# plt.title('Tips by Day and Gender')
# plt.xticks(rotation=0, ha='center')

In [None]:
# Visual check for correlation between Sargassum area and clouds:
# scatter of 'present' totals against 'no data' totals, one point per image and source
shore_grouped_all.plot.scatter(x='pr_sum', y='nd_sum',figsize=(10,10)).set(ylabel='No Data (Clouds) Pixel Count',xlabel='Sargassum Pixel Count')

## Compare Moon Palace Daily Sargassum Volume with Sentinel-2 Sargassum Area for Segment 130

In [None]:
# Moon Palace daily record: load only the columns used below
moon_palace_spreadsheet = '/Users/arbailey/natcap/idb/data/work/sargassum/groundtruth/rosa/MoonPalace_Daily_2015_2019.xlsx'

moon_palace_df = pd.read_excel(moon_palace_spreadsheet, usecols=('Date','Species','VolTot','VolSar','VolSarkm')) #, dtype={'Species': str})  
moon_palace_df

In [None]:
# Sentinel-2 (toaV3) rows for shoreline segment 130, the segment compared with Moon Palace
is_segment_130 = shore_df_toaV3["seg_id"] == 130
moon_palace_segment_df = shore_df_toaV3[is_segment_130]
moon_palace_segment_df

In [None]:
# Time series of the 'VolSar' column, x-axis limited to 2015-11-01 .. 2020-01-01
# moon_palace_df.set_index('Date')[['VolSar','VolSarkm']].plot(figsize=(24, 12), xlim=(('2015-11-01','2020-01-01')))
moon_palace_df.set_index('Date')[['VolSar']].plot(figsize=(24, 12), xlim=(('2015-11-01','2020-01-01')))

In [None]:
# moon_palace_segment_df.set_index('Date')['pr'].plot(figsize=(24, 12), xlim=(('2015-11-01','2020-01-01')))

In [None]:
# Join Moon Palace Daily time series with Sentinel-2 Segment 130 Sargassum on date.
# Left join: every Sentinel-2 row is kept; Moon Palace columns are empty where no record matches.
sargshore_moonpalace_df = moon_palace_segment_df.merge(moon_palace_df, on="Date", how="left")
sargshore_moonpalace_df

In [None]:
# Sentinel-2 'present' pixel count and Moon Palace 'VolSar' on one shared axis over time
sargshore_moonpalace_df.set_index('Date')[['pr','VolSar']].plot(figsize=(24, 12), xlim=(('2015-11-01','2020-01-01')))

In [None]:
# Scatter of Sentinel-2 pixel count against Moon Palace volume for the joined dates
sargshore_moonpalace_df.plot.scatter(x='pr', y='VolSar',figsize=(10,10)).set(xlabel='Sargassum Area (pixel count) from Sentinel-2',ylabel='Sargassum Vol (m3) at Moon Palace')

In [None]:
# sargshore_moonpalace_df.plot.scatter(x='pr', y='VolSar',loglog=True, figsize=(10,10)).set(xlabel='Sargassum Area (pixel count) from Sentinel-2',ylabel='Sargassum Vol (m3) at Moon Palace')

## Compare Data from Chavez et al. 2020 with Sentinel-2 Sargassum Area

### Landsat Time Series, 2016-2020 for Cancun to Xcalak

In [None]:
# Landsat Sargassum cover time series: three unnamed columns in the text file,
# with day-first dates. The integer date column is dropped and rows are tagged 'landsat'.
source_dir = '/Users/arbailey/natcap/idb/data/source/abigail_sargassum'
data_file = 'time series sargasso cover.txt'
landsat_timeseries_path = os.path.join(source_dir, data_file)
landsat_ts_df = (
    pd.read_table(
        landsat_timeseries_path,
        names=['intdate', 'Date', 'area_ha'],
        parse_dates=['Date'],
        dayfirst=True,
    )
    .assign(source='landsat')
    .drop(columns=['intdate'])
)
landsat_ts_df

In [None]:
# Inspect the column dtypes (e.g. to check how 'Date' was parsed)
landsat_ts_df.dtypes

In [None]:
# Sentinel-2 totals reshaped to the Landsat table's columns (Date, source, area_ha).
# Built as one chain so the new column is never assigned onto a column-subset
# slice of shore_grouped_all (SettingWithCopyWarning).
# NOTE(review): 'area_ha' holds the raw pixel count here — the "/ 100" conversion used
# for 'sarg_ha' in the test-dates cell is not applied; confirm this is intentional.
s2ts_4landsat_df = (
    shore_grouped_all[['Date', 'pr_sum', 'source']]
    .assign(area_ha=lambda d: d['pr_sum'])  # / 100 # /10000
    .drop(columns=['pr_sum'])
)
s2ts_4landsat_df

In [None]:
# Stack the Landsat series with the Sentinel-2 rows of source 'toaV3' only.
# reset_index() keeps the old row labels in a new 'index' column.
# landsat_s2_ts_df = pd.concat([landsat_ts_df, s2ts_4landsat_df], axis=0).reset_index()
landsat_s2_ts_df = pd.concat([landsat_ts_df, s2ts_4landsat_df.loc[s2ts_4landsat_df.source == 'toaV3']], axis=0).reset_index()

In [None]:
# Display the combined Landsat + Sentinel-2 table
landsat_s2_ts_df

In [None]:
# Line plot of 'area_ha' over time, one line per source (landsat vs toaV3)
sns.relplot(data=landsat_s2_ts_df, x="Date", y="area_ha", hue="source", kind="line", height=10, aspect=2)

In [None]:
# Landsat series on its own
landsat_ts_df.set_index('Date')[['area_ha']].plot(figsize=(24, 12))

### Puerto Morelos Beach Segment Biomass Compared to Sargassum Pixel count for Segment 195

In [None]:
# Field biomass record: load the 'Date' and 'Biomass' columns and tag rows as 'field'
fig6_spreadsheet = '/Users/arbailey/natcap/idb/data/work/sargassum/groundtruth/Biomass_sargassum_Brigit.xlsx'
fig6_df = pd.read_excel(fig6_spreadsheet, usecols=('Date','Biomass')) #, dtype={'Species': str})  
fig6_df['source'] = 'field'

fig6_df

In [None]:
# Field biomass over time, x-axis limited to 2015-11-01 .. 2020-01-01
fig6_df.set_index('Date')[['Biomass']].plot(figsize=(24, 12),xlim=(('2015-11-01','2020-01-01')))

In [None]:
# Sentinel-2 (toaV3) rows for shoreline segment 195, plotted as 'present' pixel count over time
fig6_segment_df = shore_df_toaV3[shore_df_toaV3["seg_id"] == 195]
fig6_segment_df.set_index('Date')[['pr']].plot(figsize=(24, 12),xlim=(('2015-11-01','2020-01-01')))

In [None]:
# Field biomass over time again (same plot as the earlier biomass cell), for side-by-side reading with the segment plot
fig6_df.set_index('Date')[['Biomass']].plot(figsize=(24, 12),xlim=(('2015-11-01','2020-01-01')))

In [None]:
# Overlay on one figure: segment-195 pixel counts on the left axis, field biomass on a secondary (right) axis
f, ax = plt.subplots(figsize=(20, 10))
fig6_segment_df.set_index('Date')[['pr']].plot(ax=ax, xlim=(('2015-11-01','2020-01-01')))
# secondary_y=True draws Biomass against a right-hand axis, reachable afterwards as ax.right_ax
fig6_df.set_index('Date')[['Biomass']].plot(ax=ax, xlim=(('2015-11-01','2020-01-01')), secondary_y=True)
ax.set_ylabel("Sargassum Pixels Detected");
ax.right_ax.set_ylabel("Biomass (kg/m2)")