In [52]:
%reload_ext autotime
import geopandas as gpd
import pandas as pd
from glob import glob
from sklearn.linear_model import LinearRegression
from tqdm.auto import tqdm
from tqdm.contrib.concurrent import process_map
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, root_mean_squared_error
from coastsat import SDS_transects
pd.options.plotting.backend = "plotly"

In [4]:
# Load the transect table and key it by transect id.
# Transect origins are on the landward side; the table carries a beach_slope column.
transects = gpd.read_file("transects_extended.geojson").set_index("id")
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,intercept,ERODIBILITY,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.456305,654.0,342.0,0.131951,28.721214,1321.059410,36.346381,179.407277,,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-0.833581,654.0,473.0,0.052783,26.029482,1092.336862,33.050520,210.629160,,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.282209,654.0,491.0,0.008220,22.896464,838.997233,28.965449,200.166307,,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,0.089340,654.0,502.0,0.000987,20.770475,691.179232,26.290288,185.295473,,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,0.361437,654.0,508.0,0.017461,19.528839,637.676213,25.252252,169.027861,,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sar2538-0004,sar2538,,,,,,,-0.074663,1220.0,1132.0,0.020859,4.031812,37.686052,6.138897,244.745360,Low,"LINESTRING (9.33568 41.29334, 9.33294 41.29629)"
sar2539-0000,sar2539,,,,,,,0.159357,388.0,337.0,0.014258,6.685558,118.654858,10.892881,212.777700,Low,"LINESTRING (9.40022 41.3002, 9.40172 41.30362)"
sar2539-0001,sar2539,,,,,,,0.160824,388.0,356.0,0.020390,5.452777,82.388602,9.076817,191.174425,Low,"LINESTRING (9.39917 41.3019, 9.4035 41.30039)"
sar2539-0002,sar2539,,,,,,,-0.371603,388.0,349.0,0.027053,9.205384,346.372279,18.611079,222.954625,Low,"LINESTRING (9.40237 41.3022, 9.39864 41.29996)"


In [5]:
# Tidally corrected time series under shoreline_data_run6/, minus every path
# that contains "nzd". The filter keeps the original positional labels, so the
# index has gaps.
vos_files = pd.Series(
    sorted(glob("shoreline_data_run6/*/time_series_tidally_corrected.csv"))
).loc[lambda paths: ~paths.str.contains("nzd")]
vos_files

0       shoreline_data_run6/aus0001/time_series_tidall...
1       shoreline_data_run6/aus0002/time_series_tidall...
2       shoreline_data_run6/aus0003/time_series_tidall...
3       shoreline_data_run6/aus0004/time_series_tidall...
4       shoreline_data_run6/aus0005/time_series_tidall...
                              ...                        
3064    shoreline_data_run6/usa_HI_0051/time_series_ti...
3065    shoreline_data_run6/usa_HI_0052/time_series_ti...
3066    shoreline_data_run6/usa_HI_0053/time_series_ti...
3067    shoreline_data_run6/usa_HI_0054/time_series_ti...
3068    shoreline_data_run6/usa_HI_0057/time_series_ti...
Length: 2509, dtype: object

In [6]:
# Tidally corrected transect time series found one level below data/,
# in lexicographic order with a fresh 0..n-1 index.
my_nz_files = pd.Series(
    glob("data/*/transect_time_series_tidally_corrected.csv")
).sort_values(ignore_index=True)
my_nz_files

0      data/nzd0001/transect_time_series_tidally_corr...
1      data/nzd0002/transect_time_series_tidally_corr...
2      data/nzd0003/transect_time_series_tidally_corr...
3      data/nzd0004/transect_time_series_tidally_corr...
4      data/nzd0005/transect_time_series_tidally_corr...
                             ...                        
557    data/nzd0559/transect_time_series_tidally_corr...
558    data/nzd0560/transect_time_series_tidally_corr...
559    data/nzd0561/transect_time_series_tidally_corr...
560    data/nzd0562/transect_time_series_tidally_corr...
561    data/nzd0563/transect_time_series_tidally_corr...
Length: 562, dtype: object

In [7]:
# Transect time series of every data/sar* site, in lexicographic order
# with a fresh 0..n-1 index.
sar_files = pd.Series(glob("data/sar*/transect_time_series.csv")).sort_values(
    ignore_index=True
)
sar_files

0       data/sar0001/transect_time_series.csv
1       data/sar0002/transect_time_series.csv
2       data/sar0003/transect_time_series.csv
3       data/sar0004/transect_time_series.csv
4       data/sar0005/transect_time_series.csv
                        ...                  
2534    data/sar2535/transect_time_series.csv
2535    data/sar2536/transect_time_series.csv
2536    data/sar2537/transect_time_series.csv
2537    data/sar2538/transect_time_series.csv
2538    data/sar2539/transect_time_series.csv
Length: 2539, dtype: object

In [8]:
# Stack the three file lists into one Series. Each source keeps its own index
# labels (nothing is renumbered), so labels repeat across sources.
files = pd.concat(
    [vos_files, my_nz_files, sar_files],
    axis=0,
    ignore_index=False,
)
files

0       shoreline_data_run6/aus0001/time_series_tidall...
1       shoreline_data_run6/aus0002/time_series_tidall...
2       shoreline_data_run6/aus0003/time_series_tidall...
3       shoreline_data_run6/aus0004/time_series_tidall...
4       shoreline_data_run6/aus0005/time_series_tidall...
                              ...                        
2534                data/sar2535/transect_time_series.csv
2535                data/sar2536/transect_time_series.csv
2536                data/sar2537/transect_time_series.csv
2537                data/sar2538/transect_time_series.csv
2538                data/sar2539/transect_time_series.csv
Length: 5610, dtype: object

In [68]:
# Preview one SAR transect: raw chainage next to its 90-day and 180-day
# time-based rolling means.
f = files[files.str.contains("sar1117")].iloc[0]
df = pd.read_csv(f)
df["dates"] = pd.to_datetime(df["dates"])
df = df.set_index("dates")
display(df.columns)
# NOTE(review): not used in this cell (plotting goes through the pandas backend);
# kept because later cells may rely on `plt` being imported here.
import matplotlib.pyplot as plt
transect_id = "sar1117-0003"
pd.DataFrame(
    {
        "raw": df[transect_id],
        # One smoothed column per window; min_periods=1 emits a value as soon
        # as the window holds a single observation.
        **{
            f"rolling {window}": df[transect_id].rolling(window, min_periods=1).mean()
            for window in ("90d", "180d")
        },
    },
    index=df.index,
).plot()

Index(['satname', 'sar1117-0000', 'sar1117-0001', 'sar1117-0002',
       'sar1117-0003', 'sar1117-0004', 'sar1117-0005', 'sar1117-0006',
       'sar1117-0007'],
      dtype='object')

In [73]:
# Plot transect sar0939-0000 against time, shifted down by a constant.
df = (
    pd.read_csv("data/sar0939/transect_time_series.csv")
    .assign(dates=lambda frame: pd.to_datetime(frame["dates"]))
    .set_index("dates")
)
# NOTE(review): 93 is a hand-picked offset; confirm what it represents.
df["sar0939-0000"].sub(93).plot()

In [69]:
def despike(chainage, threshold=40):
    """Remove outliers from one chainage series.

    Missing values are dropped first; the remaining values and their index
    labels are handed to ``SDS_transects.identify_outliers`` together with
    ``threshold``.

    Parameters
    ----------
    chainage : pd.Series
        Chainage values; the index labels are passed through as the dates.
    threshold : number, default 40
        Forwarded unchanged as the third argument of ``identify_outliers``.

    Returns
    -------
    pd.Series
        The values returned by ``identify_outliers``, indexed by the dates it
        returned.
    """
    valid = chainage.dropna()
    kept_values, kept_dates = SDS_transects.identify_outliers(
        valid.to_list(), valid.index.to_list(), threshold
    )
    return pd.Series(kept_values, index=kept_dates)


def _despike_and_smooth(df, rolling_window="180d"):
    """Despike every transect column of a SAR table, then smooth it.

    ``df`` must hold a datetime ``dates`` column and a ``satname`` column; all
    other columns are treated as transects. Returns a new frame with ``dates``
    back as a regular column. The input frame is not modified.
    """
    df = df.set_index("dates")
    satname = df["satname"]
    # despike() drops NaNs and outliers column by column; apply() re-aligns the
    # per-column results on the union of the surviving dates.
    df = df.drop(columns="satname").apply(despike, axis=0)
    for transect_id in df.columns:
        # Time-based window. With min_periods=1 a value is produced wherever the
        # window holds at least one observation, so gaps get filled as well.
        df[transect_id] = df[transect_id].rolling(rolling_window, min_periods=1).mean()
    df["satname"] = satname
    return df.reset_index(names="dates")


def get_trends(f, use_cache=False, rolling_window="180d"):
    """Fit a linear trend to every transect time series stored in one CSV file.

    Time is expressed in years since the first date in the file (days / 365.25),
    so ``trend`` is in chainage units per year.

    Files whose path contains ``"sar"`` are despiked and smoothed with a rolling
    mean before fitting, and the processed table is written next to the input
    as ``*_despiked.csv``. Fit metrics for those files are therefore computed on
    smoothed data and are not directly comparable with the other sources.

    Parameters
    ----------
    f : str
        Path of a CSV with a ``dates`` column and one column per transect.
    use_cache : bool, default False
        If True, reuse an existing ``*_despiked.csv`` instead of recomputing it.
        The default recomputes every time. A cached file reflects whatever
        window was in effect when it was written.
    rolling_window : str, default "180d"
        Window of the rolling mean applied to despiked SAR series.

    Returns
    -------
    pd.DataFrame
        One row per transect that has at least one non-NaN value, with columns
        ``transect_id``, ``trend``, ``intercept``, ``n_points``,
        ``n_points_nonan``, ``r2_score``, ``mae``, ``mse`` and ``rmse``.

    Raises
    ------
    ValueError
        If the ``dates`` column is missing or cannot be parsed; the message
        names the offending file.
    """
    df = pd.read_csv(f)
    try:
        df["dates"] = pd.to_datetime(df["dates"])
    except (KeyError, TypeError, ValueError) as err:
        # Fail here, with the file name, rather than printing it and crashing
        # later with an unrelated error inside a worker process.
        raise ValueError(f"{f}: cannot parse the 'dates' column") from err

    if "sar" in f:
        despiked_filename = f.replace(".csv", "_despiked.csv")
        cached = None
        if use_cache:
            try:
                cached = pd.read_csv(despiked_filename)
            except (FileNotFoundError, pd.errors.EmptyDataError):
                cached = None  # no usable cache: fall through and recompute
        if cached is not None:
            cached["dates"] = pd.to_datetime(cached["dates"])
            df = cached
        else:
            df = _despike_and_smooth(df, rolling_window)
            df.to_csv(despiked_filename, index=False)

    # Regress against elapsed time in years since the first observation.
    df.index = (df["dates"] - df["dates"].min()).dt.days / 365.25
    df = df.drop(columns=["dates", "satname", "Unnamed: 0"], errors="ignore")

    trends = []
    for transect_id in df.columns:
        sub_df = df[transect_id].dropna()
        if sub_df.empty:
            continue
        x = sub_df.index.to_numpy().reshape(-1, 1)
        y = sub_df
        linear_model = LinearRegression().fit(x, y)
        pred = linear_model.predict(x)
        trends.append(
            {
                "transect_id": transect_id,
                "trend": linear_model.coef_[0],
                "intercept": linear_model.intercept_,
                "n_points": len(df[transect_id]),
                "n_points_nonan": len(sub_df),
                "r2_score": r2_score(y, pred),
                "mae": mean_absolute_error(y, pred),
                "mse": mean_squared_error(y, pred),
                "rmse": root_mean_squared_error(y, pred),
            }
        )
    return pd.DataFrame(trends)


# Fit every file in a process pool and stack the per-file results, keyed by
# transect id. An explicit chunksize hands tasks to the workers in batches;
# tqdm warns that leaving it unset for more than 1000 items may seriously
# degrade multiprocess performance. Result order is unchanged.
trends = pd.concat(process_map(get_trends, files, chunksize=16)).set_index(
    "transect_id"
)
len(trends)


Iterable length 5610 > 1000 but `chunksize` is not set. This may seriously degrade multiprocess performance. Set `chunksize=1` or more.



  0%|          | 0/5610 [00:00<?, ?it/s]

****************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************

141686

In [74]:
# Transects with more than 10 valid observations, ordered by ascending r2_score.
trends.loc[trends["n_points_nonan"] > 10].sort_values(by="r2_score")

Unnamed: 0_level_0,trend,intercept,n_points,n_points_nonan,r2_score,mae,mse,rmse
transect_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
aus0213-0000,-0.000003,211.475107,543,487,8.622880e-12,7.797467,94.136738,9.702409
chi0406-0028,0.000005,183.818769,456,396,1.916567e-11,6.755653,85.309469,9.236312
mex0157-0034,-0.000008,225.225194,496,413,3.072342e-11,10.834152,183.654195,13.551907
usa_CA_0052-0040,-0.000010,226.224974,1340,971,5.089262e-11,10.782984,177.958484,13.340108
sar1545-0003,0.000001,198.677122,1343,1343,6.544132e-11,1.715364,4.526692,2.127602
...,...,...,...,...,...,...,...,...
sar0812-0006,-6.600339,357.409204,499,497,9.898222e-01,6.302722,59.685479,7.725638
sar0812-0007,-6.805908,364.233853,499,491,9.902502e-01,6.062722,57.211007,7.563796
sar0811-0004,-7.117457,395.353104,557,543,9.913262e-01,6.422112,55.686163,7.462316
sar0812-0005,-6.515664,372.656979,499,499,9.932725e-01,4.608779,38.264576,6.185837


In [75]:
# Summary statistics restricted to transects whose id starts with "sar".
trends.loc[trends.index.str.startswith("sar")].describe()

Unnamed: 0,trend,intercept,n_points,n_points_nonan,r2_score,mae,mse,rmse
count,19753.0,19753.0,19753.0,19753.0,19753.0,19753.0,19753.0,19753.0
mean,0.015261,227.011145,960.060801,958.430922,0.1578351,5.963747,295.421435,8.0292
std,0.900108,95.700337,304.793908,305.531393,0.1965702,11.703091,1825.287245,15.197535
min,-21.644437,-144.682236,187.0,8.0,6.544132e-11,0.0,0.0,0.0
25%,-0.113232,185.28132,685.0,684.0,0.01618775,2.071444,7.208323,2.684832
50%,-0.008421,203.555745,919.0,918.0,0.07439026,2.797285,13.151176,3.626455
75%,0.085454,227.126363,1283.0,1282.0,0.2253854,4.529388,35.322643,5.943286
max,13.353891,1010.727147,1434.0,1434.0,1.0,284.928271,87493.864388,295.793618


In [76]:
# Summary statistics of the fitted metrics across every transect in `trends`.
trends.describe()

Unnamed: 0,trend,intercept,n_points,n_points_nonan,r2_score,mae,mse,rmse
count,141686.0,141686.0,141686.0,141686.0,141686.0,141686.0,141686.0,141686.0
mean,0.207294,237.028922,614.638878,510.01384,0.1690465,11.975714,346.826434,15.211752
std,1.908139,97.192347,297.485467,288.685295,0.2162061,8.579267,924.78387,10.743829
min,-61.721935,-2765.025713,56.0,8.0,8.62288e-12,0.0,0.0,0.0
25%,-0.228601,185.609404,405.0,308.0,0.01526744,7.562835,94.344211,9.713095
50%,0.07849,214.773916,551.0,446.0,0.07294255,10.454762,175.037964,13.230191
75%,0.525658,296.403883,745.0,657.0,0.2394665,14.163464,320.895477,17.913556
max,89.734302,2329.832271,1567.0,1434.0,1.0,284.928271,87493.864388,295.793618


In [77]:
# Index-aligned difference between the stored trends and the newly fitted ones.
transects["trend"].sub(trends["trend"]).describe()

count    141708.000000
mean          0.000275
std           0.033592
min          -2.299107
25%           0.000000
50%           0.000000
75%           0.000000
max           2.547607
Name: trend, dtype: float64

In [78]:
# Overwrite the stored statistics in place with the freshly fitted ones.
# DataFrame.update aligns on the index, so de-duplicate on the transect id
# (keeping the first occurrence), not on row values. Value-based
# drop_duplicates() leaves repeated ids in place when their values differ, and
# drops distinct transects that happen to share identical statistics.
transects.update(trends[~trends.index.duplicated(keep="first")])

In [79]:
# Display the transect table after the in-place update.
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,intercept,ERODIBILITY,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.456305,654.0,342.0,0.131951,28.721214,1321.059410,36.346381,179.407277,,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-0.833581,654.0,473.0,0.052783,26.029482,1092.336862,33.050520,210.629160,,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.282209,654.0,491.0,0.008220,22.896464,838.997233,28.965449,200.166307,,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,0.089340,654.0,502.0,0.000987,20.770475,691.179232,26.290288,185.295473,,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,0.361437,654.0,508.0,0.017461,19.528839,637.676213,25.252252,169.027861,,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sar2538-0004,sar2538,,,,,,,-0.072765,1220.0,1220.0,0.159922,1.468474,3.900855,1.975058,244.639331,Low,"LINESTRING (9.33568 41.29334, 9.33294 41.29629)"
sar2539-0000,sar2539,,,,,,,0.094468,388.0,378.0,0.023321,2.795524,24.861725,4.986153,214.008472,Low,"LINESTRING (9.40022 41.3002, 9.40172 41.30362)"
sar2539-0001,sar2539,,,,,,,0.161493,388.0,376.0,0.058879,3.099783,27.664877,5.259741,191.216499,Low,"LINESTRING (9.39917 41.3019, 9.4035 41.30039)"
sar2539-0002,sar2539,,,,,,,-0.285881,388.0,379.0,0.024153,5.867024,226.611071,15.053607,221.258244,Low,"LINESTRING (9.40237 41.3022, 9.39864 41.29996)"


In [80]:
# Show both column sets and which fitted columns already exist on the transect table.
(
    trends.columns,
    transects.columns,
    trends.columns.isin(transects.columns),
)

(Index(['trend', 'intercept', 'n_points', 'n_points_nonan', 'r2_score', 'mae',
        'mse', 'rmse'],
       dtype='object'),
 Index(['site_id', 'orientation', 'along_dist', 'along_dist_norm',
        'beach_slope', 'cil', 'ciu', 'trend', 'n_points', 'n_points_nonan',
        'r2_score', 'mae', 'mse', 'rmse', 'intercept', 'ERODIBILITY',
        'geometry'],
       dtype='object'),
 array([ True,  True,  True,  True,  True,  True,  True,  True]))

In [81]:
# Attach any fitted columns the transect table does not have yet.
# The right-hand side is de-duplicated on the transect id first: joining
# against repeated index labels would repeat the matching transect rows, even
# when no new columns are being added.
transects = transects.join(
    trends.loc[
        ~trends.index.duplicated(keep="first"),
        ~trends.columns.isin(transects.columns),
    ]
)
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,intercept,ERODIBILITY,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.456305,654.0,342.0,0.131951,28.721214,1321.059410,36.346381,179.407277,,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-0.833581,654.0,473.0,0.052783,26.029482,1092.336862,33.050520,210.629160,,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.282209,654.0,491.0,0.008220,22.896464,838.997233,28.965449,200.166307,,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,0.089340,654.0,502.0,0.000987,20.770475,691.179232,26.290288,185.295473,,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,0.361437,654.0,508.0,0.017461,19.528839,637.676213,25.252252,169.027861,,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sar2538-0004,sar2538,,,,,,,-0.072765,1220.0,1220.0,0.159922,1.468474,3.900855,1.975058,244.639331,Low,"LINESTRING (9.33568 41.29334, 9.33294 41.29629)"
sar2539-0000,sar2539,,,,,,,0.094468,388.0,378.0,0.023321,2.795524,24.861725,4.986153,214.008472,Low,"LINESTRING (9.40022 41.3002, 9.40172 41.30362)"
sar2539-0001,sar2539,,,,,,,0.161493,388.0,376.0,0.058879,3.099783,27.664877,5.259741,191.216499,Low,"LINESTRING (9.39917 41.3019, 9.4035 41.30039)"
sar2539-0002,sar2539,,,,,,,-0.285881,388.0,379.0,0.024153,5.867024,226.611071,15.053607,221.258244,Low,"LINESTRING (9.40237 41.3022, 9.39864 41.29996)"


In [82]:
# SAR-site transects that have a fitted trend.
transects.loc[
    transects["site_id"].str.startswith("sar") & transects["trend"].notna()
]

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,intercept,ERODIBILITY,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
sar0001-0000,sar0001,,,,,,,-0.162173,664.0,660.0,0.001845,34.101922,2024.879609,44.998662,135.226715,Medium,"LINESTRING (8.40852 38.86175, 8.40882 38.86535)"
sar0001-0001,sar0001,,,,,,,-0.082243,664.0,660.0,0.002388,10.749488,398.646112,19.966124,196.457827,Medium,"LINESTRING (8.4084 38.86162, 8.41092 38.86464)"
sar0001-0002,sar0001,,,,,,,-0.003968,664.0,662.0,0.000043,5.780017,51.888768,7.203386,206.273746,Medium,"LINESTRING (8.40893 38.86153, 8.41236 38.86393)"
sar0001-0003,sar0001,,,,,,,-0.041647,664.0,662.0,0.014603,3.217471,16.470026,4.058328,239.469597,Medium,"LINESTRING (8.40904 38.86129, 8.41333 38.8626)"
sar0001-0004,sar0001,,,,,,,0.015094,664.0,662.0,0.002418,2.687571,13.224690,3.636577,258.082290,Medium,"LINESTRING (8.40904 38.86176, 8.4133 38.86037)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sar2538-0004,sar2538,,,,,,,-0.072765,1220.0,1220.0,0.159922,1.468474,3.900855,1.975058,244.639331,Low,"LINESTRING (9.33568 41.29334, 9.33294 41.29629)"
sar2539-0000,sar2539,,,,,,,0.094468,388.0,378.0,0.023321,2.795524,24.861725,4.986153,214.008472,Low,"LINESTRING (9.40022 41.3002, 9.40172 41.30362)"
sar2539-0001,sar2539,,,,,,,0.161493,388.0,376.0,0.058879,3.099783,27.664877,5.259741,191.216499,Low,"LINESTRING (9.39917 41.3019, 9.4035 41.30039)"
sar2539-0002,sar2539,,,,,,,-0.285881,388.0,379.0,0.024153,5.867024,226.611071,15.053607,221.258244,Low,"LINESTRING (9.40237 41.3022, 9.39864 41.29996)"


In [83]:
# Drop fully identical rows and write the table to transects_extended.geojson,
# replacing any existing file of that name.
transects.drop_duplicates().to_file(
    filename="transects_extended.geojson",
    driver="GeoJSON",
)