In [1]:
%reload_ext autotime
import geopandas as gpd
import pandas as pd
from glob import glob
from sklearn.linear_model import LinearRegression
from tqdm.auto import tqdm
from tqdm.contrib.concurrent import process_map
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, root_mean_squared_error
from coastsat import SDS_transects

In [2]:
# Transects, origin is landward. Has beach_slope
transects = gpd.read_file("transects_extended.geojson")
transects.set_index("id", inplace=True)
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,intercept,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.456305,654.0,342.0,0.131951,28.721214,1321.059410,36.346381,179.407277,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-0.833581,654.0,473.0,0.052783,26.029482,1092.336862,33.050520,210.629160,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.282209,654.0,491.0,0.008220,22.896464,838.997233,28.965449,200.166307,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,0.089340,654.0,502.0,0.000987,20.770475,691.179232,26.290288,185.295473,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,0.361437,654.0,508.0,0.017461,19.528839,637.676213,25.252252,169.027861,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sar2538-0004,sar2538,,,,,,,,,,,,,,,"LINESTRING (9.33568 41.29334, 9.33294 41.29629)"
sar2539-0000,sar2539,,,,,,,,,,,,,,,"LINESTRING (9.40022 41.3002, 9.40172 41.30362)"
sar2539-0001,sar2539,,,,,,,,,,,,,,,"LINESTRING (9.39917 41.3019, 9.4035 41.30039)"
sar2539-0002,sar2539,,,,,,,,,,,,,,,"LINESTRING (9.40237 41.3022, 9.39864 41.29996)"


In [3]:
vos_files = pd.Series(sorted(glob("shoreline_data_run6/*/time_series_tidally_corrected.csv")))
vos_files = vos_files[~vos_files.str.contains("nzd")]
vos_files

0       shoreline_data_run6/aus0001/time_series_tidall...
1       shoreline_data_run6/aus0002/time_series_tidall...
2       shoreline_data_run6/aus0003/time_series_tidall...
3       shoreline_data_run6/aus0004/time_series_tidall...
4       shoreline_data_run6/aus0005/time_series_tidall...
                              ...                        
3064    shoreline_data_run6/usa_HI_0051/time_series_ti...
3065    shoreline_data_run6/usa_HI_0052/time_series_ti...
3066    shoreline_data_run6/usa_HI_0053/time_series_ti...
3067    shoreline_data_run6/usa_HI_0054/time_series_ti...
3068    shoreline_data_run6/usa_HI_0057/time_series_ti...
Length: 2509, dtype: object

In [4]:
my_nz_files = pd.Series(sorted(glob("data/*/transect_time_series_tidally_corrected.csv")))
my_nz_files

0      data/nzd0001/transect_time_series_tidally_corr...
1      data/nzd0002/transect_time_series_tidally_corr...
2      data/nzd0003/transect_time_series_tidally_corr...
3      data/nzd0004/transect_time_series_tidally_corr...
4      data/nzd0005/transect_time_series_tidally_corr...
                             ...                        
557    data/nzd0559/transect_time_series_tidally_corr...
558    data/nzd0560/transect_time_series_tidally_corr...
559    data/nzd0561/transect_time_series_tidally_corr...
560    data/nzd0562/transect_time_series_tidally_corr...
561    data/nzd0563/transect_time_series_tidally_corr...
Length: 562, dtype: object

In [5]:
sar_files = pd.Series(sorted(glob("data/sar*/transect_time_series.csv")))
sar_files

0     data/sar0001/transect_time_series.csv
1     data/sar0002/transect_time_series.csv
2     data/sar0003/transect_time_series.csv
3     data/sar0004/transect_time_series.csv
4     data/sar0005/transect_time_series.csv
5     data/sar0006/transect_time_series.csv
6     data/sar0007/transect_time_series.csv
7     data/sar0008/transect_time_series.csv
8     data/sar0009/transect_time_series.csv
9     data/sar0010/transect_time_series.csv
10    data/sar0011/transect_time_series.csv
11    data/sar0012/transect_time_series.csv
12    data/sar0013/transect_time_series.csv
13    data/sar0014/transect_time_series.csv
14    data/sar0015/transect_time_series.csv
15    data/sar0016/transect_time_series.csv
16    data/sar0017/transect_time_series.csv
17    data/sar0018/transect_time_series.csv
18    data/sar0019/transect_time_series.csv
19    data/sar0020/transect_time_series.csv
20    data/sar0021/transect_time_series.csv
21    data/sar0022/transect_time_series.csv
22    data/sar0023/transect_time

In [6]:
files = pd.concat([vos_files, my_nz_files, sar_files])
files

0     shoreline_data_run6/aus0001/time_series_tidall...
1     shoreline_data_run6/aus0002/time_series_tidall...
2     shoreline_data_run6/aus0003/time_series_tidall...
3     shoreline_data_run6/aus0004/time_series_tidall...
4     shoreline_data_run6/aus0005/time_series_tidall...
                            ...                        
27                data/sar0028/transect_time_series.csv
28                data/sar0029/transect_time_series.csv
29                data/sar0030/transect_time_series.csv
30                data/sar0031/transect_time_series.csv
31                data/sar0032/transect_time_series.csv
Length: 3103, dtype: object

In [7]:
def despike(chainage, threshold=40):
    chainage = chainage.dropna()
    chainage, dates = SDS_transects.identify_outliers(chainage.tolist(), chainage.index.tolist(), threshold)
    return pd.Series(chainage, index=dates)

def get_trends(f):
  df = pd.read_csv(f)
  try:
    df.dates = pd.to_datetime(df.dates)
  except:
    print(f)
  df.index = (df.dates - df.dates.min()).dt.days / 365.25
  df.drop(columns=["dates", "satname", "Unnamed: 0"], inplace=True, errors='ignore')
  #if "sar" in f:
  #  df = df[~df.index.duplicated(keep="first")]
  #  df = df.apply(despike, axis=0)
  trends = []
  for transect_id in df.columns:
    sub_df = df[transect_id].dropna()
    if not len(sub_df):
      continue
    x = sub_df.index.to_numpy().reshape(-1, 1)
    y = sub_df
    linear_model = LinearRegression().fit(x, y)
    pred = linear_model.predict(x)
    trends.append({
      "transect_id": transect_id,
      "trend": linear_model.coef_[0],
      "intercept": linear_model.intercept_,
      "n_points": len(df[transect_id]),
      "n_points_nonan": len(sub_df),
      "r2_score": r2_score(y, pred),
      "mae": mean_absolute_error(y, pred),
      "mse": mean_squared_error(y, pred),
      "rmse": root_mean_squared_error(y, pred),
    })
  return pd.DataFrame(trends)
trends = pd.concat(process_map(get_trends, files)).set_index("transect_id")
len(trends)

  trends = pd.concat(process_map(get_trends, files)).set_index("transect_id")


  0%|          | 0/3103 [00:00<?, ?it/s]

122186

In [8]:
trends[trends.n_points_nonan > 10].sort_values("r2_score")

Unnamed: 0_level_0,trend,intercept,n_points,n_points_nonan,r2_score,mae,mse,rmse
transect_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
aus0213-0000,-0.000003,211.475107,543,487,8.622880e-12,7.797467,94.136738,9.702409
chi0406-0028,0.000005,183.818769,456,396,1.916567e-11,6.755653,85.309469,9.236312
mex0157-0034,-0.000008,225.225194,496,413,3.072342e-11,10.834152,183.654195,13.551907
usa_CA_0052-0040,-0.000010,226.224974,1340,971,5.089262e-11,10.782984,177.958484,13.340108
nzd0222-0012,0.000028,393.847817,391,335,3.824507e-10,8.683062,117.717688,10.849778
...,...,...,...,...,...,...,...,...
mex0004-0007,-7.151559,422.208551,453,432,9.769006e-01,8.743639,126.509611,11.247649
mex0004-0005,-7.365062,424.584864,453,411,9.769376e-01,9.308165,136.176728,11.669479
mex0004-0006,-7.311809,424.851589,453,424,9.771574e-01,9.168770,132.126977,11.494650
mex0367-0006,-17.743690,766.555581,1049,459,9.779147e-01,8.905623,142.902009,11.954163


In [9]:
trends[trends.index.str.startswith("sar")].describe()

Unnamed: 0,trend,intercept,n_points,n_points_nonan,r2_score,mae,mse,rmse
count,253.0,253.0,253.0,253.0,253.0,253.0,253.0,253.0
mean,-0.223819,225.050481,687.335968,590.758893,0.03737793,18.945994,2046.834342,37.464495
std,0.525256,81.361365,131.693856,97.915057,0.08032571,16.651591,3220.510322,25.412567
min,-3.124628,67.955108,555.0,379.0,3.826962e-07,3.649225,22.174978,4.709032
25%,-0.386901,184.879728,613.0,528.0,0.001623629,7.808832,295.146853,17.179839
50%,-0.219868,207.204569,628.0,554.0,0.007902146,14.109523,1130.243728,33.619098
75%,-0.030022,242.479619,786.0,668.0,0.02647643,25.168775,2572.591459,50.72072
max,2.819796,786.024877,1032.0,910.0,0.4646739,129.908035,34798.192032,186.542735


In [10]:
trends.describe()

Unnamed: 0,trend,intercept,n_points,n_points_nonan,r2_score,mae,mse,rmse
count,122186.0,122186.0,122186.0,122186.0,122186.0,122186.0,122186.0,122186.0
mean,0.237446,238.623627,558.947334,437.688418,0.1705863,12.962061,358.656799,16.418984
std,2.021184,97.306612,255.459234,209.389992,0.2190558,7.554211,692.773732,9.437928
min,-61.721935,-2765.025713,56.0,10.0,8.62288e-12,3.181092,16.314089,4.03907
25%,-0.275359,185.684645,373.0,284.0,0.01500361,8.688594,121.972802,11.04413
50%,0.130287,217.594659,514.0,416.0,0.07246305,11.127622,197.756239,14.062583
75%,0.617063,313.702254,672.0,538.0,0.2418814,14.79249,348.32794,18.663546
max,89.734302,2329.832271,1567.0,1334.0,0.9788066,172.354249,57339.880098,239.45747


In [11]:
(transects.trend - trends.trend).describe()

count    122208.000000
mean          0.000257
std           0.012363
min          -0.923411
25%           0.000000
50%           0.000000
75%           0.000000
max           1.395203
Name: trend, dtype: float64

In [12]:
transects.update(trends.drop_duplicates())

In [13]:
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,intercept,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.456305,654.0,342.0,0.131951,28.721214,1321.059410,36.346381,179.407277,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-0.833581,654.0,473.0,0.052783,26.029482,1092.336862,33.050520,210.629160,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.282209,654.0,491.0,0.008220,22.896464,838.997233,28.965449,200.166307,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,0.089340,654.0,502.0,0.000987,20.770475,691.179232,26.290288,185.295473,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,0.361437,654.0,508.0,0.017461,19.528839,637.676213,25.252252,169.027861,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sar2538-0004,sar2538,,,,,,,,,,,,,,,"LINESTRING (9.33568 41.29334, 9.33294 41.29629)"
sar2539-0000,sar2539,,,,,,,,,,,,,,,"LINESTRING (9.40022 41.3002, 9.40172 41.30362)"
sar2539-0001,sar2539,,,,,,,,,,,,,,,"LINESTRING (9.39917 41.3019, 9.4035 41.30039)"
sar2539-0002,sar2539,,,,,,,,,,,,,,,"LINESTRING (9.40237 41.3022, 9.39864 41.29996)"


In [14]:
trends.columns, transects.columns, trends.columns.isin(transects.columns)

(Index(['trend', 'intercept', 'n_points', 'n_points_nonan', 'r2_score', 'mae',
        'mse', 'rmse'],
       dtype='object'),
 Index(['site_id', 'orientation', 'along_dist', 'along_dist_norm',
        'beach_slope', 'cil', 'ciu', 'trend', 'n_points', 'n_points_nonan',
        'r2_score', 'mae', 'mse', 'rmse', 'intercept', 'geometry'],
       dtype='object'),
 array([ True,  True,  True,  True,  True,  True,  True,  True]))

In [15]:
transects = transects.join(trends.loc[:,~trends.columns.isin(transects.columns)])
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,intercept,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.456305,654.0,342.0,0.131951,28.721214,1321.059410,36.346381,179.407277,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-0.833581,654.0,473.0,0.052783,26.029482,1092.336862,33.050520,210.629160,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.282209,654.0,491.0,0.008220,22.896464,838.997233,28.965449,200.166307,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,0.089340,654.0,502.0,0.000987,20.770475,691.179232,26.290288,185.295473,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,0.361437,654.0,508.0,0.017461,19.528839,637.676213,25.252252,169.027861,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sar2538-0004,sar2538,,,,,,,,,,,,,,,"LINESTRING (9.33568 41.29334, 9.33294 41.29629)"
sar2539-0000,sar2539,,,,,,,,,,,,,,,"LINESTRING (9.40022 41.3002, 9.40172 41.30362)"
sar2539-0001,sar2539,,,,,,,,,,,,,,,"LINESTRING (9.39917 41.3019, 9.4035 41.30039)"
sar2539-0002,sar2539,,,,,,,,,,,,,,,"LINESTRING (9.40237 41.3022, 9.39864 41.29996)"


In [16]:
transects[transects.site_id.str.startswith("sar") & ~transects.trend.isna()]

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,intercept,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
sar0001-0000,sar0001,,,,,,,-0.685966,555.0,485.0,0.004922,76.678370,9104.293413,95.416421,127.529113,"LINESTRING (8.40852 38.86175, 8.40882 38.86535)"
sar0001-0001,sar0001,,,,,,,-0.983614,555.0,502.0,0.007925,80.056456,11053.370391,105.135010,179.930802,"LINESTRING (8.4084 38.86162, 8.41092 38.86464)"
sar0001-0002,sar0001,,,,,,,-0.209302,555.0,503.0,0.001282,33.396619,3292.816055,57.383064,197.833575,"LINESTRING (8.40893 38.86153, 8.41236 38.86393)"
sar0001-0003,sar0001,,,,,,,-0.305481,555.0,502.0,0.003318,22.019435,2655.598751,51.532502,238.115325,"LINESTRING (8.40904 38.86129, 8.41333 38.8626)"
sar0001-0004,sar0001,,,,,,,-0.100502,555.0,498.0,0.000389,17.929006,2471.862282,49.717827,254.627357,"LINESTRING (8.40904 38.86176, 8.4133 38.86037)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sar0032-0003,sar0032,,,,,,,-0.435873,1032.0,854.0,0.009966,14.411751,1671.780452,40.887412,195.203633,"LINESTRING (8.57719 39.03096, 8.57257 39.03088)"
sar0032-0004,sar0032,,,,,,,-0.451104,1032.0,910.0,0.009320,13.740693,1863.411357,43.167249,196.196046,"LINESTRING (8.57674 39.03245, 8.57245 39.03111)"
sar0032-0005,sar0032,,,,,,,-0.758163,1032.0,901.0,0.029535,13.504544,1632.701327,40.406699,198.319795,"LINESTRING (8.57632 39.03321, 8.57196 39.03201)"
sar0032-0006,sar0032,,,,,,,-0.811921,1032.0,872.0,0.047030,12.821914,1165.618272,34.141152,211.836532,"LINESTRING (8.57587 39.03432, 8.57181 39.03261)"


In [17]:
transects.drop_duplicates().to_file("transects_extended.geojson", driver="GeoJSON")