In [2]:
import geopandas as gpd
import pandas as pd
from glob import glob
from sklearn.linear_model import LinearRegression
from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, root_mean_squared_error

In [3]:
# Transects, origin is landward. Has beach_slope
transects = gpd.read_file("transects_extended.geojson")
transects.set_index("id", inplace=True)
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.892087,654.0,342.0,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-1.205575,654.0,473.0,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.698779,654.0,491.0,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,-0.303470,654.0,502.0,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,-0.105642,654.0,508.0,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...
nzd0561-0005,nzd0561,141.008990,499.608316,0.555889,0.085,0.0759,0.1026,0.204085,411.0,290.0,"LINESTRING (168.52345 -46.77522, 168.5303 -46...."
nzd0561-0006,nzd0561,135.129214,599.517710,0.667053,0.090,0.0775,0.1055,0.196932,411.0,267.0,"LINESTRING (168.52243 -46.77582, 168.53017 -46..."
nzd0561-0007,nzd0561,127.431653,698.799788,0.777520,0.080,0.0715,0.0931,0.358584,411.0,257.0,"LINESTRING (168.52143 -46.77658, 168.53024 -46..."
nzd0561-0008,nzd0561,121.785300,798.799788,0.888785,0.070,0.0640,0.0812,0.196183,411.0,292.0,"LINESTRING (168.52068 -46.77733, 168.53016 -46..."


In [4]:
files = pd.Series(sorted(glob("data/*/transect_time_series_tidally_corrected.csv")))
files

0      data/nzd0001/transect_time_series_tidally_corr...
1      data/nzd0002/transect_time_series_tidally_corr...
2      data/nzd0003/transect_time_series_tidally_corr...
3      data/nzd0004/transect_time_series_tidally_corr...
4      data/nzd0005/transect_time_series_tidally_corr...
                             ...                        
555    data/nzd0557/transect_time_series_tidally_corr...
556    data/nzd0558/transect_time_series_tidally_corr...
557    data/nzd0559/transect_time_series_tidally_corr...
558    data/nzd0560/transect_time_series_tidally_corr...
559    data/nzd0561/transect_time_series_tidally_corr...
Length: 560, dtype: object

In [6]:
trends = []
for f in tqdm(files):
  df = pd.read_csv(f)
  df.dates = pd.to_datetime(df.dates)
  df.index = (df.dates - df.dates.min()).dt.days / 365.25
  df.drop(columns=["dates", "satname"], inplace=True)
  for transect_id in df.columns:
    sub_df = df[transect_id].dropna()
    if not len(sub_df):
      continue
    x = sub_df.index.to_numpy().reshape(-1, 1)
    y = sub_df
    linear_model = LinearRegression().fit(x, y)
    pred = linear_model.predict(x)
    trends.append({
      "transect_id": transect_id,
      "trend": linear_model.coef_[0],
      "r2_score": r2_score(y, pred),
      "mae": mean_absolute_error(y, pred),
      "mse": mean_squared_error(y, pred),
      "rmse": root_mean_squared_error(y, pred),
    })
trends = pd.DataFrame(trends).set_index("transect_id")
trends

  0%|          | 0/560 [00:00<?, ?it/s]



Unnamed: 0_level_0,trend,r2_score,mae,mse,rmse
transect_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
nzd0001-0000,-0.223483,0.003289,21.597336,842.214155,29.020926
nzd0001-0001,-0.376229,0.015553,17.994905,497.012589,22.293779
nzd0001-0002,-0.473111,0.031216,16.160023,379.758453,19.487392
nzd0001-0003,-0.502773,0.043831,14.537516,304.449964,17.448495
nzd0001-0004,-0.553711,0.055225,13.808045,294.643056,17.165170
...,...,...,...,...,...
nzd0561-0005,0.099756,0.004372,7.008732,127.061190,11.272142
nzd0561-0006,0.158237,0.008083,6.958339,173.216322,13.161167
nzd0561-0007,0.284853,0.011564,8.436040,388.935029,19.721436
nzd0561-0008,0.041532,0.000827,7.726150,118.733279,10.896480


In [7]:
trends.describe()

Unnamed: 0,trend,r2_score,mae,mse,rmse
count,32327.0,32322.0,32327.0,32327.0,32327.0
mean,0.086994,0.09847829,16.15007,964.694985,25.830382
std,1.850206,0.1553333,11.529854,1521.765861,17.24806
min,-45.177433,1.151979e-10,0.0,0.0,0.0
25%,-0.437945,0.00592837,9.374294,201.0844,14.180423
50%,0.078096,0.03045289,12.176751,387.653307,19.688913
75%,0.602955,0.1149712,18.307949,995.624938,31.553525
max,38.545555,1.0,172.543382,37592.242235,193.887189


In [9]:
(transects.trend - trends.trend).describe()

count    32319.000000
mean        -0.117417
std          5.777806
min       -729.943185
25%         -0.146988
50%         -0.007729
75%          0.111684
max        311.934927
Name: trend, dtype: float64

In [10]:
transects.trend.update(trends.trend)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  transects.trend.update(trends.trend)


In [13]:
transects.merge(trends, how="left")

Unnamed: 0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,geometry,r2_score,mae,mse,rmse
0,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.892087,654.0,342.0,"LINESTRING (153.26555 -24.7007, 153.26938 -24....",,,,
1,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-1.205575,654.0,473.0,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7...",,,,
2,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.698779,654.0,491.0,"LINESTRING (153.26539 -24.70316, 153.26931 -24...",,,,
3,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,-0.303470,654.0,502.0,"LINESTRING (153.26555 -24.70408, 153.26945 -24...",,,,
4,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,-0.105642,654.0,508.0,"LINESTRING (153.2657 -24.70497, 153.26961 -24....",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127422,nzd0561,141.008990,499.608316,0.555889,0.085,0.0759,0.1026,0.099756,411.0,290.0,"LINESTRING (168.52345 -46.77522, 168.5303 -46....",0.004372,7.008732,127.061190,11.272142
127423,nzd0561,135.129214,599.517710,0.667053,0.090,0.0775,0.1055,0.158237,411.0,267.0,"LINESTRING (168.52243 -46.77582, 168.53017 -46...",0.008083,6.958339,173.216322,13.161167
127424,nzd0561,127.431653,698.799788,0.777520,0.080,0.0715,0.0931,0.284853,411.0,257.0,"LINESTRING (168.52143 -46.77658, 168.53024 -46...",0.011564,8.436040,388.935029,19.721436
127425,nzd0561,121.785300,798.799788,0.888785,0.070,0.0640,0.0812,0.041532,411.0,292.0,"LINESTRING (168.52068 -46.77733, 168.53016 -46...",0.000827,7.726150,118.733279,10.896480


In [14]:
transects.to_file("transects_extended.geojson", driver="GeoJSON")