In [1]:
import geopandas as gpd
import pandas as pd
from glob import glob
from sklearn.linear_model import LinearRegression
from tqdm.auto import tqdm
from tqdm.contrib.concurrent import process_map
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, root_mean_squared_error

In [2]:
# Load the transects (origin at the landward end; carries beach_slope) and key them by transect id
transects = gpd.read_file("transects_extended.geojson").set_index("id")
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.892087,654.0,342.0,,,,,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-1.205575,654.0,473.0,,,,,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.698779,654.0,491.0,,,,,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,-0.303470,654.0,502.0,,,,,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,-0.105642,654.0,508.0,,,,,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
nzd0561-0005,nzd0561,141.008990,499.608316,0.555889,0.085,0.0759,0.1026,0.042503,445.0,357.0,0.001362,6.642339,74.232517,8.615829,"LINESTRING (168.52345 -46.77522, 168.5303 -46...."
nzd0561-0006,nzd0561,135.129214,599.517710,0.667053,0.090,0.0775,0.1055,0.061408,445.0,362.0,0.003186,6.365199,66.356889,8.145974,"LINESTRING (168.52243 -46.77582, 168.53017 -46..."
nzd0561-0007,nzd0561,127.431653,698.799788,0.777520,0.080,0.0715,0.0931,0.182327,445.0,363.0,0.024228,6.757312,75.769537,8.704570,"LINESTRING (168.52143 -46.77658, 168.53024 -46..."
nzd0561-0008,nzd0561,121.785300,798.799788,0.888785,0.070,0.0640,0.0812,0.042435,445.0,370.0,0.001161,7.374187,88.776828,9.422146,"LINESTRING (168.52068 -46.77733, 168.53016 -46..."


In [3]:
# Collect every site's tidally corrected time-series CSV, in sorted path order
csv_pattern = "data/*/transect_time_series_tidally_corrected.csv"
files = pd.Series(sorted(glob(csv_pattern)))
files

0      data/nzd0001/transect_time_series_tidally_corr...
1      data/nzd0002/transect_time_series_tidally_corr...
2      data/nzd0003/transect_time_series_tidally_corr...
3      data/nzd0004/transect_time_series_tidally_corr...
4      data/nzd0005/transect_time_series_tidally_corr...
                             ...                        
555    data/nzd0557/transect_time_series_tidally_corr...
556    data/nzd0558/transect_time_series_tidally_corr...
557    data/nzd0559/transect_time_series_tidally_corr...
558    data/nzd0560/transect_time_series_tidally_corr...
559    data/nzd0561/transect_time_series_tidally_corr...
Length: 560, dtype: object

In [4]:
def get_trends(f):
  """Fit a linear trend to every transect column of one time-series CSV.

  The file is expected to contain a ``dates`` column, a ``satname`` column and
  one column per transect. Dates are converted to elapsed years since the
  earliest date in the file (whole days / 365.25), so ``trend`` is expressed in
  the CSV's value units per year.

  Parameters
  ----------
  f : str or path-like
    Path of the CSV file to read.

  Returns
  -------
  pandas.DataFrame
    One row per transect that has at least one non-NaN value, with columns
    ``transect_id``, ``trend`` (fitted slope), ``n_points`` (all rows),
    ``n_points_nonan`` (rows used for the fit), ``r2_score``, ``mae``, ``mse``
    and ``rmse`` (all computed against the fitted line on the same points).

  Raises
  ------
  ValueError
    If the ``dates`` column cannot be parsed as datetimes. The message names
    the offending file and the original error is chained as the cause.
  """
  df = pd.read_csv(f)
  raw_dates = df["dates"]
  try:
    dates = pd.to_datetime(raw_dates)
  except Exception as exc:
    # Fail loudly with the file name instead of printing and then crashing on
    # the date arithmetic below with an unrelated error.
    raise ValueError(f"Could not parse 'dates' column in {f}") from exc
  # Time axis: years elapsed since the first observation in this file.
  df.index = (dates - dates.min()).dt.days / 365.25
  df = df.drop(columns=["dates", "satname"])
  trends = []
  for transect_id in df.columns:
    sub_df = df[transect_id].dropna()
    if not len(sub_df):
      # Nothing to fit for a transect without any valid observation.
      continue
    x = sub_df.index.to_numpy().reshape(-1, 1)
    y = sub_df
    linear_model = LinearRegression().fit(x, y)
    pred = linear_model.predict(x)
    trends.append({
      "transect_id": transect_id,
      "trend": linear_model.coef_[0],
      "n_points": len(df[transect_id]),
      "n_points_nonan": len(sub_df),
      "r2_score": r2_score(y, pred),
      "mae": mean_absolute_error(y, pred),
      "mse": mean_squared_error(y, pred),
      "rmse": root_mean_squared_error(y, pred),
    })
  return pd.DataFrame(trends)
# Run get_trends over every site file via process_map, then stack the per-site
# tables into a single frame indexed by transect id
per_site_trends = process_map(get_trends, files)
trends = pd.concat(per_site_trends).set_index("transect_id")
len(trends)

  0%|          | 0/560 [00:00<?, ?it/s]

32327

In [5]:
# Transects with more than 10 valid observations, ordered by ascending R²
has_enough_points = trends["n_points_nonan"] > 10
trends.loc[has_enough_points].sort_values(by="r2_score")

Unnamed: 0_level_0,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse
transect_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
nzd0197-0033,0.000038,578,474,3.716228e-10,11.178529,200.620687,14.164063
nzd0410-0009,-0.000046,406,333,6.788201e-10,10.174822,164.726317,12.834575
nzd0345-0209,0.000090,500,360,3.044988e-09,9.561424,152.241001,12.338598
nzd0388-0007,0.000090,527,403,3.211779e-09,9.100395,140.747600,11.863709
nzd0536-0003,0.000074,452,420,3.830263e-09,6.276157,72.801184,8.532361
...,...,...,...,...,...,...,...
nzd0121-0003,14.560899,162,109,9.372930e-01,20.490619,708.805670,26.623405
nzd0121-0001,15.206793,162,102,9.383841e-01,22.312738,814.450439,28.538578
nzd0121-0002,15.388879,162,106,9.391778e-01,22.484659,798.228462,28.252937
nzd0121-0006,14.084433,162,107,9.419926e-01,19.728577,609.748206,24.693080


In [6]:
# Summary statistics (count, mean, std, quartiles, extremes) of the fitted-trend table
trends.describe()

Unnamed: 0,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse
count,32327.0,32327.0,32327.0,32327.0,32327.0,32327.0,32327.0
mean,0.083361,416.642033,305.686021,0.1437798,12.585212,362.25162,16.043195
std,1.932562,146.527824,111.864663,0.1861741,8.31178,797.776449,10.240643
min,-41.463963,97.0,10.0,3.716228e-10,3.893548,25.542549,5.053964
25%,-0.432849,322.0,231.0,0.0134313,8.302042,113.387023,10.648334
50%,0.054937,400.0,301.0,0.06364372,10.361117,174.236373,13.199863
75%,0.548462,533.0,389.0,0.1988232,13.722661,304.137998,17.439553
max,30.782313,776.0,661.0,0.9423842,118.28842,20472.735372,143.082967


In [7]:
# Index-aligned difference between the trends already stored on the transects
# and the freshly fitted ones, summarised
stored_minus_fitted = transects["trend"] - trends["trend"]
stored_minus_fitted.describe()

count    32349.000000
mean        -0.000025
std          0.002002
min         -0.210183
25%          0.000000
50%          0.000000
75%          0.000000
max          0.055900
Name: trend, dtype: float64

In [8]:
# Overwrite values in `transects` in place with the non-NaN values from `trends`,
# matched on index label and column name; rows and columns absent from `trends`
# are left as they are.
transects.update(trends)

In [9]:
# Display the transects table to inspect the result of the in-place update
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.892087,654.0,342.0,,,,,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-1.205575,654.0,473.0,,,,,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.698779,654.0,491.0,,,,,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,-0.303470,654.0,502.0,,,,,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,-0.105642,654.0,508.0,,,,,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
nzd0561-0005,nzd0561,141.008990,499.608316,0.555889,0.085,0.0759,0.1026,0.042503,445.0,357.0,0.001362,6.642339,74.232517,8.615829,"LINESTRING (168.52345 -46.77522, 168.5303 -46...."
nzd0561-0006,nzd0561,135.129214,599.517710,0.667053,0.090,0.0775,0.1055,0.061408,445.0,362.0,0.003186,6.365199,66.356889,8.145974,"LINESTRING (168.52243 -46.77582, 168.53017 -46..."
nzd0561-0007,nzd0561,127.431653,698.799788,0.777520,0.080,0.0715,0.0931,0.182327,445.0,363.0,0.024228,6.757312,75.769537,8.704570,"LINESTRING (168.52143 -46.77658, 168.53024 -46..."
nzd0561-0008,nzd0561,121.785300,798.799788,0.888785,0.070,0.0640,0.0812,0.042435,445.0,370.0,0.001161,7.374187,88.776828,9.422146,"LINESTRING (168.52068 -46.77733, 168.53016 -46..."


In [10]:
# Compare the two column sets: which trend columns already exist on the transects table?
trend_cols, transect_cols = trends.columns, transects.columns
trend_cols, transect_cols, trend_cols.isin(transect_cols)

(Index(['trend', 'n_points', 'n_points_nonan', 'r2_score', 'mae', 'mse',
        'rmse'],
       dtype='object'),
 Index(['site_id', 'orientation', 'along_dist', 'along_dist_norm',
        'beach_slope', 'cil', 'ciu', 'trend', 'n_points', 'n_points_nonan',
        'r2_score', 'mae', 'mse', 'rmse', 'geometry'],
       dtype='object'),
 array([ True,  True,  True,  True,  True,  True,  True]))

In [11]:
# Append only those trend columns that the transects table does not have yet
missing_cols = [col for col in trends.columns if col not in transects.columns]
transects = transects.join(trends[missing_cols])
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,r2_score,mae,mse,rmse,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.892087,654.0,342.0,,,,,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-1.205575,654.0,473.0,,,,,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.698779,654.0,491.0,,,,,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,-0.303470,654.0,502.0,,,,,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,-0.105642,654.0,508.0,,,,,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
nzd0561-0005,nzd0561,141.008990,499.608316,0.555889,0.085,0.0759,0.1026,0.042503,445.0,357.0,0.001362,6.642339,74.232517,8.615829,"LINESTRING (168.52345 -46.77522, 168.5303 -46...."
nzd0561-0006,nzd0561,135.129214,599.517710,0.667053,0.090,0.0775,0.1055,0.061408,445.0,362.0,0.003186,6.365199,66.356889,8.145974,"LINESTRING (168.52243 -46.77582, 168.53017 -46..."
nzd0561-0007,nzd0561,127.431653,698.799788,0.777520,0.080,0.0715,0.0931,0.182327,445.0,363.0,0.024228,6.757312,75.769537,8.704570,"LINESTRING (168.52143 -46.77658, 168.53024 -46..."
nzd0561-0008,nzd0561,121.785300,798.799788,0.888785,0.070,0.0640,0.0812,0.042435,445.0,370.0,0.001161,7.374187,88.776828,9.422146,"LINESTRING (168.52068 -46.77733, 168.53016 -46..."


In [12]:
# Write the updated transects back as GeoJSON. NOTE: this is the same path the
# transects were read from at the top of the notebook, so the input file is overwritten.
transects.to_file("transects_extended.geojson", driver="GeoJSON")