In [1]:
import geopandas as gpd
import pandas as pd
from glob import glob
from sklearn.linear_model import LinearRegression
from tqdm.auto import tqdm

In [2]:
# Transects, origin is landward. Has beach_slope
transects = gpd.read_file("transects.geojson")
transects.set_index("id", inplace=True)
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
aus0001-0000,aus0001,104.347648,0.000000,0.000000,0.085,0.0545,0.2000,-1.892087,654.0,342.0,"LINESTRING (153.26555 -24.7007, 153.26938 -24...."
aus0001-0001,aus0001,93.495734,98.408334,0.002935,0.050,0.0387,0.0640,-1.205575,654.0,473.0,"LINESTRING (153.26525 -24.7019, 153.2692 -24.7..."
aus0001-0002,aus0001,82.069341,198.408334,0.005918,0.050,0.0428,0.0647,-0.698779,654.0,491.0,"LINESTRING (153.26539 -24.70316, 153.26931 -24..."
aus0001-0003,aus0001,81.192757,298.402523,0.008900,0.055,0.0480,0.0659,-0.303470,654.0,502.0,"LINESTRING (153.26555 -24.70408, 153.26945 -24..."
aus0001-0004,aus0001,81.065473,398.402523,0.011882,0.075,0.0614,0.0922,-0.105642,654.0,508.0,"LINESTRING (153.2657 -24.70497, 153.26961 -24...."
...,...,...,...,...,...,...,...,...,...,...,...
nzd0561-0005,nzd0561,141.008990,499.608316,0.555889,0.085,0.0759,0.1026,0.204085,411.0,290.0,"LINESTRING (168.52498 -46.77668, 168.52724 -46..."
nzd0561-0006,nzd0561,135.129214,599.517710,0.667053,0.090,0.0775,0.1055,0.196932,411.0,267.0,"LINESTRING (168.52416 -46.77717, 168.5267 -46...."
nzd0561-0007,nzd0561,127.431653,698.799788,0.777520,0.080,0.0715,0.0931,0.358584,411.0,257.0,"LINESTRING (168.52342 -46.77775, 168.52627 -46..."
nzd0561-0008,nzd0561,121.785300,798.799788,0.888785,0.070,0.0640,0.0812,0.196183,411.0,292.0,"LINESTRING (168.52282 -46.77836, 168.52587 -46..."


In [3]:
files = pd.Series(sorted(glob("data/*/transect_time_series_tidally_corrected.csv")))
files

0      data/nzd0001/transect_time_series_tidally_corr...
1      data/nzd0002/transect_time_series_tidally_corr...
2      data/nzd0003/transect_time_series_tidally_corr...
3      data/nzd0004/transect_time_series_tidally_corr...
4      data/nzd0005/transect_time_series_tidally_corr...
                             ...                        
555    data/nzd0557/transect_time_series_tidally_corr...
556    data/nzd0558/transect_time_series_tidally_corr...
557    data/nzd0559/transect_time_series_tidally_corr...
558    data/nzd0560/transect_time_series_tidally_corr...
559    data/nzd0561/transect_time_series_tidally_corr...
Length: 560, dtype: object

In [4]:
trends = []
for f in tqdm(files):
  df = pd.read_csv(f)
  df.dates = pd.to_datetime(df.dates)
  df.index = (df.dates - df.dates.min()).dt.days / 365.25
  df.drop(columns="dates", inplace=True)
  for transect_id in df.columns:
    sub_df = df[transect_id].dropna()
    if not len(sub_df):
      continue
    linear_model = LinearRegression().fit(sub_df.index.to_numpy().reshape(-1, 1), sub_df)
    trends.append({
      "transect_id": transect_id,
      "trend": linear_model.coef_[0]
    })
trends = pd.DataFrame(trends).set_index("transect_id")
trends

  0%|          | 0/560 [00:00<?, ?it/s]

Unnamed: 0_level_0,trend
transect_id,Unnamed: 1_level_1
nzd0001-0000,-0.076912
nzd0001-0001,-0.314803
nzd0001-0002,-0.377247
nzd0001-0003,-0.438843
nzd0001-0004,-0.469478
...,...
nzd0561-0005,0.204085
nzd0561-0006,0.196932
nzd0561-0007,0.358584
nzd0561-0008,0.196183


In [5]:
(transects.trend - trends.trend).describe()

count    32169.000000
mean        -0.000424
std          0.049882
min         -6.076570
25%          0.000000
50%          0.000000
75%          0.000000
max          3.113339
Name: trend, dtype: float64

In [6]:
transects.trend.update(trends.trend)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  transects.trend.update(trends.trend)


In [7]:
transects.to_file("transects.geojson", driver="GeoJSON")