In [1]:
import geopandas as gpd
import pandas as pd
from glob import glob
from sklearn.linear_model import LinearRegression
from tqdm.auto import tqdm

In [2]:
# Transects, origin is landward. Has beach_slope
transects = gpd.read_file("transects.geojson").to_crs(2193)
transects = transects[transects.site_id.str.startswith("nz")]
transects.set_index("id", inplace=True)
transects

Unnamed: 0_level_0,site_id,orientation,along_dist,along_dist_norm,beach_slope,cil,ciu,trend,n_points,n_points_nonan,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
nzd0001-0000,nzd0001,359.037136,3197.737936,1.000000,0.050,0.0397,0.0679,-3.130269,182.0,145.0,"LINESTRING (1596659.015 6190463.061, 1596653.3..."
nzd0001-0001,nzd0001,359.037136,3097.737936,0.968728,,,,-2.328981,182.0,38.0,"LINESTRING (1596741.542 6190464.471, 1596735.8..."
nzd0001-0002,nzd0001,359.037136,2997.737936,0.937456,0.060,0.0478,0.0776,-2.961207,182.0,168.0,"LINESTRING (1596824.07 6190465.88, 1596818.41 ..."
nzd0001-0003,nzd0001,359.037136,2897.737936,0.906184,,,,-2.121564,182.0,60.0,"LINESTRING (1596906.598 6190467.288, 1596900.9..."
nzd0001-0004,nzd0001,359.037136,2797.737936,0.874912,0.055,0.0450,0.0664,-2.587407,182.0,173.0,"LINESTRING (1596989.125 6190468.696, 1596983.4..."
...,...,...,...,...,...,...,...,...,...,...,...
nzd0561-0005,nzd0561,141.008990,499.608316,0.555889,0.085,0.0759,0.1026,-0.145145,411.0,290.0,"LINESTRING (1258390.33 4809921.362, 1258575.00..."
nzd0561-0006,nzd0561,135.129214,599.517710,0.667053,0.090,0.0775,0.1055,-0.347302,411.0,267.0,"LINESTRING (1258331.155 4809863.747, 1258535.6..."
nzd0561-0007,nzd0561,127.431653,698.799788,0.777520,0.080,0.0715,0.0931,-0.013795,411.0,257.0,"LINESTRING (1258277.95 4809795.586, 1258505.20..."
nzd0561-0008,nzd0561,121.785300,798.799788,0.888785,0.070,0.0640,0.0812,0.108902,411.0,292.0,"LINESTRING (1258236.035 4809725.077, 1258477.3..."


In [3]:
files = pd.Series(sorted(glob("data/*/transect_time_series_tidally_corrected.csv")))
files

0      data/nzd0001/transect_time_series_tidally_corr...
1      data/nzd0002/transect_time_series_tidally_corr...
2      data/nzd0003/transect_time_series_tidally_corr...
3      data/nzd0006/transect_time_series_tidally_corr...
4      data/nzd0007/transect_time_series_tidally_corr...
                             ...                        
100    data/nzd0141/transect_time_series_tidally_corr...
101    data/nzd0144/transect_time_series_tidally_corr...
102    data/nzd0145/transect_time_series_tidally_corr...
103    data/nzd0147/transect_time_series_tidally_corr...
104    data/nzd0188/transect_time_series_tidally_corr...
Length: 105, dtype: object

In [4]:
trends = []
for f in tqdm(files):
  df = pd.read_csv(f)
  df.dates = pd.to_datetime(df.dates)
  df.index = (df.dates - df.dates.min()).dt.days / 365.25
  df.drop(columns="dates", inplace=True)
  for transect_id in df.columns:
    sub_df = df[transect_id].dropna()
    if not len(sub_df):
      continue
    linear_model = LinearRegression().fit(sub_df.index.to_numpy().reshape(-1, 1), sub_df)
    trends.append({
      "transect_id": transect_id,
      "trend": linear_model.coef_[0]
    })
trends = pd.DataFrame(trends).set_index("transect_id")
trends

  0%|          | 0/105 [00:00<?, ?it/s]

Unnamed: 0_level_0,trend
transect_id,Unnamed: 1_level_1
nzd0001-0000,-0.503516
nzd0001-0002,-0.803013
nzd0001-0004,-0.755699
nzd0001-0006,-0.618288
nzd0001-0007,-0.539765
...,...
nzd0188-0010,0.039585
nzd0188-0011,0.223785
nzd0188-0012,0.379949
nzd0188-0013,0.322830


In [5]:
(transects.trend - trends.trend).describe()

count    2284.000000
mean        0.087467
std         0.554034
min        -2.626754
25%        -0.203323
50%         0.023875
75%         0.287984
max         3.155735
Name: trend, dtype: float64

In [6]:
transects.trend.update(trends.trend)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  transects.trend.update(trends.trend)
