In [None]:
# Option 3: read the CSV files one at a time and stack them into one frame.
# NOTE: the combined data still ends up fully in memory; only the reading is
# done file by file.
import os
import pandas as pd

folder_path = "../results/"

# os.listdir returns entries in arbitrary order; sort so that the row order of
# the combined frame is reproducible between runs and machines.
all_files = sorted(
    os.path.join(folder_path, f)
    for f in os.listdir(folder_path)
    if f.endswith('.csv')
)
if not all_files:
    raise FileNotFoundError(f"No .csv files found in {folder_path!r}")

# Concatenate once. Calling pd.concat inside a loop copies every previously
# accumulated row again on each iteration (quadratic in the number of files).
df = pd.concat([pd.read_csv(path) for path in all_files], ignore_index=True)

# Pivot longer: the yearly columns '2001' .. '2021' become one row per
# (point, year), with the column label in 'year' (as a string) and the cell
# content in 'value'.
df_long = df.melt(
    id_vars=['WDPA_PID', 'transectID', 'pointID', 'max_extent', 'gHM',
             'elevation', 'slope'],
    value_vars=[str(y) for y in range(2001, 2022)],
    var_name='year',
    value_name='value',
)

ModuleNotFoundError: No module named 'dask'

In [None]:
# Pivot to get values for each pointID as separate columns
pivot = df_long.pivot_table(
    index=['WDPA_PID', 'year', 'transectID'],
    columns='pointID',
    values=['value', 'gHM', 'elevation', 'slope', 'max_extent'],
    aggfunc='first'
).reset_index()

# Flatten the (variable, pointID) column MultiIndex into "<variable>_<pointID>"
# names; the index columns restored by reset_index have an empty second level
# and keep their plain name.
pivot.columns = ['_'.join(map(str, col)).strip('_') if col[1] != '' else col[0]
                 for col in pivot.columns]


def _point_columns(columns, variable, point_ids):
    """Return the flattened columns of *variable* for the given point IDs.

    A column matches when its name is exactly "<variable>_<pointID>" and the
    pointID suffix, parsed as a number, is one of *point_ids*. Comparing
    numerically (instead of by substring) keeps point 0 from also matching
    10, -10, 20, ... and still works if pointID was read as float ("0.0").

    Args:
        columns: iterable of flattened column names.
        variable: variable prefix, e.g. 'value' or 'gHM'.
        point_ids: iterable of numeric point IDs to select.

    Returns:
        List of matching column names, in their original order.
    """
    wanted = set(point_ids)
    selected = []
    for name in columns:
        prefix, sep, suffix = name.rpartition('_')
        if not sep or prefix != variable:
            continue
        try:
            point_id = float(suffix)
        except ValueError:
            continue
        if point_id in wanted:
            selected.append(name)
    return selected


# Find the correct column names
elevation_0 = _point_columns(pivot.columns, 'elevation', [0])
slope_0 = _point_columns(pivot.columns, 'slope', [0])
value_0 = _point_columns(pivot.columns, 'value', [0])
gHM_outer = _point_columns(pivot.columns, 'gHM', [1, 2])
value_outer = _point_columns(pivot.columns, 'value', [1, 2])
value_inner = _point_columns(pivot.columns, 'value', [-1, -2])

# edge = 1 when the value at point 0 is greater than the smallest value among
# points 1/2 AND greater than the smallest value among points -1/-2.
# Comparisons involving NaN evaluate to False, so such rows get 0.
# Falls back to a constant 0 when any of the required columns is missing.
if value_0 and value_outer and value_inner:
    boundary_value = pivot[value_0[0]]
    edge = ((boundary_value > pivot[value_outer].min(axis=1)) &
            (boundary_value > pivot[value_inner].min(axis=1))).astype(int)
else:
    edge = 0

# One row per (WDPA_PID, year, transectID). Columns whose source data is
# missing are filled with None.
# NOTE(review): the 'ndvi_boundary' name suggests 'value' holds NDVI, and that
# point 0 is the boundary with positive IDs outside - confirm against the
# data source.
df_summary = pd.DataFrame({
    'WDPA_PID': pivot['WDPA_PID'],
    'year': pivot['year'],
    'transectID': pivot['transectID'],
    'trnst_max_extent': pivot[[c for c in pivot.columns if c.startswith('max_extent_')]].max(axis=1),
    'gHM_mean_outer_pts': pivot[gHM_outer].mean(axis=1) if gHM_outer else None,
    'elevation_boundary': pivot[elevation_0[0]] if elevation_0 else None,
    'slope_boundary': pivot[slope_0[0]] if slope_0 else None,
    'ndvi_boundary': pivot[value_0[0]] if value_0 else None,
    'edge': edge,
})

df_summary.head()