In [1]:
from pathlib import Path
import pandas as pd

In [2]:
# Plot-level summary columns carried through the rest of the notebook.
# Names follow a "<statistic>__<variable>" pattern; they are listed in groups
# and flattened in order, so the resulting column order matches the grouping.
selected_metrics = [
    metric
    for group in (
        # chm metrics
        ("mean__chm", "max__chm", "sd__chm", "cv__chm"),
        # crr / fhd / veg_height / vci metrics
        (
            "mean__crr",
            "mean__fhd",
            "mean__veg_height_cv",
            "cv__veg_height_median",
            "mean__veg_height_kurt",
            "sd__crr",
            "sd__vci",
            "mean__vci",
        ),
        # per-storey capture metrics: means first, then standard deviations
        (
            "mean__groundstorey_capture",
            "mean__understorey_capture",
            "mean__midstorey_capture",
            "mean__upperstorey_capture",
            "sd__groundstorey_capture",
            "sd__understorey_capture",
            "sd__midstorey_capture",
            "sd__upperstorey_capture",
        ),
        # canopy_cover_gt1m metrics
        ("mean__canopy_cover_gt1m", "sd__canopy_cover_gt1m"),
    )
    for metric in group
]


In [3]:
csv_dir = Path("../csvs")

# Year substituted for a missing `year_estab`, and the year that
# `years_since_dist` is measured from.
# NOTE(review): the rationale for 1950 is not recorded in this notebook — confirm.
DEFAULT_YEAR_ESTAB = 1950
REFERENCE_YEAR = 2025

# Plot-level metrics, indexed by plot id. The site label is the plot id with
# its last three characters removed.
plot_summaries = (
    pd.read_csv(csv_dir / "plot_summary_metrics.csv")
    .assign(site=lambda df: df['id'].str[0:-3])
    .set_index('id')
    .loc[:, ['site', *selected_metrics]]
)

# Site-level attributes, indexed by site id. Built as a single chain so every
# derived column is added before the column subset is taken; adding a column
# to the subset afterwards triggers pandas' SettingWithCopyWarning.
site_info = (
    pd.read_csv(csv_dir / "site_info.csv")
    .set_index('site_id')
    # Leftover index column from an earlier to_csv; tolerate files without it.
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .assign(
        year_estab=lambda df: df['year_estab'].fillna(DEFAULT_YEAR_ESTAB),
        # Uses the filled `year_estab` from the line above.
        years_since_dist=lambda df: REFERENCE_YEAR - df['year_estab'],
        # First two characters of the site type.
        forest_type=lambda df: df['site_type'].str[0:2],
    )
    .loc[:, ['site_type', 'year_estab', 'years_since_dist', 'elev_mean', 'slope_mean', 'forest_type']]
)

# Attach site attributes to every plot row via the plot's `site` label.
# `validate` fails fast if a site_id occurs more than once in site_info, which
# would otherwise silently duplicate plot rows.
plot_representative_metrics = plot_summaries.reset_index().merge(
    site_info.reset_index(),
    left_on='site',
    right_on='site_id',
    how='left',
    validate='many_to_one',
)

# The default integer index is written as an unnamed first column.
plot_representative_metrics.to_csv(csv_dir / "plot_representative_metrics.csv")

plot_representative_metrics.columns


Index(['id', 'site', 'mean__chm', 'max__chm', 'sd__chm', 'cv__chm',
       'mean__crr', 'mean__fhd', 'mean__veg_height_cv',
       'cv__veg_height_median', 'mean__veg_height_kurt', 'sd__crr', 'sd__vci',
       'mean__vci', 'mean__groundstorey_capture', 'mean__understorey_capture',
       'mean__midstorey_capture', 'mean__upperstorey_capture',
       'sd__groundstorey_capture', 'sd__understorey_capture',
       'sd__midstorey_capture', 'sd__upperstorey_capture',
       'mean__canopy_cover_gt1m', 'sd__canopy_cover_gt1m', 'site_id',
       'site_type', 'year_estab', 'years_since_dist', 'elev_mean',
       'slope_mean', 'forest_type'],
      dtype='object')

In [4]:
# Collapse the plot-level table to one row per site.

# Site attribute columns, and the full set of columns that are not metrics
# (identifiers plus site attributes).
site_info_cols = ['site_type', 'year_estab', 'years_since_dist', 'elev_mean', 'slope_mean', 'forest_type']
non_metric_columns = ['id', 'site', 'site_id', *site_info_cols]

# Every remaining column is treated as a metric.
metric_columns = [
    col
    for col in plot_representative_metrics.columns
    if col not in non_metric_columns
]

plots_by_site = plot_representative_metrics.groupby('site')

# Metrics are averaged across the plots of each site.
site_metric_means = plots_by_site[metric_columns].mean().reset_index()

# Site attributes are expected to be the same for every plot of a site, so the
# first value per site is used.
site_info_summary = plots_by_site[site_info_cols].first().reset_index()

# Combine both summaries; site attributes come first, then the metrics.
ordered_columns = ['site', *site_info_cols, *metric_columns]
site_representative_metrics = site_metric_means.merge(
    site_info_summary, on='site', how='left'
)[ordered_columns]

# Write the site-level table without the index column.
site_representative_metrics.to_csv(csv_dir / "site_representative_metrics.csv", index=False)

site_representative_metrics

Unnamed: 0,site,site_type,year_estab,years_since_dist,elev_mean,slope_mean,forest_type,mean__chm,max__chm,sd__chm,...,mean__groundstorey_capture,mean__understorey_capture,mean__midstorey_capture,mean__upperstorey_capture,sd__groundstorey_capture,sd__understorey_capture,sd__midstorey_capture,sd__upperstorey_capture,mean__canopy_cover_gt1m,sd__canopy_cover_gt1m
0,AGG_O_01,AGG,2011.0,14.0,499.878173,8.674255,AG,17.795598,38.8500,9.588873,...,0.485461,0.340963,0.152032,0.372651,0.304348,0.217247,0.120561,0.233728,0.672886,0.202604
1,AGG_O_05,AGG,2011.0,14.0,379.045078,0.638262,AG,17.953837,32.0180,6.743576,...,0.465148,0.274245,0.276296,0.404785,0.373857,0.258669,0.102098,0.230319,0.717404,0.165475
2,AGG_O_07,AGG,2013.0,12.0,504.282342,11.798216,AG,13.855632,40.6126,9.134959,...,0.325641,0.536895,0.000000,0.150111,0.297278,0.243842,0.000000,0.149971,0.638135,0.236373
3,AGG_Y_02,AGG,2019.0,6.0,374.493798,10.157180,AG,9.439318,30.3848,4.969538,...,0.636835,0.422181,0.285981,0.060774,0.287005,0.257476,0.083674,0.106688,0.655073,0.234546
4,AGG_Y_03,AGG,2019.0,6.0,280.278210,12.214606,AG,11.753187,41.9452,7.956366,...,0.484721,0.498216,0.309569,0.029361,0.304852,0.278710,0.090056,0.103083,0.626108,0.205937
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,ULO_271,ULO,1950.0,75.0,264.827836,8.790265,UL,21.914483,36.3238,5.465952,...,0.418785,0.268355,0.239480,0.481072,0.329154,0.268604,0.266516,0.246275,0.721113,0.197962
59,ULY_Y_231,ULY,2018.0,7.0,495.344440,14.730459,UL,25.492242,49.5954,14.996001,...,0.561955,0.261851,0.124661,0.281549,0.277385,0.240317,0.185151,0.281334,0.548123,0.265201
60,ULY_Y_232,ULY,2019.0,6.0,152.054739,9.794871,UL,12.777133,52.2856,14.471975,...,0.699039,0.297647,0.101104,0.117943,0.257525,0.241808,0.170153,0.205177,0.439321,0.271231
61,ULY_Y_25,ULY,2019.0,6.0,262.755022,18.912603,UL,31.919078,54.6412,17.079089,...,0.589710,0.557484,0.094196,0.254375,0.331031,0.287227,0.167680,0.244311,0.721464,0.204327
