# 0.1.7: Correlation between sPlot and GBIF sparse trait maps

Following the approach of Wolf et al. (2022, Nature Ecology & Evolution), we calculate the initial correlation between the sPlot-based sparse CWM trait maps and those sourced from GBIF, which serves as a useful baseline.

## Imports and config

In [2]:
from pathlib import Path

import pandas as pd

from src.conf.conf import get_config
from src.utils.dataset_utils import get_trait_map_fns
from src.utils.raster_utils import open_raster


# Load the project configuration. Later cells read `cfg.model_res` (recorded as
# the resolution of each correlation) and `cfg.datasets.Y.correlation_fn` (the
# results file name) from it.
cfg = get_config()

Compute the Pearson correlation between each pair of matching sPlot and GBIF trait maps.

In [27]:
# Trait-map files for both sources. list() makes the length check below work
# even if the helper returns a lazy iterable (its return type is not visible
# from this notebook).
splot_fns = list(get_trait_map_fns("splot", cfg))
gbif_fns = list(get_trait_map_fns("gbif", cfg))

# zip() stops at the shorter input, so a trait map missing from one source would
# otherwise silently disappear from the results instead of raising.
if len(splot_fns) != len(gbif_fns):
    raise ValueError(
        f"Number of trait maps differs: {len(splot_fns)} sPlot vs "
        f"{len(gbif_fns)} GBIF"
    )

# One [trait_id, resolution, pearsonr] row per trait.
corr = []

for splot_fn, gbif_fn in zip(splot_fns, gbif_fns):
    # Files are paired by position, so each pair must describe the same trait.
    assert splot_fn.stem == gbif_fn.stem, f"{splot_fn.stem} != {gbif_fn.stem}"

    splot_map = open_raster(splot_fn).sel(band=1)
    gbif_map = open_raster(gbif_fn).sel(band=1)

    # Flatten each raster to a single-column frame named after the trait,
    # discard the auxiliary coordinate columns, and keep only cells with data.
    splot_df = (
        splot_map.to_dataframe(name=splot_fn.stem)
        .drop(columns=["band", "spatial_ref"])
        .dropna()
    )
    gbif_df = (
        gbif_map.to_dataframe(name=gbif_fn.stem)
        .drop(columns=["band", "spatial_ref"])
        .dropna()
    )

    # corrwith aligns both frames on index and column label before correlating.
    # The column labels are identical (same stem), so the single resulting value
    # is the Pearson correlation over the cells present in both maps.
    corr.append([splot_fn.stem, cfg.model_res, splot_df.corrwith(gbif_df).iloc[0]])

corr_df_this_res = pd.DataFrame(corr, columns=["trait_id", "resolution", "pearsonr"])

Write the correlation results to file.

In [3]:
corr_results_fn = Path("results", cfg.datasets.Y.correlation_fn)

if corr_results_fn.exists():
    # Add this run's rows to the results already on disk rather than writing the
    # old file back unchanged. Rows for a (trait, resolution) pair that was
    # computed before are replaced by the fresh values (keep="last").
    corr_df = pd.concat(
        [pd.read_csv(corr_results_fn), corr_df_this_res], ignore_index=True
    ).drop_duplicates(subset=["trait_id", "resolution"], keep="last")
else:
    corr_df = corr_df_this_res

# Make sure the output directory exists on a fresh checkout.
corr_results_fn.parent.mkdir(parents=True, exist_ok=True)
corr_df.to_csv(corr_results_fn, index=False)

We can repeat this for all resolutions (done separately).

## Visualize correlation results

In [9]:
from src.utils.trait_utils import get_trait_name_from_id


def _first_trait_name(trait_id):
    """Return the first element of ``get_trait_name_from_id(trait_id)``."""
    return get_trait_name_from_id(trait_id)[0]


# Reload the accumulated correlation results from disk and attach a
# human-readable name to every trait ID.
corr_results_path = Path("results") / f"{cfg.datasets.Y.correlation_fn}"
corr_df = pd.read_csv(corr_results_path)

corr_df["trait_name"] = corr_df["trait_id"].map(_first_trait_name)

In [19]:
# Layout of `corr_df` at this point (first rows, for reference):
#
#    trait_id  resolution  pearsonr  trait_name
# 0  X1080     222km       0.182681  SRL
# 1  X13       222km       0.321142  Leaf C
# 2  X138      222km       0.353024  Seed number
# 3  X14       222km       0.435333  Leaf N (mass)
# 4  X144      222km       0.622890  Leaf length

# Column order for the wide table, from finest to coarsest resolution.
resolution_order = ["1km", "22km", "55km", "111km", "222km"]

# Reshape from long to wide: one row per trait name, one column per resolution,
# with the Pearson correlation as the cell value.
corr_by_trait = corr_df.rename(columns={"trait_name": "Trait"}).drop(
    columns=["trait_id"]
)
corr_wide = corr_by_trait.pivot(
    index="Trait", columns="resolution", values="pearsonr"
)

# reindex puts the columns in the order above; a resolution with no results yet
# appears as an all-NaN column.
corr_df_formatted = corr_wide.reindex(columns=resolution_order)

Save the table as LaTeX.

In [27]:
# Render the wide correlation table as a LaTeX `table` float.
latex_table = corr_df_formatted.to_latex(
    index=True,
    float_format="%.2f",
    caption="Pearson correlation between sPlot and GBIF sparse CWM trait grids.",
    label="tab:splot_gbif_correlation",
    # pandas copies this value into `\begin{table}[...]`, so it must be a float
    # placement specifier (h/t/b/p); "c" is not one and is rejected by LaTeX.
    position="htbp",
    index_names=False,
)

# Centering and alternating row colours must be issued inside the float, right
# before the tabular; prepended ahead of `\begin{table}` they would not centre
# the table. `\rowcolors` needs xcolor loaded with its `table` option in the
# including document.
table_setup = (
    "\\centering\n"
    "\\rowcolors{2}{white}{gray!10}\n"  # or your preferred colors
)
latex_table = latex_table.replace(
    "\\begin{tabular}", table_setup + "\\begin{tabular}", 1
)

# Same base name as the CSV results file, with a .tex extension.
latex_fn = Path("results", f"{Path(cfg.datasets.Y.correlation_fn).stem}.tex")

# Explicit encoding so the output does not depend on the platform default.
with open(latex_fn, "w", encoding="utf-8") as f:
    f.write(latex_table)