# biophysical

> Functions for creating, modifying, and checking biophysical tables.

In [None]:
#| default_exp biophysical

In [None]:
#| hide
from nbdev.showdoc import *

# Kept at column 0: an indented top-level import raises IndentationError when the cell runs.
# NOTE(review): neither name is used in the visible cells, and pkg_resources is deprecated
# by setuptools — confirm whether this import is still needed.
import pkg_resources,importlib


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#| export
import rasterio
import numpy as np
import pandas as pd
import geopandas as gpd
import yaml
import os

In [None]:
#| export
from muir import BIOPHYSICAL_DB_COLUMNS
from muir.core import *
from muir.raster import *
from muir.visualization import *

In [None]:
#| export
def make_biophysical_table_template(
    lulc_df: pd.DataFrame,         # DataFrame with LULC classes and names
    columns: list,                 # List of column names for the biophysical values
    output_csv_path: str           # Output CSV file path
    ) -> pd.DataFrame:
    """
    Build an empty biophysical table for the classes in `lulc_df` and save it as CSV.

    The table has one row per row of `lulc_df`: `lucode` is taken from the `class`
    column, `description` from `class_name`, and every entry of `columns` becomes
    a column filled with empty strings for the user to complete.

    Raises `KeyError` if `lulc_df` has no `class` or `class_name` column.
    Returns the template that was written to `output_csv_path`.
    """
    # Positional index so the template rows are numbered 0..n-1 whatever index lulc_df carries.
    codes = lulc_df["class"].reset_index(drop=True)
    names = lulc_df["class_name"].reset_index(drop=True)
    table = pd.DataFrame({"lucode": codes, "description": names})

    for col in columns:
        # A column list that repeats the identifier columns must not blank them out.
        if col in ("lucode", "description"):
            continue
        table[col] = ""

    # Create the target folder if needed, as update_bio_db does for its save_dir.
    out_dir = os.path.dirname(output_csv_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    table.to_csv(output_csv_path, index=False)
    return table

For example, we can create a template for the Sediment Delivery Ratio (SDR) model and then import it into a spreadsheet tool such as Excel or Google Sheets to fill in the values.

In [None]:
# Build an empty SDR template for the demo LULC raster and write it to the demo data folder.
SDR_db_columns = BIOPHYSICAL_DB_COLUMNS["SDR"]  # column names listed for the SDR model
biophysical_table_csv = get_demo_path("biophysical_table/template_SDR.csv")

lulc_path = get_demo_path("lulc/mapbiomas-30m-2023-5880.tif")
lulc_clr_path = get_demo_path("lulc/mapbiomas-lulc-color-codes.clr")
# lulc_df is reused by later cells, which read its `class` and `class_name` columns.
lulc_df = load_lulc(lulc_path, lulc_clr_path)

db_template = make_biophysical_table_template(
    lulc_df, 
    SDR_db_columns,
    biophysical_table_csv
)
db_template.head(2)

Unnamed: 0,lucode,description,usle_c,usle_p
0,3,Forest Formation,,
1,11,Wetland,,


### Sensitivity Analysis Biophysical Table

When creating biophysical tables, we may often encounter multiple values in the literature. Therefore, we may want to try a range of values to see the effects on the output. Some common settings could be -50%, +50%, -10%, and +10%.

In [None]:
#| export
from pathlib import Path

def update_bio_db(
    biophysical_table_path: str, # Path to biophysical table to change
    lulc_code: int,              # LULC code to update
    param: str|list,                  # Parameter to update
    percentage_change: float,    # Percentage change to apply (e.g., -10 for -10%)
    save_dir:str|None = None      # Wether to save the new biodf to file
):
    bio_df = pd.read_csv(biophysical_table_path)
    change_factor = 1 + (percentage_change / 100)

    if isinstance(param, str):
        param = [param]
    for p in param:
        bio_df.loc[bio_df['lucode'] == lulc_code, p] *= change_factor

    if save_dir:
        Path(save_dir).mkdir(parents=True, exist_ok=True)

        stem = Path(biophysical_table_path).stem
        ext = Path(biophysical_table_path).suffix
        prcnt_str = str(percentage_change).replace('-', 'm') if percentage_change < 0 else "p" + str(percentage_change)
        
        bio_df_name = f"{stem}_lulc{lulc_code}-{''.join(param)}-{prcnt_str}{ext}"
        out_path = Path(save_dir) / bio_df_name

        bio_df.to_csv(out_path, index=False)
        print(f"Modified biophysical table saved to: {out_path}")
    
    return bio_df


In [None]:
# Load the demo SDR table; bio_df_path is passed to update_bio_db in the example below.
bio_df_path = get_demo_path("biophysical_table/demo_SDR.csv")
bio_df = pd.read_csv(bio_df_path)
bio_df.head(2)

Unnamed: 0,lucode,description,usle_c,usle_p
0,3,Forest Formation,0.001,1.0
1,5,Mangrove,0.002,1.0


Let's say we want to increase `usle_c` for Forest Formation (lucode 3) by 50%. We can do it like so:

In [None]:
# Apply +50% to usle_c for lucode 3; the modified copy is written under save_dir and also returned.
save_dir = get_demo_path("biophysical_table/sensitivity")
updated_df = update_bio_db(
    biophysical_table_path=bio_df_path, 
    lulc_code=3,                            # 3 for Forest Formation
    param='usle_c', 
    percentage_change=50, 
    save_dir=save_dir
)
updated_df.head(2)

Modified biophysical table saved to: /Volumes/creek/muir/muir/demo/biophysical_table/sensitivity/demo_SDR_lulc3-usle_c-p50.csv


Unnamed: 0,lucode,description,usle_c,usle_p
0,3,Forest Formation,0.0015,1.0
1,5,Mangrove,0.002,1.0


To decide which lucodes to include in the sensitivity analysis, one option is to limit it to the 3 most common LULC codes in the raster.

In [None]:
# Per-class pixel statistics for the demo raster, ordered from most to least frequent class.
pixel_stats = lulc_pixel_stats(get_demo_path("lulc/mapbiomas-30m-2023-5880.tif"))
pixel_stats = pixel_stats.sort_values("pixel_count", ascending=False)
pixel_stats.head(3)

Unnamed: 0,class,pixel_count,area_m2
1,3,1163784,954330300.0
7,24,699433,573551600.0
6,21,382874,313965700.0


For instance, we can see that the classes 3, 24, and 21 are the most common. So an initial step could be to target them.

In [None]:
# Look up the names of the three most frequent classes; .loc with a list keeps the ranking order.
top3_classes = pixel_stats.head(3)["class"].tolist()
lulc_df.set_index("class").loc[top3_classes][["class_name"]].reset_index()

Unnamed: 0,class,class_name
0,3,Forest Formation
1,24,Urban Area
2,21,Mosaic of Uses


### Validate Biophysical Table

Ensure your biophysical table contains the information it needs before running your InVEST model. This can save hours of frustration!

In [None]:
#| export
def check_biophysical_table(
    table: pd.DataFrame,            # Biophysical table to validate
    model: str = None,              # Key of BIOPHYSICAL_DB_COLUMNS (e.g. "SDR"); column check is skipped if None
    lulc_df: pd.DataFrame = None    # LULC legend with a `class` column (optionally `class_name`); class check is skipped if None
) -> None:
    """
    Checks a biophysical table for common issues and prints results with emojis.

    Checks, in order:
      1. Missing values (NaN/None cells and blank or whitespace-only strings).
      2. Required columns for `model`, when `model` is given. An unknown model is
         reported instead of being treated as "nothing required".
      3. Presence of the `lucode` column.
      4. LULC classes of `lulc_df` missing from the table, and table classes absent
         from `lulc_df`, when `lulc_df` has a `class` column and the table has `lucode`.
      5. Fully duplicated rows and duplicated `lucode` values.

    Results are only printed; the function returns None and does not raise on a failed check.
    """
    has_lucode = 'lucode' in table.columns

    # 1. Missing values
    # Blank strings count as missing: that is what an unfilled template holds in memory,
    # and what becomes NaN once the same table is round-tripped through CSV.
    missing_counts = []
    for pos, col in enumerate(table.columns):
        values = table.iloc[:, pos]  # positional access also copes with repeated column names
        blank = values.map(lambda v: isinstance(v, str) and not v.strip()).astype(bool)
        missing_counts.append((col, int((values.isnull() | blank).sum())))
    missing_cells = sum(n for _, n in missing_counts)

    if missing_cells > 0:
        print(f"❌ Missing values: {int(missing_cells)} cells are empty.")
        for col, n_missing in missing_counts:
            if n_missing > 0:
                print(f"   - Column '{col}': {n_missing} missing")
    else:
        print("✅ No missing values.")

    # 2. Missing columns (if model specified)
    if model:
        if model not in BIOPHYSICAL_DB_COLUMNS:
            # Falling back to an empty requirement list would report a false "all present".
            print(f"🟡 Unknown model '{model}': no required columns are defined, so the column check was skipped.")
        else:
            required_cols = set(BIOPHYSICAL_DB_COLUMNS[model])
            missing_cols = required_cols - set(table.columns)
            if missing_cols:
                print(f"❌ Missing required columns for model '{model}': {sorted(missing_cols)}")
            else:
                print(f"✅ All required columns for model '{model}' are present.")

    # 3. Missing lucode column (regardless)
    if not has_lucode:
        print("❌ Missing 'lucode' column.")
    else:
        print("✅ 'lucode' column present.")

    # 4. Missing/extra classes (if lulc_df specified)
    if lulc_df is not None and 'class' in lulc_df.columns and has_lucode:
        expected_classes = set(lulc_df['class'])
        table_classes = set(table['lucode'])
        missing_classes = expected_classes - table_classes
        extra_classes = table_classes - expected_classes

        if missing_classes:
            legend_has_names = 'class_name' in lulc_df.columns
            missing_info = []
            for cls in sorted(missing_classes):
                if legend_has_names:
                    cls_name = lulc_df.loc[lulc_df['class'] == cls, 'class_name'].values[0]
                    missing_info.append(f"{cls} ({cls_name})")
                else:
                    # Legend without names: report the bare code instead of raising KeyError.
                    missing_info.append(str(cls))
            print(f"❌ Missing LULC classes in table: {missing_info}")
        else:
            print("✅ All expected LULC classes are present.")

        if extra_classes:
            extra_info = []
            for cls in sorted(extra_classes):
                # Try to get class name from table if available, else just show code
                name = None
                if 'description' in table.columns:
                    name_row = table.loc[table['lucode'] == cls, 'description']
                    if not name_row.empty:
                        name = name_row.values[0]
                # NaN is truthy, so test it explicitly to avoid printing "(nan)".
                has_name = name is not None and pd.notna(name) and bool(name)
                extra_info.append(f"{cls} ({name})" if has_name else str(cls))
            print(f"🟡 Extra LULC classes in table: {extra_info}")
        else:
            print("✅ No extra LULC classes in table.")

    # 5. Duplicate rows
    # Check for duplicated entire rows
    duplicated_rows = table.duplicated()
    if duplicated_rows.any():
        dup_rows = table[duplicated_rows].index.tolist()
        print(f"❌ Duplicate rows at indices: {dup_rows}")
    else:
        print("✅ No duplicate rows.")

    # Check for duplicated lulc class (lucode)
    if has_lucode:
        duplicated_lucode = table['lucode'][table['lucode'].duplicated()].unique()
        if len(duplicated_lucode) > 0:
            print(f"❌ Duplicated 'lucode' values: {duplicated_lucode.tolist()}")
        else:
            print("✅ No duplicated 'lucode' values.")

Let's try it out.

In [None]:
# Baseline table for the validation examples.
biophysical_table_path = get_demo_path("biophysical_table/tests/demo.csv")
demo_table = pd.read_csv(biophysical_table_path)
demo_table.head(2)

Unnamed: 0,lucode,description,usle_c,usle_p
0,3,Forest Formation,0.001,1.0
1,5,Mangrove,0.002,1.0


In [None]:
# Validate against the SDR column list and the LULC legend loaded earlier.
check_biophysical_table(demo_table, "SDR", lulc_df)

✅ No missing values.
✅ All required columns for model 'SDR' are present.
✅ 'lucode' column present.
✅ All expected LULC classes are present.
🟡 Extra LULC classes in table: ['5 (Mangrove)', '20 (Sugar cane)', '23 (Beach, Dune and Sand Spot)', '32 (Hypersaline Tidal Flat)', '48 (Other Perennial Crops)', '49 (Wooded Sandbank Vegetation)', '50 (Herbaceous Sandbank Vegetation)']
✅ No duplicate rows.
✅ No duplicated 'lucode' values.


We can now test different tables.

In [None]:
# Fixture with one additional class (55) that is not in the LULC legend.
test_table_path = get_demo_path("biophysical_table/tests/extra_class.csv")
test_table = pd.read_csv(test_table_path, keep_default_na=True)
check_biophysical_table(test_table, "SDR", lulc_df)

✅ No missing values.
✅ All required columns for model 'SDR' are present.
✅ 'lucode' column present.
✅ All expected LULC classes are present.
🟡 Extra LULC classes in table: ['5 (Mangrove)', '20 (Sugar cane)', '23 (Beach, Dune and Sand Spot)', '32 (Hypersaline Tidal Flat)', '48 (Other Perennial Crops)', '49 (Wooded Sandbank Vegetation)', '50 (Herbaceous Sandbank Vegetation)', '55 (Extra Class)']
✅ No duplicate rows.
✅ No duplicated 'lucode' values.


In [None]:
# Fixture lacking a class that the LULC legend expects (15, Pasture).
test_table_path = get_demo_path("biophysical_table/tests/missing_class.csv")
test_table = pd.read_csv(test_table_path, keep_default_na=True)
check_biophysical_table(test_table, "SDR", lulc_df)

✅ No missing values.
✅ All required columns for model 'SDR' are present.
✅ 'lucode' column present.
❌ Missing LULC classes in table: ['15 (Pasture)']
🟡 Extra LULC classes in table: ['5 (Mangrove)', '20 (Sugar cane)', '23 (Beach, Dune and Sand Spot)', '32 (Hypersaline Tidal Flat)', '48 (Other Perennial Crops)', '49 (Wooded Sandbank Vegetation)', '50 (Herbaceous Sandbank Vegetation)']
✅ No duplicate rows.
✅ No duplicated 'lucode' values.


In [None]:
# Fixture without a `lucode` column; the class and lucode-duplicate checks are skipped for it.
test_table_path = get_demo_path("biophysical_table/tests/missing_lucode_column.csv")
test_table = pd.read_csv(test_table_path, keep_default_na=True)
check_biophysical_table(test_table, "SDR", lulc_df)

✅ No missing values.
✅ All required columns for model 'SDR' are present.
❌ Missing 'lucode' column.
✅ No duplicate rows.


In [None]:
# Fixture with a single empty cell (in usle_c).
test_table_path = get_demo_path("biophysical_table/tests/missing_value.csv")
test_table = pd.read_csv(test_table_path, keep_default_na=True)
check_biophysical_table(test_table, "SDR", lulc_df)

❌ Missing values: 1 cells are empty.
   - Column 'usle_c': 1 missing
✅ All required columns for model 'SDR' are present.
✅ 'lucode' column present.
✅ All expected LULC classes are present.
🟡 Extra LULC classes in table: ['5 (Mangrove)', '20 (Sugar cane)', '23 (Beach, Dune and Sand Spot)', '32 (Hypersaline Tidal Flat)', '48 (Other Perennial Crops)', '49 (Wooded Sandbank Vegetation)', '50 (Herbaceous Sandbank Vegetation)']
✅ No duplicate rows.
✅ No duplicated 'lucode' values.


In [None]:
# Fixture with empty cells spread over several columns (usle_c and usle_p).
test_table_path = get_demo_path("biophysical_table/tests/missing_values.csv")
test_table = pd.read_csv(test_table_path, keep_default_na=True)
check_biophysical_table(test_table, "SDR", lulc_df)

❌ Missing values: 3 cells are empty.
   - Column 'usle_c': 2 missing
   - Column 'usle_p': 1 missing
✅ All required columns for model 'SDR' are present.
✅ 'lucode' column present.
✅ All expected LULC classes are present.
🟡 Extra LULC classes in table: ['5 (Mangrove)', '20 (Sugar cane)', '23 (Beach, Dune and Sand Spot)', '32 (Hypersaline Tidal Flat)', '48 (Other Perennial Crops)', '49 (Wooded Sandbank Vegetation)', '50 (Herbaceous Sandbank Vegetation)']
✅ No duplicate rows.
✅ No duplicated 'lucode' values.


In [None]:
# Fixture where lucode 20 appears twice but the two rows are not identical in every column.
test_table_path = get_demo_path("biophysical_table/tests/duplicate_row.csv")
test_table = pd.read_csv(test_table_path, keep_default_na=True, index_col=False)
check_biophysical_table(test_table, "SDR", lulc_df)

✅ No missing values.
✅ All required columns for model 'SDR' are present.
✅ 'lucode' column present.
✅ All expected LULC classes are present.
🟡 Extra LULC classes in table: ['5 (Mangrove)', '20 (Sugar cane)', '23 (Beach, Dune and Sand Spot)', '32 (Hypersaline Tidal Flat)', '48 (Other Perennial Crops)', '49 (Wooded Sandbank Vegetation)', '50 (Herbaceous Sandbank Vegetation)']
✅ No duplicate rows.
❌ Duplicated 'lucode' values: [20]


In [None]:
# Fixture with a fully duplicated row, so both duplicate checks fail.
test_table_path = get_demo_path("biophysical_table/tests/duplicate_row_2.csv")
test_table = pd.read_csv(test_table_path, keep_default_na=True, index_col=False)
check_biophysical_table(test_table, "SDR", lulc_df)

✅ No missing values.
✅ All required columns for model 'SDR' are present.
✅ 'lucode' column present.
✅ All expected LULC classes are present.
🟡 Extra LULC classes in table: ['5 (Mangrove)', '20 (Sugar cane)', '23 (Beach, Dune and Sand Spot)', '32 (Hypersaline Tidal Flat)', '48 (Other Perennial Crops)', '49 (Wooded Sandbank Vegetation)', '50 (Herbaceous Sandbank Vegetation)']
❌ Duplicate rows at indices: [11]
❌ Duplicated 'lucode' values: [20]


In [None]:
#| hide
# Run nbdev's export step so the `#| export` cells are written to the library module.
import nbdev; nbdev.nbdev_export()