In [1]:
"""
Script to estimate the above-ground biomass (AGB) distribution in each cluster from the predictions made with the allodb and BIOMASS R packages.
"""

import pandas as pd
import numpy as np
import geopandas as gpd
from abd_estimation import estimate_biomass_density

In [2]:
# Load the preprocessed Tallo table and list the columns it provides

TALLO_CSV = "/home/dibepa/git/global.agb.ml/data/training/tmp_preprocessed/tallo_clusters_agb.csv"
tallo = pd.read_csv(TALLO_CSV)
print(tallo.columns)

Index(['Unnamed: 0', 'tree_id', 'division', 'family', 'genus', 'species',
       'latitude', 'longitude', 'stem_diameter_cm', 'height_m',
       'crown_radius_m', 'height_outlier', 'crown_radius_outlier',
       'reference_id', 'bio', 'bgr', 'cluster', 'speciesCorrected',
       'genusCorrected', 'meanWD', 'height_local_m', 'height_final_m',
       'agb_tropical', 'agb_extra_tropical'],
      dtype='object')


In [3]:
# Choose which AGB estimate to use for each tree from its latitudinal zone.
# Zones are defined on the absolute latitude so that both hemispheres are
# treated alike (a signed comparison would label every southern tree tropical):
#   |lat| <  subtropical_lat                        -> tropical estimate
#   subtropical_lat <= |lat| < extra_tropical_lat   -> mean of both estimates
#   |lat| >= extra_tropical_lat                     -> extra-tropical estimate
# Rows with a missing latitude match neither condition and get the mean.

extra_tropical_lat = 35
subtropical_lat = 23.43619

abs_latitude = tallo.latitude.abs()

tallo["agb_kg"] = np.select(
    [abs_latitude < subtropical_lat, abs_latitude >= extra_tropical_lat],
    [tallo.agb_tropical, tallo.agb_extra_tropical],
    default=0.5 * (tallo.agb_extra_tropical + tallo.agb_tropical),
)

print(tallo)

        Unnamed: 0   tree_id    division      family   genus  \
0                1       T_1  Angiosperm  Betulaceae  Betula   
1                2       T_2  Gymnosperm    Pinaceae   Picea   
2                3       T_3  Gymnosperm    Pinaceae   Picea   
3                4       T_4  Gymnosperm    Pinaceae   Picea   
4                5       T_5  Gymnosperm    Pinaceae   Picea   
...            ...       ...         ...         ...     ...   
464993      464994  T_498247         NaN         NaN     NaN   
464994      464995  T_498268         NaN         NaN     NaN   
464995      464996  T_498274         NaN         NaN     NaN   
464996      464997  T_498286         NaN         NaN     NaN   
464997      464998  T_498319         NaN         NaN     NaN   

                 species  latitude  longitude  stem_diameter_cm  height_m  \
0       Betula pubescens    67.395     28.723              10.0       5.6   
1            Picea abies    67.395     28.723              10.0       6.6   


In [4]:
# Sample the tree density raster at every tree location
from dataset_creation import add_feature_from_raster

tree_density = "/home/dibepa/git/global.agb.ml/data/training/raw/tree_density/tree_density_biome_based_model_crowther_nature_2015_4326_float32.tiff"

# Turn the table into a GeoDataFrame of points (x = longitude, y = latitude)
# referenced to EPSG:4326.
tallo = gpd.GeoDataFrame(
    tallo,
    geometry=gpd.points_from_xy(x=tallo.longitude, y=tallo.latitude),
    crs="EPSG:4326",
)

tallo = add_feature_from_raster(tallo, "tree_density_km2", tree_density, "float")

# Negative densities are replaced by NaN
# NOTE(review): presumably these are the raster's no-data values - confirm.
negative_density = tallo["tree_density_km2"] < 0
tallo["tree_density_km2"] = tallo["tree_density_km2"].mask(negative_density)
print(tallo)

        Unnamed: 0   tree_id    division      family   genus  \
0                1       T_1  Angiosperm  Betulaceae  Betula   
1                2       T_2  Gymnosperm    Pinaceae   Picea   
2                3       T_3  Gymnosperm    Pinaceae   Picea   
3                4       T_4  Gymnosperm    Pinaceae   Picea   
4                5       T_5  Gymnosperm    Pinaceae   Picea   
...            ...       ...         ...         ...     ...   
464993      464994  T_498247         NaN         NaN     NaN   
464994      464995  T_498268         NaN         NaN     NaN   
464995      464996  T_498274         NaN         NaN     NaN   
464996      464997  T_498286         NaN         NaN     NaN   
464997      464998  T_498319         NaN         NaN     NaN   

                 species  latitude  longitude  stem_diameter_cm  height_m  \
0       Betula pubescens    67.395     28.723              10.0       5.6   
1            Picea abies    67.395     28.723              10.0       6.6   


In [6]:
# Biomass density per cluster (kg/km^2), estimated by sampling a generative
# distribution obtained from a KDE on each cluster's samples.
# Clusters already present in the temporary results file are skipped, and the
# newly computed clusters are appended to those earlier results.

ABD_TEMP_CSV = "/home/dibepa/git/global.agb.ml/data/training/tmp_preprocessed/abd_cluster_temp.csv"
ABD_TEMP_2_CSV = "/home/dibepa/git/global.agb.ml/data/training/tmp_preprocessed/abd_cluster_temp_2.csv"
ABD_CSV = "/home/dibepa/git/global.agb.ml/data/training/tmp_preprocessed/abd_cluster.csv"

# Earlier partial results and the clusters they cover
abd_tmp = pd.read_csv(ABD_TEMP_CSV)
calculated_clusters = abd_tmp["cluster"].unique()

# Flag trees whose cluster is already done, then keep only the remaining ones
tallo["remove"] = np.isin(tallo["cluster"], calculated_clusters)
already_done_index = tallo.loc[tallo["remove"]].index
tallo_reduced = tallo.drop(already_done_index)

# Show the remaining rows of cluster 1646
is_cluster_1646 = tallo_reduced["cluster"] == 1646
print(tallo_reduced[is_cluster_1646])

kdeparams = dict(
    niter=100,
    bwmin=0.2,
    bwmax=1.5,
    kernel="gaussian",
    in_splits=5,
    out_splits=10,
)

# kg per km2
# NOTE(review): the meaning of the positional arguments 10 and True is not
# visible in this notebook - check the estimate_biomass_density signature.
abd = estimate_biomass_density(
    tallo_reduced,
    "agb_kg",
    "tree_density_km2",
    "cluster",
    kdeparams,
    10,
    True,
    ABD_TEMP_2_CSV,
)

print(abd)

# Stack the earlier results on top of the new ones with a fresh index
abd = pd.concat([abd_tmp, abd], ignore_index=True)

print(abd)


abd.to_csv(ABD_CSV)

        Unnamed: 0   tree_id    division      family      genus  \
438942      438943  T_342062  Angiosperm    Rosaceae  Crataegus   
438947      438948  T_342122  Angiosperm    Rosaceae     Sorbus   
438949      438950  T_342156  Angiosperm  Betulaceae     Betula   
438950      438951  T_342157  Gymnosperm    Pinaceae      Pinus   
438952      438953  T_342178  Angiosperm    Rosaceae     Prunus   
...            ...       ...         ...         ...        ...   
439601      439602  T_350125  Gymnosperm    Pinaceae      Pinus   
439603      439604  T_350211  Gymnosperm    Pinaceae      Pinus   
439605      439606  T_350245  Gymnosperm    Pinaceae      Pinus   
439608      439609  T_350303  Gymnosperm    Pinaceae      Pinus   
439610      439611  T_350323  Gymnosperm    Pinaceae      Pinus   

                      species  latitude  longitude  stem_diameter_cm  \
438942  Crataegus polyacantha     54.67      82.83               1.0   
438947       Sorbus aucuparia     54.67      82.83 