# ***Estimation of GS and component biomass for forest sites which were created in 2020 and 2021, plus 5 observations from 2018***


# Load all data from the .xlsx file

In [1]:
# Import main libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Read the 'Main table' sheet of the forest-site workbook into a DataFrame
site_data = './input_ChEZ_site_biomass.xlsx'
df = pd.read_excel(io=site_data, sheet_name='Main table')
# Preview the first five rows
df.head(n=5)

Unnamed: 0,Site #,R_m,Polygons #,Note,S_m2,Date,WGS84 N,WGS 84 E,GPS error (HRMS)_m,Origin,...,D_live_cm,H_live_m,G_live_m2_ha,N_live_pcs,My_R_M,My_R_Mst,My_R_Mst_bark,My_R_Mcrown,My_R_Mf,L_Mab-g
0,1,12.62,1.0,near Usiv village,500.343869,2020-11-03 09:43:00,51.435741,30.107979,21.24,n,...,13.464138,17.321431,15.081839,1060,130.819322,68.588671,10.326636,11.444009,2.304647,82.337327
1,2,5.64,1.0,near Usiv village,99.932806,2020-11-03 11:10:00,51.434093,30.108523,16.69,a,...,15.02271,16.890559,39.021239,2200,322.058547,124.310057,9.77918,14.058223,7.39204,145.76032
2,3,5.64,1.0,near Usiv village,99.932806,2020-11-03 11:47:00,51.434163,30.109196,16.69,a,...,13.706278,16.786747,42.817263,2900,352.146039,137.031385,10.669502,14.037291,7.702582,158.771258
3,4,12.62,1.0,near Usiv village,500.343869,2020-11-03 13:15:00,51.434217,30.110484,19.72,a,...,20.307634,18.393206,38.841072,1200,329.088919,120.11064,9.353148,16.965882,7.353216,144.429737
4,5,8.92,1.0,near Usiv village,249.965218,2020-11-03 14:00:00,51.434056,30.11198,22.76,a,...,17.54081,19.938317,48.336986,2000,451.71382,171.190623,12.545477,16.700591,7.894129,195.785342


In [3]:
# Summarise every column of the raw site table: dtype and non-null count
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 25 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Site #              205 non-null    object 
 1   R_m                 205 non-null    float64
 2   Polygons #          103 non-null    float64
 3   Note                205 non-null    object 
 4   S_m2                205 non-null    float64
 5   Date                205 non-null    object 
 6   WGS84 N             205 non-null    float64
 7   WGS 84 E            205 non-null    float64
 8   GPS error (HRMS)_m  205 non-null    float64
 9   Origin              205 non-null    object 
 10  Origin_ID           205 non-null    int64  
 11  Dominant_specias    205 non-null    object 
 12  Species_ID          205 non-null    int64  
 13  N_trees at site     205 non-null    int64  
 14  A_year              205 non-null    int64  
 15  D_live_cm           205 non-null    float64
 16  H_live_m

In [4]:
# Selecting main working parameters
#
# Codes used in the selected ID columns:
#   "Species_ID": 0 = Pine, 1 = Spruce, 2 = Birch, 3 = Alder, 4 = Aspen, 5 = Oak
#   "Origin_ID":  0 = natural stand, 1 = artificial stand
#
# Indexing with a list of labels raises KeyError on a missing or misspelled
# column, whereas pd.DataFrame(df, columns=[...]) would silently create it as
# an all-NaN column. .copy() keeps `select` independent of `df`.
select = df[['Species_ID', 'Origin_ID', 'H_live_m', 'D_live_cm', 'G_live_m2_ha']].copy()

# Show the first five rows
select.head()

Unnamed: 0,Species_ID,Origin_ID,H_live_m,D_live_cm,G_live_m2_ha
0,2,0,17.321431,13.464138,15.081839
1,0,1,16.890559,15.02271,39.021239
2,0,1,16.786747,13.706278,42.817263
3,0,1,18.393206,20.307634,38.841072
4,0,1,19.938317,17.54081,48.336986


# Review of input data for prediction

In [5]:
# Column dtypes and non-null counts of the selected working parameters
select.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Species_ID    205 non-null    int64  
 1   Origin_ID     205 non-null    int64  
 2   H_live_m      205 non-null    float64
 3   D_live_cm     205 non-null    float64
 4   G_live_m2_ha  205 non-null    float64
dtypes: float64(3), int64(2)
memory usage: 8.1 KB


In [6]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the selected parameters
select.describe()

Unnamed: 0,Species_ID,Origin_ID,H_live_m,D_live_cm,G_live_m2_ha
count,205.0,205.0,205.0,205.0,205.0
mean,0.97561,0.458537,17.004617,17.680745,31.491
std,1.432942,0.499498,6.831136,8.497684,15.61472
min,0.0,0.0,2.70446,1.874874,0.656684
25%,0.0,0.0,12.19398,12.363541,19.766045
50%,0.0,0.0,17.466646,16.817809,32.001574
75%,2.0,1.0,21.604236,22.13312,43.559506
max,5.0,1.0,33.436497,41.664506,71.679126


# Selecting working columns for the XGBoost algorithms

In [7]:
# Feature matrix for the models: X refers to the same object as `select`
# (no copy is made), so it holds all five selected columns.
# NOTE(review): presumably the column order has to match the order used when
# the models were trained — confirm.
X = select


In [8]:
# Preview the first rows of the feature matrix X
X.head()

Unnamed: 0,Species_ID,Origin_ID,H_live_m,D_live_cm,G_live_m2_ha
0,2,0,17.321431,13.464138,15.081839
1,0,1,16.890559,15.02271,39.021239
2,0,1,16.786747,13.706278,42.817263
3,0,1,18.393206,20.307634,38.841072
4,0,1,19.938317,17.54081,48.336986


# Load the XGBoost module and the trained models for predicting GS and biomass components

In [9]:
#  Importing the main library for building model and its analysis

import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [11]:
# Load the trained models from their JSON files

def _load_xgb_regressor(model_path):
    """Return a new XGBRegressor with the model stored at `model_path` loaded into it."""
    regressor = xgb.XGBRegressor()
    regressor.load_model(model_path)
    return regressor

# GS, all-stem, stem-bark, branch and foliage models
xgb_model_GS = _load_xgb_regressor('../XGBoost_models/01_CS_other_authors_XGB_model.json')
xgb_model_allStem = _load_xgb_regressor('../XGBoost_models/02_M_all_stem_other_authors_XGB_model.json')
xgb_model_stemBark = _load_xgb_regressor('../XGBoost_models/03_M_stem_bark_other_authors_XGB_model.json')
xgb_model_br = _load_xgb_regressor('../XGBoost_models/04_M_branch_other_authors_XGB_model.json')
xgb_model_f = _load_xgb_regressor('../XGBoost_models/05_M_foliage_other_authors_XGB_model.json')

# "LN" counterparts of the models above
# (presumably fitted on log-transformed targets — TODO confirm)
xgb_model_LN_GS = _load_xgb_regressor('../XGBoost_models/01_LN_CS_other_authors_XGB_model.json')
xgb_model_LN_allStem = _load_xgb_regressor('../XGBoost_models/02_LN_M_all_stem_other_authors_XGB_model.json')
xgb_model_LN_stemBark = _load_xgb_regressor('../XGBoost_models/03_LN_M_stem_bark_other_authors_XGB_model.json')
xgb_model_LN_br = _load_xgb_regressor('../XGBoost_models/04_LN_M_branch_other_authors_XGB_model.json')
xgb_model_LN_f = _load_xgb_regressor('../XGBoost_models/05_LN_M_foliage_other_authors_XGB_model.json')

In [12]:
# Print the parameter summary of every loaded XGBoost model, one after another

print(
    xgb_model_GS,
    xgb_model_allStem,
    xgb_model_stemBark,
    xgb_model_br,
    xgb_model_f,
    xgb_model_LN_GS,
    xgb_model_LN_allStem,
    xgb_model_LN_stemBark,
    xgb_model_LN_br,
    xgb_model_LN_f,
    sep='\n',
)

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1,
             colsample_bytree=0.8829191872975016, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=['rmse'],
             gamma=0.4317747333990657, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.17029218845451208, max_bin=256,
             max_cat_to_onehot=4, max_delta_step=0, max_depth=4, max_leaves=0,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=172, n_jobs=0, num_parallel_tree=1, predictor='auto',
             random_state=0, reg_alpha=0, reg_lambda=1, ...)
XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1,
             colsample_bytree=0.7658578011184654, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=['rmse

In [13]:
# Create predicted values
#
# The `ntree_limit` argument and the `best_ntree_limit` attribute are
# deprecated since XGBoost 1.4 and removed in 2.0. predict() already limits
# itself to the best iteration when the model carries early-stopping
# information, so no explicit limit is passed; models without that information
# simply use all of their trees.

pred_GS = xgb_model_GS.predict(X)
pred_allStem = xgb_model_allStem.predict(X)
pred_stemBark = xgb_model_stemBark.predict(X)
pred_br = xgb_model_br.predict(X)
pred_f = xgb_model_f.predict(X)

# Outputs of the "LN" models are exponentiated
# (presumably to undo a log transform of the training target — TODO confirm;
# note that no bias correction is applied to the back-transformed values).
pred_T_GS = np.exp(xgb_model_LN_GS.predict(X))
pred_T_allStem = np.exp(xgb_model_LN_allStem.predict(X))
pred_T_stemBark = np.exp(xgb_model_LN_stemBark.predict(X))
pred_T_br = np.exp(xgb_model_LN_br.predict(X))
pred_T_f = np.exp(xgb_model_LN_f.predict(X))



In [14]:
# Collect the predictions into one mapping of output column name -> values.
# Each AGB entry is the sum of the all-stem, branch and foliage predictions.
data = {
    # predictions taken directly from the models
    'xgb_GS': pred_GS,
    'xgb_allStem': pred_allStem,
    'xgb_stemBark': pred_stemBark,
    'xgb_branch': pred_br,
    'xgb_foliage': pred_f,
    'xgb_AGB': pred_allStem + pred_br + pred_f,
    # the "T" series
    'xgb_T_GS': pred_T_GS,
    'xgb_T_allStem': pred_T_allStem,
    'xgb_T_stemBark': pred_T_stemBark,
    'xgb_T_branch': pred_T_br,
    'xgb_T_foliage': pred_T_f,
    'xgb_T_AGB': pred_T_allStem + pred_T_br + pred_T_f,
}

In [15]:
# Build a separate DataFrame of the predictions: one column per key of `data`
pred = pd.DataFrame(data)

In [16]:
# Print the predicted GS and biomass components as plain text
print(pred)

         xgb_GS  xgb_allStem  xgb_stemBark  xgb_branch  xgb_foliage  \
0    120.593628    54.981606      7.898229    8.124913     0.937326   
1    316.866241   116.822868      9.660381   14.680130     7.743390   
2    323.361633   113.312950     11.066424   16.290503     8.577758   
3    313.537292   140.757141     10.224663   15.069513     7.600546   
4    395.402252   136.587112     10.126310   20.770723     8.094455   
..          ...          ...           ...         ...          ...   
200  542.093140   184.126129     12.177953   16.967636     6.683193   
201   19.408239     9.746191      3.072955    2.810719     0.447873   
202    8.687505     2.876050      0.956395    3.717351     2.349898   
203  138.241318    56.116222      8.518388   20.948349     8.084133   
204   21.655817     9.283919      1.124965    3.649529     2.256010   

        xgb_AGB    xgb_T_GS  xgb_T_allStem  xgb_T_stemBark  xgb_T_branch  \
0     64.043846  118.336067      60.029221        8.074730      7.68995

In [18]:
# Show descriptive statistics of the predicted values
# NOTE(review): the stored output reports a negative minimum for xgb_allStem;
# check whether negative predictions need handling before the values are used.

pred.describe()

Unnamed: 0,xgb_GS,xgb_allStem,xgb_stemBark,xgb_branch,xgb_foliage,xgb_AGB,xgb_T_GS,xgb_T_allStem,xgb_T_stemBark,xgb_T_branch,xgb_T_foliage,xgb_T_AGB
count,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0
mean,269.991241,116.055687,11.780004,14.325582,5.039718,135.420975,268.693451,114.067444,11.073205,13.08962,4.725261,131.882309
std,171.568115,69.864189,7.529508,7.014427,2.347387,76.376099,170.970322,69.288445,6.525244,7.175945,2.70023,75.958527
min,8.02018,-0.888978,0.659912,2.714631,0.391777,4.6613,4.946199,2.557169,0.745822,0.940267,0.346663,4.841173
25%,135.103104,62.614922,7.854692,9.557897,2.86718,74.811768,130.890869,59.497803,7.327527,8.554614,2.215703,74.318825
50%,248.024765,113.161118,10.224663,13.785192,5.696259,133.00061,245.347137,111.0886,10.357712,13.339162,4.613266,129.578629
75%,387.771851,162.015457,15.705181,18.118845,6.991127,187.814423,392.363586,162.543427,13.712758,16.988461,6.817222,180.721146
max,685.973145,329.174469,58.481926,42.24696,9.618768,356.436066,636.920898,308.231232,39.897907,57.858124,11.74358,335.63382


In [19]:
# Add predicted values to the working dataframe.
# Prediction columns left over from an earlier run of this cell are dropped
# first, so re-running the cell replaces them instead of raising
# "columns overlap but no suffix specified". On a first run the drop is a no-op.
# join() aligns the rows of `pred` with those of `df` on the index.

df = df.drop(columns=pred.columns, errors='ignore').join(pred)

In [20]:
# Save the dataframe with the predicted values to an .xlsx file
# (sheet name 'output'; index=False leaves the row index out of the sheet)

df.to_excel('./xgb_sites_biomass.xlsx', sheet_name='output', index=False)