# ***Estimation of GS and component biomass for forest sites which created 2020 and 2021 based on full data colection ***


# Load all data from .csv file

In [1]:
# Import main libraries

import numpy as np
import pandas as pd

In [2]:
# Load forest sites dataset
site_data = './output_other_authors_sites_biomass_20230125.xlsx'
df = pd.read_excel(site_data, sheet_name='output_2')
df.head()

Unnamed: 0,Groups,Species,Species_ID,Origin,Origin_ID,SI,A_years,H_m,DBH_cm,N_trees,...,My_R_Mst,My_R_Mcrown,My_R_Mf,My_R_Mab-g,xgb_GS,xgb_allStem,xgb_stemBark,xgb_branch,xgb_foliage,xgb_Ab_g
0,1,Alder,3,a,1.0,2,33,16.0,12.4,1700.0,...,74.209749,5.049571,2.213908,81.473227,172.530365,76.914726,12.753486,7.87004,2.562697,87.347466
1,1,Alder,3,a,1.0,-2,42,22.9,19.4,1136.0,...,170.3328,8.391685,2.493778,181.218262,418.893188,167.53447,21.907787,4.212999,1.790138,173.537598
2,2,Alder,3,a,1.0,2,33,16.0,12.4,1700.0,...,74.209749,5.049571,2.213908,81.473227,172.530365,76.914726,12.753486,7.87004,2.562697,87.347466
3,2,Alder,3,a,1.0,0,43,21.0,20.8,660.0,...,102.572743,6.356784,1.895789,110.825316,238.870056,110.138451,17.489775,10.177155,2.237852,122.553459
4,2,Alder,3,a,1.0,0,28,15.4,14.2,1369.0,...,74.175521,6.940699,2.528305,83.644525,159.462555,73.923576,11.62184,7.101619,2.530858,83.556053


In [3]:
# Information on all columns of data frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 857 entries, 0 to 856
Data columns (total 35 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Groups          857 non-null    int64  
 1   Species         857 non-null    object 
 2   Species_ID      857 non-null    int64  
 3   Origin          856 non-null    object 
 4   Origin_ID       856 non-null    float64
 5   SI              857 non-null    int64  
 6   A_years         857 non-null    int64  
 7   H_m             857 non-null    float64
 8   DBH_cm          856 non-null    float64
 9   N_trees         846 non-null    float64
 10  RS(P)           854 non-null    float64
 11  BA_sq_m_sq_m    857 non-null    float64
 12  GS_cub_m        857 non-null    float64
 13  ALL_STEM_t_ha   855 non-null    float64
 14  Stem_wood_t_ha  747 non-null    float64
 15  Stem_bark_t_ha  747 non-null    float64
 16  Crown_t_ha      843 non-null    float64
 17  Foliage_t_ha    854 non-null    flo

In [4]:
# Selecting main working parameters

select = pd.DataFrame(df, columns= ['Species_ID', 'Origin_ID', 'H_m', 'DBH_cm', 'BA_sq_m_sq_m'])
'''
In the "ID_species" column: "0" is Pine, "1" is Spruce, "2" is Birch, "3" is Alder, 
"4" is Aspen, "5" is Oak
In the "ID_origin" column: "0" is nutural stand, "1" is artificial stand
'''

# Show the first five columns
select.head()

Unnamed: 0,Species_ID,Origin_ID,H_m,DBH_cm,BA_sq_m_sq_m
0,3,1.0,16.0,12.4,20.6
1,3,1.0,22.9,19.4,33.5
2,3,1.0,16.0,12.4,20.6
3,3,1.0,21.0,20.8,22.4
4,3,1.0,15.4,14.2,21.7


# Review of input data for classification

In [5]:
# Information on all columns of data frame
select.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 857 entries, 0 to 856
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Species_ID    857 non-null    int64  
 1   Origin_ID     856 non-null    float64
 2   H_m           857 non-null    float64
 3   DBH_cm        856 non-null    float64
 4   BA_sq_m_sq_m  857 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 33.6 KB


In [6]:
# Descriptive statistics of main parameters
select.describe()

Unnamed: 0,Species_ID,Origin_ID,H_m,DBH_cm,BA_sq_m_sq_m
count,857.0,856.0,857.0,856.0,857.0
mean,1.529755,0.279206,16.452275,17.132243,26.329988
std,1.823872,0.448871,7.039954,10.439115,10.629993
min,0.0,0.0,1.5,1.1,1.0
25%,0.0,0.0,11.2,9.9,19.0
50%,0.0,0.0,16.1,15.5,26.8
75%,3.0,1.0,21.2,22.1,34.5
max,5.0,1.0,38.3,94.0,51.5


# Selecting working columns for the XGBoost algorithms

In [7]:
# We select independent variables and predicting parameter 
X = select


In [8]:
# Columns in "X" data collections
X.head()

Unnamed: 0,Species_ID,Origin_ID,H_m,DBH_cm,BA_sq_m_sq_m
0,3,1.0,16.0,12.4,20.6
1,3,1.0,22.9,19.4,33.5
2,3,1.0,16.0,12.4,20.6
3,3,1.0,21.0,20.8,22.4
4,3,1.0,15.4,14.2,21.7


# Load XGBoost module and learned model for predict of species and other lands

In [9]:
#  Importing the main library for building model and its analysis

import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [10]:
# Load learned models

xgb_model_GS = xgb.XGBRegressor()
xgb_model_allStem = xgb.XGBRegressor()
xgb_model_stemBark = xgb.XGBRegressor()
xgb_model_br = xgb.XGBRegressor()
xgb_model_f = xgb.XGBRegressor()
xgb_model_LN_GS = xgb.XGBRegressor()
xgb_model_LN_allStem = xgb.XGBRegressor()
xgb_model_LN_stemBark = xgb.XGBRegressor()
xgb_model_LN_br = xgb.XGBRegressor()
xgb_model_LN_f = xgb.XGBRegressor()
xgb_model_GS.load_model('./XGBoost_models/01_CS_other_authors_XGB_model.json')
xgb_model_allStem.load_model('./XGBoost_models/02_M_all_stem_other_authors_XGB_model.json')
xgb_model_stemBark.load_model('./XGBoost_models/03_M_stem_bark_other_authors_XGB_model.json')
xgb_model_br.load_model('./XGBoost_models/04_M_branch_other_authors_XGB_model.json')
xgb_model_f.load_model('./XGBoost_models/05_M_foliage_other_authors_XGB_model.json')
xgb_model_LN_GS.load_model('./XGBoost_models/01_LN_CS_other_authors_XGB_model.json')
xgb_model_LN_allStem.load_model('./XGBoost_models/02_LN_M_all_stem_other_authors_XGB_model.json')
xgb_model_LN_stemBark.load_model('./XGBoost_models/03_LN_M_stem_bark_other_authors_XGB_model.json')
xgb_model_LN_br.load_model('./XGBoost_models/04_LN_M_branch_other_authors_XGB_model.json')
xgb_model_LN_f.load_model('./XGBoost_models/05_LN_M_foliage_other_authors_XGB_model.json')

In [11]:
# Show all parameters of XGBoost models

print(xgb_model_GS)
print(xgb_model_allStem)
print(xgb_model_stemBark)
print(xgb_model_br)
print(xgb_model_f)
print(xgb_model_LN_GS)
print(xgb_model_LN_allStem)
print(xgb_model_LN_stemBark)
print(xgb_model_LN_br)
print(xgb_model_LN_f)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.8821030853351247,
             enable_categorical=False, eval_metric=['rmse'],
             gamma=0.011864622396297486, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.1209976158148326,
             max_delta_step=0, max_depth=4, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=178, n_jobs=4,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=0.5339951896800694,
             tree_method='auto', validate_parameters=1, verbosity=None)
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.8944536735419889,
             enable_categorical=False, eval_metric=['rmse'],
             gamma=0.04791876153524799, gpu_id=-1, importance_type=None,
     

In [12]:
# Create predict values

pred_GS = xgb_model_GS.predict(X, ntree_limit=xgb_model_GS.best_ntree_limit)
pred_allStem = xgb_model_allStem.predict(X, ntree_limit=xgb_model_allStem.best_ntree_limit)
pred_stemBark = xgb_model_stemBark.predict(X, ntree_limit=xgb_model_stemBark.best_ntree_limit)
pred_br = xgb_model_br.predict(X, ntree_limit=xgb_model_br.best_ntree_limit)
pred_f = xgb_model_f.predict(X, ntree_limit=xgb_model_f.best_ntree_limit)
pred_LN_GS = xgb_model_LN_GS.predict(X, ntree_limit=xgb_model_GS.best_ntree_limit)
pred_LN_allStem = xgb_model_LN_allStem.predict(X, ntree_limit=xgb_model_allStem.best_ntree_limit)
pred_LN_stemBark = xgb_model_LN_stemBark.predict(X, ntree_limit=xgb_model_stemBark.best_ntree_limit)
pred_LN_br = xgb_model_LN_br.predict(X, ntree_limit=xgb_model_br.best_ntree_limit)
pred_LN_f = xgb_model_LN_f.predict(X, ntree_limit=xgb_model_f.best_ntree_limit)



In [13]:
# Convert predicted parameters to real values

R_pred_GS = np.exp(pred_LN_GS)
R_pred_allStem = np.exp(pred_LN_allStem)
R_pred_stemBark = np.exp(pred_LN_stemBark)
R_pred_br = np.exp(pred_LN_br)
R_pred_f = np.exp(pred_LN_f)

In [14]:
data = {'xgb_GS_ver2':pred_GS, 'xgb_allStem_ver2':pred_allStem, 'xgb_stemBark_ver2':pred_stemBark, 'xgb_branch_ver2':pred_br, 'xgb_foliage_ver2':pred_f, 'T_ln_xgb_GS_ver2':R_pred_GS, 'T_ln_xgb_allStem_ver2':R_pred_allStem, 'T_ln_xgb_stemBark_ver2':R_pred_stemBark, 'T_ln_xgb_branch_ver2':R_pred_br, 'T_ln_xgb_foliage_ver2':R_pred_f}

In [15]:
# Add predicted values to the working dataframe
pred = pd.DataFrame(data)

In [16]:
# Show output data of biomass components
print(pred)

     Ln_xgb_GS  Ln_xgb_allStem  Ln_xgb_stemBark  Ln_xgb_branch  \
0     5.140347        4.326847         2.392613       1.930532   
1     5.988477        5.154976         3.195204       1.898181   
2     5.140347        4.326847         2.392613       1.930532   
3     5.429177        4.663519         2.764480       2.172356   
4     5.130596        4.341617         2.434403       2.056040   
..         ...             ...              ...            ...   
852   4.967692        4.230946         2.667916       2.902785   
853   5.536481        4.791327         3.180760       2.896634   
854   5.797106        5.008728         3.266805       3.191193   
855   6.032084        5.305983         3.741702       3.240403   
856   6.127394        5.323916         3.819647       3.595645   

     Ln_xgb_foliage  T_ln_xgb_GS  T_ln_xgb_allStem  T_ln_xgb_stemBark  \
0          0.718511   170.774933         75.705215          10.942053   
1          0.612538   398.806671        173.291626          2

In [17]:
# Show description statistics

pred.describe()

Unnamed: 0,Ln_xgb_GS,Ln_xgb_allStem,Ln_xgb_stemBark,Ln_xgb_branch,Ln_xgb_foliage,T_ln_xgb_GS,T_ln_xgb_allStem,T_ln_xgb_stemBark,T_ln_xgb_branch,T_ln_xgb_foliage
count,857.0,857.0,857.0,857.0,857.0,857.0,857.0,857.0,857.0,857.0
mean,5.166968,4.34205,2.161755,2.314026,1.362265,228.242706,101.499664,11.21979,12.692572,4.610232
std,0.835738,0.857159,0.763399,0.657023,0.622703,142.161331,66.323662,8.361879,10.248746,2.488325
min,1.081797,0.790769,-0.859796,0.082224,-1.420957,2.949975,2.205091,0.423248,1.085699,0.241483
25%,4.745595,3.886847,1.820216,1.930532,0.988928,115.076256,48.756916,6.173193,6.893176,2.688351
50%,5.329025,4.554136,2.193877,2.325948,1.460269,206.236755,95.024643,8.96992,10.23638,4.307118
75%,5.777792,4.928813,2.632865,2.621749,1.823859,323.045105,138.215286,13.913574,13.75977,6.195722
max,6.578265,5.840845,3.822549,4.143004,2.993153,719.2901,344.069824,45.720596,62.991791,19.948483


In [18]:
# Add predicted values to the working dataframe

df = df.join(pred)

In [19]:
# Calculating the sum of above-ground biomass using the XGBoost algorithm

sum_biomass = df['xgb_allStem'] + df['xgb_branch'] + df['xgb_foliage']
df['T_ln_xgb_Ab_g'] = sum_biomass

ln_sum_biomass = df['T_ln_xgb_allStem'] + df['T_ln_xgb_branch'] + df['T_ln_xgb_foliage']
df['T_ln_xgb_Ab_g_ver2'] = ln_sum_biomass

In [20]:
# Save new dataframe with predict values to .xlsx-file

df.to_excel('./Est_biomass_use_final_models.xlsx', sheet_name='output', index=False)