# ***Estimation of GS and component biomass for forest sites established in 2020 and 2021***


# Load all data from the .xlsx file

In [1]:
# Import main libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Load the forest sites dataset: read the 'Main table' sheet of the Excel
# workbook into the working DataFrame `df`.
# NOTE(review): the workbook path below is an absolute, machine-specific
# location; the notebook cannot run on another machine without editing it.
site_data = '/home/dima/Desktop/JupyterLab/GIS and ML algoritms/1_Estimation of tree biomass/Est_biomass_forest_sites/Main_site_parameters.xlsx'
df = pd.read_excel(site_data, sheet_name='Main table')
# Preview the first rows of the loaded table
df.head()

Unnamed: 0,Site #,R_m,Polygons #,Note,S_m2,Date,WGS84 N,WGS 84 E,GPS error (HRMS)_m,Origin,...,AEDR_mean,AEDR_median,AEDR_std,Distance_km,Azimuth,My_R_M,My_R_Mst,My_R_Mcrown,My_R_Mf,L_Mab-g
0,36,8.92,6.0,near Kryva Hora village,249.965218,3/13/20 16:10,51.399713,30.203573,15.17,a,...,2.32963,2.32,0.23701,5.210114,9,103.374801,54.137266,9.394812,2.090228,65.622305
1,38,12.62,6.0,near Kryva Hora village,500.343869,3/13/20 16:45,51.396984,30.204024,15.17,a,...,2.4708,2.42,0.15763,5.046612,10,116.859921,61.020342,10.602809,1.853099,73.476251
2,90,12.62,10.0,the gradient near the road to Chystohalivky vi...,500.343869,6/18/20 10:06,51.38222,30.031455,16.69,a,...,2.441154,2.41,0.138017,5.058395,11,192.270648,101.330916,11.498063,2.418181,115.24716
3,034_2021,8.92,,2021 year,249.965218,2021-09-04 10:24:00,51.28046,29.94712,12.14,a,...,2.546522,2.52,0.132324,5.070074,11,201.294211,106.538867,12.371647,3.151581,122.062095
4,042_2021,5.64,,2021 year,99.932806,6/15/21 16:17,51.13401,30.13253,16.69,a,...,2.143333,2.125,0.113639,5.084393,13,154.452294,82.200459,9.686,3.977349,95.863808


In [4]:
# Print a summary of every column in `df`: name, non-null count and dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 36 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Site #              205 non-null    object 
 1   R_m                 205 non-null    float64
 2   Polygons #          103 non-null    float64
 3   Note                205 non-null    object 
 4   S_m2                205 non-null    float64
 5   Date                205 non-null    object 
 6   WGS84 N             205 non-null    float64
 7   WGS 84 E            205 non-null    float64
 8   GPS error (HRMS)_m  205 non-null    float64
 9   Origin              205 non-null    object 
 10  Origin_ID           205 non-null    int64  
 11  Dominant_specias    205 non-null    object 
 12  Species_ID          205 non-null    int64  
 13  N_trees at site     205 non-null    int64  
 14  A_year              205 non-null    int64  
 15  D_live_cm           205 non-null    float64
 16  H_live_m

In [5]:
# Select the main working parameters (the input columns passed to the models).
#
# Category codes used in the ID columns:
#   Species_ID: 0 = Pine, 1 = Spruce, 2 = Birch, 3 = Alder, 4 = Aspen, 5 = Oak
#   Origin_ID:  0 = natural stand, 1 = artificial stand
#
# `df` is indexed by label instead of going through pd.DataFrame(df, columns=...):
# the constructor silently creates an all-NaN column for a missing or misspelled
# name, whereas label indexing raises KeyError. `.copy()` keeps `select`
# independent of `df`.
select = df[['Species_ID', 'Origin_ID', 'H_live_m', 'D_live_cm', 'G_live_m2_ha']].copy()

# Show the first five rows
select.head()

Unnamed: 0,Species_ID,Origin_ID,H_live_m,D_live_cm,G_live_m2_ha
0,2,1,13.589353,12.307179,14.277383
1,2,1,16.436649,16.331526,13.397533
2,2,1,17.208728,20.745563,24.99617
3,2,1,17.87175,14.681228,26.411903
4,2,1,12.19398,8.229658,29.275753


# Review of input data for the regression models

In [6]:
# Information on all columns of data frame
select.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Species_ID    205 non-null    int64  
 1   Origin_ID     205 non-null    int64  
 2   H_live_m      205 non-null    float64
 3   D_live_cm     205 non-null    float64
 4   G_live_m2_ha  205 non-null    float64
dtypes: float64(3), int64(2)
memory usage: 8.1 KB


In [7]:
# Descriptive statistics of main parameters
select.describe()

Unnamed: 0,Species_ID,Origin_ID,H_live_m,D_live_cm,G_live_m2_ha
count,205.0,205.0,205.0,205.0,205.0
mean,0.97561,0.458537,17.004617,17.680745,31.491
std,1.432942,0.499498,6.831136,8.497684,15.61472
min,0.0,0.0,2.70446,1.874874,0.656684
25%,0.0,0.0,12.19398,12.363541,19.766045
50%,0.0,0.0,17.466646,16.817809,32.001574
75%,2.0,1.0,21.604236,22.13312,43.559506
max,5.0,1.0,33.436497,41.664506,71.679126


# Selecting working columns for the XGBoost algorithms

In [8]:
# Use the selected columns as the model input matrix X (independent variables
# only; no predicted/target column is part of `select`).
# Note: X is bound to the same object as `select` - no copy is made.
X = select


In [9]:
# Preview the columns and first rows of the input matrix X
X.head()

Unnamed: 0,Species_ID,Origin_ID,H_live_m,D_live_cm,G_live_m2_ha
0,2,1,13.589353,12.307179,14.277383
1,2,1,16.436649,16.331526,13.397533
2,2,1,17.208728,20.745563,24.99617
3,2,1,17.87175,14.681228,26.411903
4,2,1,12.19398,8.229658,29.275753


# Load the XGBoost module and the trained models for predicting GS and biomass components

In [10]:
#  Importing the main library for building model and its analysis

import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [12]:
# Load the previously trained models

def _load_regressor(model_path):
    """Create an XGBRegressor and restore its state from the JSON file at model_path."""
    regressor = xgb.XGBRegressor()
    regressor.load_model(model_path)
    return regressor


# One regressor per predicted quantity, each restored from its own JSON file.
xgb_model_GS = _load_regressor('/home/dima/Desktop/JupyterLab/GIS and ML algoritms/1_Estimation of tree biomass/Est_biomass_other_authors/xgb_model_GS.json')
xgb_model_allStem = _load_regressor('/home/dima/Desktop/JupyterLab/GIS and ML algoritms/1_Estimation of tree biomass/Est_biomass_other_authors/xgb_model_M_all_stem.json')
xgb_model_stemBark = _load_regressor('/home/dima/Desktop/JupyterLab/GIS and ML algoritms/1_Estimation of tree biomass/Est_biomass_other_authors/xgb_model_M_stem_bark.json')
xgb_model_br = _load_regressor('/home/dima/Desktop/JupyterLab/GIS and ML algoritms/1_Estimation of tree biomass/Est_biomass_other_authors/xgb_model_M_brench.json')
xgb_model_f = _load_regressor('/home/dima/Desktop/JupyterLab/GIS and ML algoritms/1_Estimation of tree biomass/Est_biomass_other_authors/xgb_model_M_foliage.json')

In [13]:
# Print the parameter summary of each loaded XGBoost model, in load order

for loaded_model in (
    xgb_model_GS,
    xgb_model_allStem,
    xgb_model_stemBark,
    xgb_model_br,
    xgb_model_f,
):
    print(loaded_model)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.8829191872975016,
             enable_categorical=False, eval_metric=['rmse'],
             gamma=0.4317747333990657, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.17029218845451208,
             max_delta_step=0, max_depth=4, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=172, n_jobs=4,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=0.894854733579894,
             tree_method='auto', validate_parameters=1, verbosity=None)
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.7658578011184654,
             enable_categorical=False, eval_metric=['rmse'],
             gamma=0.19219571931631796, gpu_id=-1, importance_type=None,
       

In [14]:
# Predict GS and the biomass components for every row of X.
#
# The `ntree_limit` argument and the `best_ntree_limit` attribute were
# deprecated in XGBoost 1.4 and removed in 2.0, so passing them fails on
# current releases. They are not needed: `predict` already uses the model's
# best iteration when one was recorded (early stopping) and all trees otherwise.

pred_GS = xgb_model_GS.predict(X)
pred_allStem = xgb_model_allStem.predict(X)
pred_stemBark = xgb_model_stemBark.predict(X)
pred_br = xgb_model_br.predict(X)
pred_f = xgb_model_f.predict(X)



In [15]:
# Collect the prediction arrays under the column names used in the output table
data = {
    'xgb_GS': pred_GS,
    'xgb_allStem': pred_allStem,
    'xgb_stemBark': pred_stemBark,
    'xgb_branch': pred_br,
    'xgb_foliage': pred_f,
}

In [16]:
# Build a new DataFrame of the predicted values, one column per entry in `data`
pred = pd.DataFrame(data)

In [17]:
# Show the predicted GS and biomass components.
# A bare last expression uses the notebook's rich table rendering instead of
# the plain-text repr produced by print(); long frames are still truncated
# according to the pandas display options.
pred

         xgb_GS  xgb_allStem  xgb_stemBark  xgb_branch  xgb_foliage
0    114.526619    48.179855      8.173324   11.985409     3.098561
1    114.684410    59.773918      9.872718    8.177416     2.313968
2    216.876602    92.307297     16.811258   15.625483     3.877145
3    216.899292   109.654404     18.969255   13.896049     4.341577
4    181.852356    86.640785     13.627847    9.598576     5.098509
..          ...          ...           ...         ...          ...
200  246.366943   109.859985      8.817554   11.508252     5.798512
201  192.956726    75.946426      3.892488   15.341810     5.604110
202  130.177994    50.772526      6.351421    7.717212     7.269567
203  519.679565   227.409241     15.379052   21.424997     6.018657
204  381.925568   152.316772      8.690419   13.684348     5.836356

[205 rows x 5 columns]


In [18]:
# Show descriptive statistics of the predicted values
# NOTE(review): the recorded output has a negative minimum for xgb_allStem
# (-1.07). If that column is a biomass (as the notebook title suggests), a
# negative value is not physically meaningful - check the affected sites.

pred.describe()

Unnamed: 0,xgb_GS,xgb_allStem,xgb_stemBark,xgb_branch,xgb_foliage
count,205.0,205.0,205.0,205.0,205.0
mean,270.297455,113.181343,11.496511,13.618158,5.13124
std,172.042114,66.443817,7.383583,6.560255,2.196967
min,9.883698,-1.073571,0.170447,2.110397,0.375821
25%,135.839706,60.887459,7.20055,9.706811,3.113811
50%,246.230118,109.859985,10.027045,13.57025,5.444109
75%,397.243683,165.148361,15.379052,17.049719,7.134203
max,670.750427,260.462006,52.343143,41.051495,8.737684


In [19]:
# Add the predicted values to the working dataframe (joined on the row index).
# Prediction columns left over from an earlier run of this cell are dropped
# first, so re-running the cell replaces them instead of failing in
# DataFrame.join with a "columns overlap but no suffix specified" ValueError.
# On the first run nothing is dropped and the result is the plain join.

df = df.drop(columns=pred.columns, errors='ignore').join(pred)

In [20]:
# Total above-ground biomass: element-wise sum of the XGBoost predictions for
# all-stem, branch and foliage, stored in the new column 'xgb_Ab_g'

sum_biomass = df['xgb_allStem'].add(df['xgb_branch']).add(df['xgb_foliage'])
df['xgb_Ab_g'] = sum_biomass

In [21]:
# Write the dataframe, including the predicted values, to an .xlsx file
# (sheet 'output', row index not written)

df.to_excel(
    '/home/dima/Desktop/JupyterLab/GIS and ML algoritms/1_Estimation of tree biomass/Est_biomass_forest_sites/summ_sites_biomass.xlsx',
    sheet_name='output',
    index=False,
)