## Processing with biomass data of other authors

# Load all data from .csv file

In [1]:
# Import main libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import shap

In [2]:
# Load biomass dataset
input_data = './FULL_DB_BIOMASS_OTHER_ALL_AUTHORS.csv'
df = pd.read_csv(input_data)
df.head()

Unnamed: 0,Groups,Species,Species_ID,Origin,Origin_ID,SI,A_years,H_m,DBH_cm,N_trees,...,Stem_wood_t_ha,Stem_bark_t_ha,Crown_t_ha,Foliage_t_ha,LN_GS_cub_m,LN_ALL_STEM_t_ha,LN_Stem_wood_t_ha,LN_Stem_bark_t_ha,LN_Crown_t_ha,LN_Foliage_t_ha
0,1,Pine,0,n,0.0,4,50,9.7,14.6,486.0,...,,,8.4,1.6,3.78,2.94,,,2.13,0.47
1,1,Pine,0,n,0.0,2,65,20.6,20.6,785.0,...,,,11.5,4.1,5.54,4.73,,,2.44,1.41
2,1,Pine,0,n,0.0,2,50,13.6,15.4,1745.0,...,,,13.7,4.1,5.25,4.48,,,2.62,1.41
3,1,Pine,0,n,0.0,1,60,23.3,22.2,665.0,...,,,14.4,5.4,5.63,4.67,,,2.67,1.69
4,1,Pine,0,n,0.0,3,65,15.6,16.4,955.0,...,,,14.2,3.4,4.96,4.2,,,2.65,1.22


# List of all parameters for est. of bioproductivity and RN content
"""

All parameters:

'Groups','Species_ID','Origin_ID','SI','A_years','H_m','DBH_cm','RS(P)',          'BA_sq_m_ha','GS_cub_m','ALL_STEM_t_ha','Stem_wood_t_ha','Stem_bark_t_ha',
'Crown_t_ha','Foliage_t_ha'
'LN_GS_cub_m', 'LN_ALL_STEM_t_ha', 'LN_Stem_wood_t_ha', 'LN_Stem_bark_t_ha', 'LN_Crown_t_ha', 'LN_Foliage_t_ha'

"""

In [3]:
# Selecting main working parameters
select = pd.DataFrame(df, columns=['Species_ID','Origin_ID','H_m','DBH_cm', 'BA_sq_m_ha','GS_cub_m','ALL_STEM_t_ha','Stem_wood_t_ha','Stem_bark_t_ha',
'Crown_t_ha','Foliage_t_ha', 'LN_GS_cub_m', 'LN_ALL_STEM_t_ha', 'LN_Stem_wood_t_ha', 'LN_Stem_bark_t_ha', 'LN_Crown_t_ha', 'LN_Foliage_t_ha'])
            
'''
In "Species_ID" column: "0" is Pine, "1" is Spruce, "2" is Birch, "3" is Alder, "4" is Aspen, "5" is Oak
In "Origin_ID" column: "0.0" is nutural stand, "1.0" is artificial stand
'''

# Show the first five columns
select.head()

Unnamed: 0,Species_ID,Origin_ID,H_m,DBH_cm,BA_sq_m_ha,GS_cub_m,ALL_STEM_t_ha,Stem_wood_t_ha,Stem_bark_t_ha,Crown_t_ha,Foliage_t_ha,LN_GS_cub_m,LN_ALL_STEM_t_ha,LN_Stem_wood_t_ha,LN_Stem_bark_t_ha,LN_Crown_t_ha,LN_Foliage_t_ha
0,0,0.0,9.7,14.6,9.7,43.9,18.9,,,8.4,1.6,3.78,2.94,,,2.13,0.47
1,0,0.0,20.6,20.6,32.2,254.0,113.1,,,11.5,4.1,5.54,4.73,,,2.44,1.41
2,0,0.0,13.6,15.4,35.2,190.0,87.8,,,13.7,4.1,5.25,4.48,,,2.62,1.41
3,0,0.0,23.3,22.2,29.6,277.7,106.5,,,14.4,5.4,5.63,4.67,,,2.67,1.69
4,0,0.0,15.6,16.4,20.4,142.5,67.0,,,14.2,3.4,4.96,4.2,,,2.65,1.22


# Pre-analysis of parameters

In [4]:
# Information on all columns of data frame
select.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 857 entries, 0 to 856
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Species_ID         857 non-null    int64  
 1   Origin_ID          856 non-null    float64
 2   H_m                857 non-null    float64
 3   DBH_cm             856 non-null    float64
 4   BA_sq_m_ha         857 non-null    float64
 5   GS_cub_m           857 non-null    float64
 6   ALL_STEM_t_ha      855 non-null    float64
 7   Stem_wood_t_ha     747 non-null    float64
 8   Stem_bark_t_ha     747 non-null    float64
 9   Crown_t_ha         843 non-null    float64
 10  Foliage_t_ha       854 non-null    float64
 11  LN_GS_cub_m        857 non-null    float64
 12  LN_ALL_STEM_t_ha   855 non-null    float64
 13  LN_Stem_wood_t_ha  747 non-null    float64
 14  LN_Stem_bark_t_ha  747 non-null    float64
 15  LN_Crown_t_ha      843 non-null    float64
 16  LN_Foliage_t_ha    854 non

In [5]:
# Discribe statistic of forest sites by species and origin
df.groupby(["Species_ID"])["LN_GS_cub_m", "ALL_STEM_t_ha"].describe()

Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.


Unnamed: 0_level_0,LN_GS_cub_m,LN_GS_cub_m,LN_GS_cub_m,LN_GS_cub_m,LN_GS_cub_m,LN_GS_cub_m,LN_GS_cub_m,LN_GS_cub_m,ALL_STEM_t_ha,ALL_STEM_t_ha,ALL_STEM_t_ha,ALL_STEM_t_ha,ALL_STEM_t_ha,ALL_STEM_t_ha,ALL_STEM_t_ha,ALL_STEM_t_ha
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
Species_ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
0,458.0,5.192052,0.897136,0.88,4.835,5.415,5.8075,6.6,457.0,97.049891,61.854506,1.8,47.5,92.7,135.9,310.7
1,7.0,5.49,0.589463,4.65,5.035,5.7,5.965,6.08,7.0,123.814286,61.879895,46.3,70.65,133.9,174.55,196.1
2,120.0,4.741667,0.702628,2.31,4.215,4.855,5.3025,5.92,120.0,74.495,44.523333,5.1,37.275,68.1,103.075,193.5
3,115.0,5.345478,0.624346,3.32,4.925,5.5,5.8,6.31,115.0,107.607826,55.589186,11.0,62.7,106.8,145.45,251.6
4,66.0,5.057273,0.732497,3.05,4.6175,5.01,5.6675,6.26,66.0,84.94697,58.875162,8.2,40.75,61.2,125.0,245.4
5,91.0,5.43022,0.830515,2.2,5.07,5.61,6.03,6.4,90.0,173.065556,99.100647,5.0,93.075,161.3,253.375,363.9


In [6]:
# Descriptive statistics of main parameters
select.describe()

Unnamed: 0,Species_ID,Origin_ID,H_m,DBH_cm,BA_sq_m_ha,GS_cub_m,ALL_STEM_t_ha,Stem_wood_t_ha,Stem_bark_t_ha,Crown_t_ha,Foliage_t_ha,LN_GS_cub_m,LN_ALL_STEM_t_ha,LN_Stem_wood_t_ha,LN_Stem_bark_t_ha,LN_Crown_t_ha,LN_Foliage_t_ha
count,857.0,856.0,857.0,856.0,857.0,857.0,855.0,747.0,747.0,843.0,854.0,857.0,855.0,747.0,747.0,843.0,854.0
mean,1.529755,0.279206,16.452275,17.132243,26.329988,229.013069,102.590877,92.779518,12.060107,13.821969,4.749356,5.166919,4.343766,4.246975,2.201232,2.318149,1.364906
std,1.823872,0.448871,7.039954,10.439115,10.629993,144.362106,68.758819,61.992238,9.595428,11.567546,2.826586,0.83968,0.869479,0.854856,0.806766,0.829446,0.672551
min,0.0,0.0,1.5,1.1,1.0,2.4,1.8,2.0,0.3,0.5,0.2,0.88,0.59,0.69,-1.2,-0.69,-1.61
25%,0.0,0.0,11.2,9.9,19.0,116.0,47.5,42.9,5.95,6.61,2.6,4.75,3.86,3.76,1.78,1.89,0.96
50%,0.0,0.0,16.1,15.5,26.8,206.0,93.5,84.6,9.3,10.82,4.3,5.33,4.54,4.44,2.23,2.38,1.46
75%,3.0,1.0,21.2,22.1,34.5,320.0,140.5,125.5,15.3,16.435,6.3675,5.77,4.95,4.83,2.73,2.8,1.85
max,5.0,1.0,38.3,94.0,51.5,735.0,363.9,315.2,65.7,69.12,21.73,6.6,5.9,5.75,4.19,4.24,3.08


# Selecting working columns for the XGBoost algorithms

In [7]:
# We select independent variables and predicting parameter 
X = pd.DataFrame(select, columns= ['Species_ID','Origin_ID','H_m','DBH_cm','BA_sq_m_ha'])
y = pd.DataFrame(select, columns= ['GS_cub_m'])

In [8]:
# Columns in "X" data collections
X.head()

Unnamed: 0,Species_ID,Origin_ID,H_m,DBH_cm,BA_sq_m_ha
0,0,0.0,9.7,14.6,9.7
1,0,0.0,20.6,20.6,32.2
2,0,0.0,13.6,15.4,35.2
3,0,0.0,23.3,22.2,29.6
4,0,0.0,15.6,16.4,20.4


In [9]:
# Columns in "y" data collections
y.head()

Unnamed: 0,GS_cub_m
0,43.9
1,254.0
2,190.0
3,277.7
4,142.5


# Create first XGBoost model for estimate varibiality of output stats

In [10]:
#  Importing the main library for building model and its analysis
import xgboost as xgb
from scipy.stats import uniform, randint
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import cross_val_score, KFold, RandomizedSearchCV, train_test_split
from sklearn.datasets import make_classification

In [11]:
# K-Folds cross-validation for estimation "quality" of input data for building model
X_kfold = pd.DataFrame(X).to_numpy()
y_kfold = pd.DataFrame(y).to_numpy()

In [12]:
# Function for comparing datasets on homogeneity 
def display_scores(scores):
    print("    Scores: {0}\n    Mean: {1:.3f}\n    Std: {2:.3f}".format(scores, np.mean(scores), np.std(scores)))

In [13]:
# Output results of K-Folds cross-validation for XGBoost model
kfold = KFold(n_splits=3, shuffle=True)
scores = []
for train_index, test_index in kfold.split(X_kfold):   
    X_train, X_test = X_kfold[train_index], X_kfold[test_index]
    y_train, y_test = y_kfold[train_index], y_kfold[test_index]

    xgb_model = xgb.XGBRegressor(objective="reg:squarederror")
    xgb_model.fit(X_train, y_train)
    
    y_pred = xgb_model.predict(X_test)
    
    scores.append(r2_score(y_test, y_pred))
    
print('R square (R2): \n')
display_scores((scores))
print('\n', 20*'-')


scores = []
for train_index, test_index in kfold.split(X_kfold):   
    X_train, X_test = X_kfold[train_index], X_kfold[test_index]
    y_train, y_test = y_kfold[train_index], y_kfold[test_index]

    xgb_model = xgb.XGBRegressor(objective="reg:squarederror")
    xgb_model.fit(X_train, y_train)
    
    y_pred = xgb_model.predict(X_test)
      
    scores.append(mean_squared_error(y_test, y_pred))
    
print('Root-mean-square error (RMSE): \n')
display_scores(np.sqrt(scores))
print('\n', 20*'-')

scores = []
for train_index, test_index in kfold.split(X_kfold):   
    X_train, X_test = X_kfold[train_index], X_kfold[test_index]
    y_train, y_test = y_kfold[train_index], y_kfold[test_index]

    xgb_model = xgb.XGBRegressor(objective="reg:squarederror")
    xgb_model.fit(X_train, y_train)
    
    y_pred = xgb_model.predict(X_test)
    
    scores.append(mean_absolute_error(y_test, y_pred))

print('Mean absolute error (MAE): \n')
display_scores((scores))
print('\n', 20*'-')

R square (R2): 

    Scores: [0.9607346213725935, 0.976678017387603, 0.9613648104003967]
    Mean: 0.966
    Std: 0.007

 --------------------
Root-mean-square error (RMSE): 

    Scores: [26.87464665 21.32935722 28.71210094]
    Mean: 25.639
    Std: 3.138

 --------------------
Mean absolute error (MAE): 

    Scores: [16.1945002795933, 16.647184185881716, 15.222079859281841]
    Mean: 16.021
    Std: 0.595

 --------------------


# Add optimal hyperparameters for XGBoost model

In [14]:
# Hyperparameters data for the next step/stage
p1 = 0.8829191872975016               # colsample_bytree
print('Colsample_bytree: ' + str(p1))

p2 = 0.4317747333990657               # gamma
print('Gamma: ' + str(p2))

p3 = 0.17029218845451208              # learning_rate
print('Learning_rate: ' + str(p3))

p4 = 4            # max_depth
print('Max_depth: ' + str(p4))

p5 = 172         # n_estimators
print('N_estimators: ' + str(p5))

p6 = 0.894854733579894            # subsample
print('Subsample: ' + str(p6))

Colsample_bytree: 0.8829191872975016
Gamma: 0.4317747333990657
Learning_rate: 0.17029218845451208
Max_depth: 4
N_estimators: 172
Subsample: 0.894854733579894


# Estimation of a basic XGBoost model using Loops

In [15]:
# Create DMatrix (is an internal data structure) 
data_dmatrix = xgb.DMatrix(data=X,label=y)

In [49]:
# Aggregate parameters
aggr_y=[]
aggr_y_pred=[]

aggr_optimal_n=[]
aggr_bias=[]
aggr_rel_bias=[]
aggr_rmse=[]
aggr_rel_rmse=[]
aggr_mse=[]
aggr_R_square=[]

aggr_Shap_values=pd.DataFrame()

# Body loop
for i in range(20):
    
    # Generate test and training samples
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    
    # Implementation of the scikit-learn API for XGBoost regression
    xgb_model = xgb.XGBRegressor(objective='reg:squarederror', colsample_bytree=p1, gamma=p2, learning_rate=p3, 
                          max_depth=p4, n_estimators=p5, subsample=p6, eval_metric=["rmse"])
    
    # Fitting the model 
    xgb_model.fit(X_train, y_train, early_stopping_rounds=20, eval_set=[(X_train, y_train), (X_test, y_test)])
    
    # learning dynamics
    y_pred = xgb_model.predict(X_test, ntree_limit=xgb_model.best_ntree_limit)
    
    # Iteration with the best result
    optimal_n= xgb_model.best_ntree_limit-1
    
    # Convert data to 'array' type
    conv_y_pred = pd.DataFrame(y_pred) # Double transformation
    y_pred2 = conv_y_pred.values
    y_test2 = y_test.values
    
    # Intermediate results
    n_sample = len(y_pred2)
    y_mean_sample = y_test.sum() / n_sample
    
    # Calculation of bias
    diff = y_pred2 - y_test2
    bias = diff.sum()/n_sample
    rel_bias = bias/y_mean_sample*100
    
    # Calculation of RMSE
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    rel_rmse = rmse/y_mean_sample*100 
        
    # Calculation of MSE
    mse = mean_squared_error(y_test, y_pred)   
    
    # Calculation of Square R
    R_square = r2_score(y_test, y_pred)
    
    #Calculation of SHAP-values
    explainer = shap.TreeExplainer(xgb_model)
    shap_values = explainer.shap_values(X_train)
    vals= np.abs(shap_values).mean(0)
    feature_importance = pd.DataFrame(list(zip(X_train.columns,vals)),columns=['Feature','Importance'])
       
    
    #Add values to lists
    aggr_y.append(y_test)
    aggr_y_pred.append(conv_y_pred)
    aggr_optimal_n.append(optimal_n)
    aggr_bias.append(bias)
    aggr_rel_bias.append(rel_bias)
    aggr_rmse.append(rmse)
    aggr_rel_rmse.append(rel_rmse)
    aggr_mse.append(mse)
    aggr_R_square.append(R_square)
    aggr_Shap_values = aggr_Shap_values.append(feature_importance, ignore_index=True)
    
# Intermedia data to calculate CI
n_train = len(y_train)
n_test = len(y_test)

[0]	validation_0-rmse:225.53327	validation_1-rmse:230.39383
[1]	validation_0-rmse:190.22154	validation_1-rmse:194.63419
[2]	validation_0-rmse:159.45392	validation_1-rmse:163.29286
[3]	validation_0-rmse:133.94430	validation_1-rmse:137.68123
[4]	validation_0-rmse:113.07021	validation_1-rmse:117.11108
[5]	validation_0-rmse:95.55457	validation_1-rmse:99.53029
[6]	validation_0-rmse:81.09917	validation_1-rmse:85.72575
[7]	validation_0-rmse:68.93676	validation_1-rmse:74.37348
[8]	validation_0-rmse:59.57116	validation_1-rmse:65.21597


`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[9]	validation_0-rmse:51.34408	validation_1-rmse:57.80262
[10]	validation_0-rmse:44.75730	validation_1-rmse:51.93689
[11]	validation_0-rmse:39.25313	validation_1-rmse:47.14121
[12]	validation_0-rmse:35.12474	validation_1-rmse:43.52110
[13]	validation_0-rmse:31.47576	validation_1-rmse:40.18650
[14]	validation_0-rmse:28.65341	validation_1-rmse:37.64819
[15]	validation_0-rmse:26.25831	validation_1-rmse:35.80158
[16]	validation_0-rmse:24.44165	validation_1-rmse:34.34914
[17]	validation_0-rmse:23.07866	validation_1-rmse:33.26994
[18]	validation_0-rmse:21.87318	validation_1-rmse:32.48729
[19]	validation_0-rmse:20.93198	validation_1-rmse:31.58333
[20]	validation_0-rmse:20.13797	validation_1-rmse:31.05474
[21]	validation_0-rmse:19.37177	validation_1-rmse:30.68889
[22]	validation_0-rmse:18.90234	validation_1-rmse:30.37201
[23]	validation_0-rmse:18.41712	validation_1-rmse:30.03485
[24]	validation_0-rmse:18.09291	validation_1-rmse:29.82811
[25]	validation_0-rmse:17.64342	validation_1-rmse:29.4372

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:230.02658	validation_1-rmse:208.70962
[1]	validation_0-rmse:194.21704	validation_1-rmse:175.42270


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[2]	validation_0-rmse:163.21028	validation_1-rmse:146.66795
[3]	validation_0-rmse:137.46692	validation_1-rmse:122.77635
[4]	validation_0-rmse:116.37716	validation_1-rmse:102.73539
[5]	validation_0-rmse:98.46882	validation_1-rmse:87.15632
[6]	validation_0-rmse:83.41169	validation_1-rmse:73.87668
[7]	validation_0-rmse:71.48616	validation_1-rmse:63.32186
[8]	validation_0-rmse:61.81480	validation_1-rmse:55.01459
[9]	validation_0-rmse:53.36897	validation_1-rmse:47.92880
[10]	validation_0-rmse:46.39241	validation_1-rmse:42.28059
[11]	validation_0-rmse:40.70496	validation_1-rmse:37.73260
[12]	validation_0-rmse:36.48828	validation_1-rmse:34.48584
[13]	validation_0-rmse:32.70448	validation_1-rmse:31.86738
[14]	validation_0-rmse:29.91646	validation_1-rmse:29.92831
[15]	validation_0-rmse:27.61038	validation_1-rmse:28.43410
[16]	validation_0-rmse:25.74383	validation_1-rmse:27.44612
[17]	validation_0-rmse:24.20943	validation_1-rmse:26.62069
[18]	validation_0-rmse:23.06155	validation_1-rmse:26.11900

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:226.34799	validation_1-rmse:224.94592
[1]	validation_0-rmse:190.90593	validation_1-rmse:189.27516
[2]	validation_0-rmse:160.22767	validation_1-rmse:158.58901
[3]	validation_0-rmse:134.75322	validation_1-rmse:132.88197


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[4]	validation_0-rmse:113.88249	validation_1-rmse:112.40317
[5]	validation_0-rmse:96.31817	validation_1-rmse:94.90866
[6]	validation_0-rmse:81.84061	validation_1-rmse:80.66119
[7]	validation_0-rmse:70.17696	validation_1-rmse:69.39347
[8]	validation_0-rmse:60.70606	validation_1-rmse:60.15857
[9]	validation_0-rmse:52.53790	validation_1-rmse:52.27088
[10]	validation_0-rmse:45.79738	validation_1-rmse:45.82478
[11]	validation_0-rmse:40.39216	validation_1-rmse:40.74286
[12]	validation_0-rmse:36.06503	validation_1-rmse:36.89219
[13]	validation_0-rmse:32.60166	validation_1-rmse:33.80432
[14]	validation_0-rmse:29.75108	validation_1-rmse:31.19711
[15]	validation_0-rmse:27.37999	validation_1-rmse:28.84960
[16]	validation_0-rmse:25.60847	validation_1-rmse:27.54207
[17]	validation_0-rmse:24.07083	validation_1-rmse:26.36443
[18]	validation_0-rmse:22.82743	validation_1-rmse:25.52947
[19]	validation_0-rmse:21.92065	validation_1-rmse:24.90471
[20]	validation_0-rmse:21.21949	validation_1-rmse:24.40332
[

ntree_limit is deprecated, use `iteration_range` or model slicing instead.
ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[0]	validation_0-rmse:227.37770	validation_1-rmse:221.89354
[1]	validation_0-rmse:192.13687	validation_1-rmse:188.38526
[2]	validation_0-rmse:161.36544	validation_1-rmse:159.30850
[3]	validation_0-rmse:135.70372	validation_1-rmse:133.90334
[4]	validation_0-rmse:114.69908	validation_1-rmse:113.51510
[5]	validation_0-rmse:96.98226	validation_1-rmse:96.62857
[6]	validation_0-rmse:82.31337	validation_1-rmse:82.55564
[7]	validation_0-rmse:70.29860	validation_1-rmse:71.52084
[8]	validation_0-rmse:60.91080	validation_1-rmse:63.32897
[9]	validation_0-rmse:52.48934	validation_1-rmse:55.62409
[10]	validation_0-rmse:45.89243	validation_1-rmse:49.24210
[11]	validation_0-rmse:40.49082	validation_1-rmse:44.04605
[12]	validation_0-rmse:36.26329	validation_1-rmse:40.76582
[13]	validation_0-rmse:32.56957	validation_1-rmse:37.08464
[14]	validation_0-rmse:29.55568	validation_1-rmse:34.41486
[15]	validation_0-rmse:27.14743	validation_1-rmse:32.24296
[16]	validation_0-rmse:25.24397	validation_1-rmse:30.651

ntree_limit is deprecated, use `iteration_range` or model slicing instead.
ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:226.85123	validation_1-rmse:226.41087
[1]	validation_0-rmse:191.61225	validation_1-rmse:191.62463
[2]	validation_0-rmse:160.83502	validation_1-rmse:162.18903
[3]	validation_0-rmse:135.14802	validation_1-rmse:137.49945
[4]	validation_0-rmse:114.24833	validation_1-rmse:117.82242
[5]	validation_0-rmse:96.74417	validation_1-rmse:100.79362
[6]	validation_0-rmse:82.08060	validation_1-rmse:86.59288
[7]	validation_0-rmse:69.93408	validation_1-rmse:74.89504
[8]	validation_0-rmse:60.42181	validation_1-rmse:65.61008
[9]	validation_0-rmse:52.24755	validation_1-rmse:57.89668
[10]	validation_0-rmse:45.44632	validation_1-rmse:51.68599
[11]	validation_0-rmse:40.11229	validation_1-rmse:46.62541
[12]	validation_0-rmse:36.00717	validation_1-rmse:43.08829
[13]	validation_0-rmse:32.30440	validation_1-rmse:39.74522
[14]	validation_0-rmse:29.41928	validation_1-rmse:37.12334


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[15]	validation_0-rmse:27.24355	validation_1-rmse:35.17593
[16]	validation_0-rmse:25.27768	validation_1-rmse:33.71442
[17]	validation_0-rmse:23.84460	validation_1-rmse:32.55137
[18]	validation_0-rmse:22.80235	validation_1-rmse:31.63039
[19]	validation_0-rmse:21.77581	validation_1-rmse:30.88219
[20]	validation_0-rmse:20.89715	validation_1-rmse:30.33116
[21]	validation_0-rmse:20.34736	validation_1-rmse:29.74403
[22]	validation_0-rmse:19.77036	validation_1-rmse:29.31362
[23]	validation_0-rmse:19.39700	validation_1-rmse:28.80938
[24]	validation_0-rmse:18.98804	validation_1-rmse:28.60232
[25]	validation_0-rmse:18.59723	validation_1-rmse:28.28646
[26]	validation_0-rmse:18.22988	validation_1-rmse:27.97625
[27]	validation_0-rmse:17.92314	validation_1-rmse:27.94111
[28]	validation_0-rmse:17.52068	validation_1-rmse:27.65604
[29]	validation_0-rmse:17.27046	validation_1-rmse:27.63138
[30]	validation_0-rmse:17.09376	validation_1-rmse:27.44375
[31]	validation_0-rmse:16.80161	validation_1-rmse:27.465

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:226.52279	validation_1-rmse:226.97035
[1]	validation_0-rmse:191.22597	validation_1-rmse:193.29306
[2]	validation_0-rmse:160.67062	validation_1-rmse:163.94213
[3]	validation_0-rmse:135.06109	validation_1-rmse:139.15985
[4]	validation_0-rmse:114.22383	validation_1-rmse:119.88057


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[5]	validation_0-rmse:96.47999	validation_1-rmse:102.77752
[6]	validation_0-rmse:81.83874	validation_1-rmse:88.94839
[7]	validation_0-rmse:69.69209	validation_1-rmse:78.03441
[8]	validation_0-rmse:60.11212	validation_1-rmse:69.60435
[9]	validation_0-rmse:51.77379	validation_1-rmse:62.07519
[10]	validation_0-rmse:44.85900	validation_1-rmse:56.31074
[11]	validation_0-rmse:39.23273	validation_1-rmse:51.66022
[12]	validation_0-rmse:35.05564	validation_1-rmse:48.15355
[13]	validation_0-rmse:31.29778	validation_1-rmse:45.09727
[14]	validation_0-rmse:28.34858	validation_1-rmse:42.86261
[15]	validation_0-rmse:25.86999	validation_1-rmse:41.11691
[16]	validation_0-rmse:23.97808	validation_1-rmse:39.48605
[17]	validation_0-rmse:22.52377	validation_1-rmse:38.39255
[18]	validation_0-rmse:21.30490	validation_1-rmse:37.42404
[19]	validation_0-rmse:20.26519	validation_1-rmse:36.63291
[20]	validation_0-rmse:19.50524	validation_1-rmse:36.22933
[21]	validation_0-rmse:18.84690	validation_1-rmse:35.74887
[

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:227.88701	validation_1-rmse:219.00535
[1]	validation_0-rmse:191.97778	validation_1-rmse:183.53488
[2]	validation_0-rmse:161.36050	validation_1-rmse:154.09694


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[3]	validation_0-rmse:135.92372	validation_1-rmse:129.80826
[4]	validation_0-rmse:114.83853	validation_1-rmse:109.17076
[5]	validation_0-rmse:97.07702	validation_1-rmse:91.50727
[6]	validation_0-rmse:82.57235	validation_1-rmse:77.34786
[7]	validation_0-rmse:70.73199	validation_1-rmse:66.40166
[8]	validation_0-rmse:61.31009	validation_1-rmse:57.66199
[9]	validation_0-rmse:53.19649	validation_1-rmse:50.01830
[10]	validation_0-rmse:46.63956	validation_1-rmse:43.83013
[11]	validation_0-rmse:41.01172	validation_1-rmse:38.54153
[12]	validation_0-rmse:36.76536	validation_1-rmse:34.96362
[13]	validation_0-rmse:32.98013	validation_1-rmse:31.46041
[14]	validation_0-rmse:29.97182	validation_1-rmse:28.85500
[15]	validation_0-rmse:27.71794	validation_1-rmse:27.07336
[16]	validation_0-rmse:25.81124	validation_1-rmse:26.03015
[17]	validation_0-rmse:24.29026	validation_1-rmse:25.04810
[18]	validation_0-rmse:23.07727	validation_1-rmse:24.45978
[19]	validation_0-rmse:22.04330	validation_1-rmse:23.89450


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:227.63581	validation_1-rmse:221.27310
[1]	validation_0-rmse:192.49473	validation_1-rmse:184.25531
[2]	validation_0-rmse:161.94501	validation_1-rmse:154.73106
[3]	validation_0-rmse:136.14440	validation_1-rmse:128.97288
[4]	validation_0-rmse:114.80199	validation_1-rmse:107.94647
[5]	validation_0-rmse:97.01165	validation_1-rmse:90.85626
[6]	validation_0-rmse:82.50391	validation_1-rmse:77.45072
[7]	validation_0-rmse:70.49271	validation_1-rmse:66.04490
[8]	validation_0-rmse:60.80286	validation_1-rmse:56.96710
[9]	validation_0-rmse:52.52465	validation_1-rmse:49.94029
[10]	validation_0-rmse:45.80655	validation_1-rmse:43.70425
[11]	validation_0-rmse:40.26685	validation_1-rmse:38.97268
[12]	validation_0-rmse:36.00967	validation_1-rmse:35.78115
[13]	validation_0-rmse:32.26295	validation_1-rmse:32.95342


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[14]	validation_0-rmse:29.42164	validation_1-rmse:30.75630
[15]	validation_0-rmse:27.13573	validation_1-rmse:29.14115
[16]	validation_0-rmse:25.34998	validation_1-rmse:28.07126
[17]	validation_0-rmse:23.88586	validation_1-rmse:27.12049
[18]	validation_0-rmse:22.73449	validation_1-rmse:26.65802
[19]	validation_0-rmse:21.74412	validation_1-rmse:26.05309
[20]	validation_0-rmse:20.90506	validation_1-rmse:25.68802
[21]	validation_0-rmse:20.16303	validation_1-rmse:25.42245
[22]	validation_0-rmse:19.63011	validation_1-rmse:25.18139
[23]	validation_0-rmse:19.31271	validation_1-rmse:25.11752
[24]	validation_0-rmse:18.93040	validation_1-rmse:24.97728
[25]	validation_0-rmse:18.66176	validation_1-rmse:24.90179
[26]	validation_0-rmse:18.29914	validation_1-rmse:24.78762
[27]	validation_0-rmse:18.01874	validation_1-rmse:24.68126
[28]	validation_0-rmse:17.71699	validation_1-rmse:24.42843
[29]	validation_0-rmse:17.54316	validation_1-rmse:24.37501
[30]	validation_0-rmse:17.35522	validation_1-rmse:24.198

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:223.30237	validation_1-rmse:238.59415
[1]	validation_0-rmse:188.46267	validation_1-rmse:201.41917
[2]	validation_0-rmse:158.38545	validation_1-rmse:169.40316
[3]	validation_0-rmse:133.28149	validation_1-rmse:142.50897
[4]	validation_0-rmse:112.68274	validation_1-rmse:120.61409
[5]	validation_0-rmse:95.21431	validation_1-rmse:101.93159


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[6]	validation_0-rmse:80.92203	validation_1-rmse:86.76741
[7]	validation_0-rmse:69.16201	validation_1-rmse:74.54711
[8]	validation_0-rmse:59.86234	validation_1-rmse:64.75123
[9]	validation_0-rmse:51.63517	validation_1-rmse:55.56975
[10]	validation_0-rmse:45.07668	validation_1-rmse:48.41832
[11]	validation_0-rmse:39.86257	validation_1-rmse:42.89430
[12]	validation_0-rmse:35.77262	validation_1-rmse:38.83174
[13]	validation_0-rmse:32.07539	validation_1-rmse:35.10125
[14]	validation_0-rmse:29.18898	validation_1-rmse:32.20789
[15]	validation_0-rmse:26.93933	validation_1-rmse:30.29691
[16]	validation_0-rmse:25.23072	validation_1-rmse:29.08402
[17]	validation_0-rmse:23.69079	validation_1-rmse:27.61045
[18]	validation_0-rmse:22.55579	validation_1-rmse:26.60490
[19]	validation_0-rmse:21.66140	validation_1-rmse:25.99808
[20]	validation_0-rmse:20.99733	validation_1-rmse:25.27777
[21]	validation_0-rmse:20.32663	validation_1-rmse:24.86953
[22]	validation_0-rmse:19.80929	validation_1-rmse:24.74222
[

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:224.35898	validation_1-rmse:235.51952
[1]	validation_0-rmse:189.33394	validation_1-rmse:200.61602
[2]	validation_0-rmse:159.02223	validation_1-rmse:170.21937
[3]	validation_0-rmse:133.81118	validation_1-rmse:144.92052
[4]	validation_0-rmse:113.13466	validation_1-rmse:124.58577
[5]	validation_0-rmse:95.63919	validation_1-rmse:107.18411
[6]	validation_0-rmse:81.29441	validation_1-rmse:92.69444


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[7]	validation_0-rmse:69.39297	validation_1-rmse:80.64170
[8]	validation_0-rmse:60.14589	validation_1-rmse:71.41950
[9]	validation_0-rmse:51.98848	validation_1-rmse:63.06542
[10]	validation_0-rmse:45.33878	validation_1-rmse:56.82338
[11]	validation_0-rmse:39.87865	validation_1-rmse:51.41977
[12]	validation_0-rmse:35.65610	validation_1-rmse:47.01096
[13]	validation_0-rmse:32.01881	validation_1-rmse:43.51973
[14]	validation_0-rmse:29.08787	validation_1-rmse:40.97365
[15]	validation_0-rmse:26.71574	validation_1-rmse:38.87031
[16]	validation_0-rmse:24.87147	validation_1-rmse:37.22822
[17]	validation_0-rmse:23.45914	validation_1-rmse:35.99754
[18]	validation_0-rmse:22.31876	validation_1-rmse:34.92717
[19]	validation_0-rmse:21.41975	validation_1-rmse:33.96530
[20]	validation_0-rmse:20.69419	validation_1-rmse:33.19510
[21]	validation_0-rmse:20.06822	validation_1-rmse:32.55708
[22]	validation_0-rmse:19.46997	validation_1-rmse:32.17867
[23]	validation_0-rmse:18.89551	validation_1-rmse:31.72804


ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:224.92331	validation_1-rmse:231.34514
[1]	validation_0-rmse:190.01294	validation_1-rmse:195.98173
[2]	validation_0-rmse:159.66623	validation_1-rmse:164.74858
[3]	validation_0-rmse:134.62167	validation_1-rmse:138.99677
[4]	validation_0-rmse:113.92673	validation_1-rmse:118.29908
[5]	validation_0-rmse:96.28189	validation_1-rmse:100.20120
[6]	validation_0-rmse:81.88115	validation_1-rmse:85.66419


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[7]	validation_0-rmse:69.96601	validation_1-rmse:73.92184
[8]	validation_0-rmse:60.72967	validation_1-rmse:64.74264
[9]	validation_0-rmse:52.43731	validation_1-rmse:56.19727
[10]	validation_0-rmse:45.67770	validation_1-rmse:49.60427
[11]	validation_0-rmse:40.17268	validation_1-rmse:44.57981
[12]	validation_0-rmse:36.03878	validation_1-rmse:40.75304
[13]	validation_0-rmse:32.42624	validation_1-rmse:37.20933
[14]	validation_0-rmse:29.53774	validation_1-rmse:34.70425
[15]	validation_0-rmse:27.13768	validation_1-rmse:32.49460
[16]	validation_0-rmse:25.23793	validation_1-rmse:30.83542
[17]	validation_0-rmse:23.81183	validation_1-rmse:29.48440
[18]	validation_0-rmse:22.66526	validation_1-rmse:28.44979
[19]	validation_0-rmse:21.69555	validation_1-rmse:27.58815
[20]	validation_0-rmse:20.83535	validation_1-rmse:26.90620
[21]	validation_0-rmse:20.16761	validation_1-rmse:26.35138
[22]	validation_0-rmse:19.66372	validation_1-rmse:25.86306
[23]	validation_0-rmse:19.29244	validation_1-rmse:25.54931


ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:225.00125	validation_1-rmse:232.56761
[1]	validation_0-rmse:190.02293	validation_1-rmse:198.26385


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[2]	validation_0-rmse:159.64573	validation_1-rmse:167.24544
[3]	validation_0-rmse:134.17294	validation_1-rmse:141.45664
[4]	validation_0-rmse:113.29665	validation_1-rmse:119.69927
[5]	validation_0-rmse:95.95010	validation_1-rmse:103.02851
[6]	validation_0-rmse:81.52538	validation_1-rmse:88.55883
[7]	validation_0-rmse:69.38240	validation_1-rmse:76.93466
[8]	validation_0-rmse:60.00669	validation_1-rmse:68.32908
[9]	validation_0-rmse:51.70981	validation_1-rmse:60.23286
[10]	validation_0-rmse:44.91909	validation_1-rmse:54.09010
[11]	validation_0-rmse:39.46737	validation_1-rmse:49.04664
[12]	validation_0-rmse:35.30535	validation_1-rmse:45.37208
[13]	validation_0-rmse:31.71819	validation_1-rmse:42.17249
[14]	validation_0-rmse:28.97319	validation_1-rmse:39.69866
[15]	validation_0-rmse:26.66765	validation_1-rmse:37.54827
[16]	validation_0-rmse:24.91956	validation_1-rmse:35.81424
[17]	validation_0-rmse:23.41757	validation_1-rmse:34.39903
[18]	validation_0-rmse:22.27164	validation_1-rmse:33.4744

ntree_limit is deprecated, use `iteration_range` or model slicing instead.
ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:222.79391	validation_1-rmse:240.79751
[1]	validation_0-rmse:187.58415	validation_1-rmse:204.50060
[2]	validation_0-rmse:157.59893	validation_1-rmse:173.26836
[3]	validation_0-rmse:132.70552	validation_1-rmse:146.88069
[4]	validation_0-rmse:112.04431	validation_1-rmse:125.90252
[5]	validation_0-rmse:94.60010	validation_1-rmse:107.55921
[6]	validation_0-rmse:80.27571	validation_1-rmse:92.99991
[7]	validation_0-rmse:68.53135	validation_1-rmse:80.89715
[8]	validation_0-rmse:59.23877	validation_1-rmse:71.49825
[9]	validation_0-rmse:51.06736	validation_1-rmse:62.98432
[10]	validation_0-rmse:44.45237	validation_1-rmse:56.10490
[11]	validation_0-rmse:39.11010	validation_1-rmse:50.81300
[12]	validation_0-rmse:34.86391	validation_1-rmse:46.34322


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[13]	validation_0-rmse:31.34156	validation_1-rmse:42.72540
[14]	validation_0-rmse:28.56925	validation_1-rmse:39.77890
[15]	validation_0-rmse:26.23031	validation_1-rmse:37.32036
[16]	validation_0-rmse:24.32111	validation_1-rmse:35.48225
[17]	validation_0-rmse:22.88055	validation_1-rmse:33.96877
[18]	validation_0-rmse:21.71384	validation_1-rmse:33.04619
[19]	validation_0-rmse:20.74067	validation_1-rmse:31.99317
[20]	validation_0-rmse:20.02883	validation_1-rmse:31.28205
[21]	validation_0-rmse:19.43536	validation_1-rmse:30.73088
[22]	validation_0-rmse:18.80564	validation_1-rmse:30.25366
[23]	validation_0-rmse:18.45708	validation_1-rmse:29.95448
[24]	validation_0-rmse:18.05777	validation_1-rmse:29.69854
[25]	validation_0-rmse:17.49189	validation_1-rmse:29.38957
[26]	validation_0-rmse:17.12425	validation_1-rmse:29.13514
[27]	validation_0-rmse:16.85973	validation_1-rmse:28.96942
[28]	validation_0-rmse:16.74765	validation_1-rmse:28.91408
[29]	validation_0-rmse:16.55403	validation_1-rmse:28.654

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:225.08853	validation_1-rmse:231.99544
[1]	validation_0-rmse:189.78366	validation_1-rmse:195.89169


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[2]	validation_0-rmse:159.22354	validation_1-rmse:165.61077
[3]	validation_0-rmse:133.81883	validation_1-rmse:139.90853
[4]	validation_0-rmse:113.05438	validation_1-rmse:119.30628
[5]	validation_0-rmse:95.55590	validation_1-rmse:102.28277
[6]	validation_0-rmse:81.18805	validation_1-rmse:88.79297
[7]	validation_0-rmse:69.22619	validation_1-rmse:77.37210
[8]	validation_0-rmse:59.88380	validation_1-rmse:68.49439
[9]	validation_0-rmse:51.84779	validation_1-rmse:60.88183
[10]	validation_0-rmse:45.04271	validation_1-rmse:54.76101
[11]	validation_0-rmse:39.65519	validation_1-rmse:50.45635
[12]	validation_0-rmse:35.62322	validation_1-rmse:46.19007
[13]	validation_0-rmse:31.91812	validation_1-rmse:42.88750
[14]	validation_0-rmse:29.08508	validation_1-rmse:40.41766
[15]	validation_0-rmse:26.54166	validation_1-rmse:38.43354
[16]	validation_0-rmse:24.85143	validation_1-rmse:36.69830
[17]	validation_0-rmse:23.44554	validation_1-rmse:35.12968
[18]	validation_0-rmse:22.43094	validation_1-rmse:34.2504

ntree_limit is deprecated, use `iteration_range` or model slicing instead.
ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:225.60775	validation_1-rmse:228.79999
[1]	validation_0-rmse:190.34347	validation_1-rmse:193.23591
[2]	validation_0-rmse:159.82360	validation_1-rmse:162.61955
[3]	validation_0-rmse:134.62032	validation_1-rmse:137.11056
[4]	validation_0-rmse:113.93309	validation_1-rmse:116.81432
[5]	validation_0-rmse:96.44522	validation_1-rmse:98.98041
[6]	validation_0-rmse:81.94885	validation_1-rmse:84.48659
[7]	validation_0-rmse:69.96024	validation_1-rmse:72.22248
[8]	validation_0-rmse:60.65358	validation_1-rmse:62.89079
[9]	validation_0-rmse:52.45864	validation_1-rmse:54.40013
[10]	validation_0-rmse:45.78101	validation_1-rmse:47.49383
[11]	validation_0-rmse:40.27206	validation_1-rmse:42.19376
[12]	validation_0-rmse:36.11667	validation_1-rmse:38.33485
[13]	validation_0-rmse:32.44043	validation_1-rmse:35.15550


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[14]	validation_0-rmse:29.57967	validation_1-rmse:32.38255
[15]	validation_0-rmse:27.19524	validation_1-rmse:30.05939
[16]	validation_0-rmse:25.49373	validation_1-rmse:28.62098
[17]	validation_0-rmse:24.06467	validation_1-rmse:27.14716
[18]	validation_0-rmse:22.77312	validation_1-rmse:26.22737
[19]	validation_0-rmse:21.70355	validation_1-rmse:25.56666
[20]	validation_0-rmse:20.96749	validation_1-rmse:25.10669
[21]	validation_0-rmse:20.30395	validation_1-rmse:24.55507
[22]	validation_0-rmse:19.73148	validation_1-rmse:24.12018
[23]	validation_0-rmse:19.23869	validation_1-rmse:23.54808
[24]	validation_0-rmse:18.87662	validation_1-rmse:23.36834
[25]	validation_0-rmse:18.42020	validation_1-rmse:23.19489
[26]	validation_0-rmse:18.14404	validation_1-rmse:23.01996
[27]	validation_0-rmse:17.96748	validation_1-rmse:23.00481
[28]	validation_0-rmse:17.71482	validation_1-rmse:22.93398
[29]	validation_0-rmse:17.48628	validation_1-rmse:22.91737
[30]	validation_0-rmse:17.32794	validation_1-rmse:22.739

ntree_limit is deprecated, use `iteration_range` or model slicing instead.
ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[1]	validation_0-rmse:191.76843	validation_1-rmse:188.80536
[2]	validation_0-rmse:161.17990	validation_1-rmse:157.67918
[3]	validation_0-rmse:135.69434	validation_1-rmse:132.68206
[4]	validation_0-rmse:114.88926	validation_1-rmse:112.10522
[5]	validation_0-rmse:97.35557	validation_1-rmse:94.47819
[6]	validation_0-rmse:82.71441	validation_1-rmse:80.03836
[7]	validation_0-rmse:70.61036	validation_1-rmse:67.87066
[8]	validation_0-rmse:60.91767	validation_1-rmse:59.20372
[9]	validation_0-rmse:52.69452	validation_1-rmse:50.95121
[10]	validation_0-rmse:46.08731	validation_1-rmse:44.63046
[11]	validation_0-rmse:40.55465	validation_1-rmse:39.68393
[12]	validation_0-rmse:36.30496	validation_1-rmse:36.03248
[13]	validation_0-rmse:32.71896	validation_1-rmse:33.16861
[14]	validation_0-rmse:29.77644	validation_1-rmse:31.14500
[15]	validation_0-rmse:27.45440	validation_1-rmse:29.31558
[16]	validation_0-rmse:25.56954	validation_1-rmse:27.66623
[17]	validation_0-rmse:24.16558	validation_1-rmse:26.7482

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:227.82676	validation_1-rmse:218.56251
[1]	validation_0-rmse:191.94281	validation_1-rmse:183.97435
[2]	validation_0-rmse:161.09915	validation_1-rmse:153.89518
[3]	validation_0-rmse:135.65228	validation_1-rmse:129.60392
[4]	validation_0-rmse:114.80271	validation_1-rmse:109.70285
[5]	validation_0-rmse:97.23047	validation_1-rmse:92.65450
[6]	validation_0-rmse:82.47357	validation_1-rmse:78.61582


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[7]	validation_0-rmse:70.21021	validation_1-rmse:67.08257
[8]	validation_0-rmse:60.74089	validation_1-rmse:58.17388
[9]	validation_0-rmse:52.38743	validation_1-rmse:50.74161
[10]	validation_0-rmse:45.57382	validation_1-rmse:44.97074
[11]	validation_0-rmse:40.15389	validation_1-rmse:40.23673
[12]	validation_0-rmse:35.75833	validation_1-rmse:36.60004
[13]	validation_0-rmse:32.09103	validation_1-rmse:33.43049
[14]	validation_0-rmse:29.17187	validation_1-rmse:31.29200
[15]	validation_0-rmse:26.70235	validation_1-rmse:29.47450
[16]	validation_0-rmse:24.80988	validation_1-rmse:28.31885
[17]	validation_0-rmse:23.28845	validation_1-rmse:27.41994
[18]	validation_0-rmse:22.12544	validation_1-rmse:26.59563
[19]	validation_0-rmse:21.16599	validation_1-rmse:26.26194
[20]	validation_0-rmse:20.26715	validation_1-rmse:25.88868
[21]	validation_0-rmse:19.58786	validation_1-rmse:25.65392
[22]	validation_0-rmse:19.05758	validation_1-rmse:25.50072
[23]	validation_0-rmse:18.62608	validation_1-rmse:25.46275


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[0]	validation_0-rmse:223.72322	validation_1-rmse:236.95318
[1]	validation_0-rmse:188.42518	validation_1-rmse:201.47906
[2]	validation_0-rmse:158.44400	validation_1-rmse:170.32137
[3]	validation_0-rmse:133.23503	validation_1-rmse:144.14703
[4]	validation_0-rmse:112.37742	validation_1-rmse:122.91764
[5]	validation_0-rmse:94.85241	validation_1-rmse:104.82663
[6]	validation_0-rmse:80.59794	validation_1-rmse:90.07388
[7]	validation_0-rmse:68.77538	validation_1-rmse:78.48560
[8]	validation_0-rmse:59.46370	validation_1-rmse:69.23962
[9]	validation_0-rmse:51.47153	validation_1-rmse:61.20376
[10]	validation_0-rmse:44.83723	validation_1-rmse:54.53612
[11]	validation_0-rmse:39.24996	validation_1-rmse:49.13326
[12]	validation_0-rmse:35.22218	validation_1-rmse:45.54819
[13]	validation_0-rmse:31.68310	validation_1-rmse:42.03800
[14]	validation_0-rmse:29.01189	validation_1-rmse:39.51463
[15]	validation_0-rmse:26.70677	validation_1-rmse:37.40501
[16]	validation_0-rmse:24.72513	validation_1-rmse:35.63

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


[0]	validation_0-rmse:226.42408	validation_1-rmse:225.25243
[1]	validation_0-rmse:191.32938	validation_1-rmse:191.21672
[2]	validation_0-rmse:160.66760	validation_1-rmse:160.71569
[3]	validation_0-rmse:135.13680	validation_1-rmse:134.75274
[4]	validation_0-rmse:114.16456	validation_1-rmse:113.94172
[5]	validation_0-rmse:96.63703	validation_1-rmse:96.28051


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[6]	validation_0-rmse:81.92730	validation_1-rmse:81.70730
[7]	validation_0-rmse:69.90636	validation_1-rmse:69.71929
[8]	validation_0-rmse:60.32833	validation_1-rmse:61.39878
[9]	validation_0-rmse:52.13327	validation_1-rmse:53.60716
[10]	validation_0-rmse:45.42099	validation_1-rmse:47.02142
[11]	validation_0-rmse:39.94261	validation_1-rmse:41.89280
[12]	validation_0-rmse:35.76081	validation_1-rmse:38.47398
[13]	validation_0-rmse:32.06899	validation_1-rmse:35.38495
[14]	validation_0-rmse:29.19782	validation_1-rmse:32.85150
[15]	validation_0-rmse:26.99506	validation_1-rmse:31.23709
[16]	validation_0-rmse:25.01967	validation_1-rmse:29.52537
[17]	validation_0-rmse:23.40337	validation_1-rmse:28.33780
[18]	validation_0-rmse:22.37098	validation_1-rmse:27.78823
[19]	validation_0-rmse:21.40387	validation_1-rmse:27.19070
[20]	validation_0-rmse:20.61758	validation_1-rmse:26.56665
[21]	validation_0-rmse:20.10179	validation_1-rmse:26.39161
[22]	validation_0-rmse:19.50715	validation_1-rmse:26.17091
[

ntree_limit is deprecated, use `iteration_range` or model slicing instead.
ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[0]	validation_0-rmse:227.20396	validation_1-rmse:222.54237
[1]	validation_0-rmse:191.66924	validation_1-rmse:186.28142
[2]	validation_0-rmse:161.03464	validation_1-rmse:155.96524
[3]	validation_0-rmse:135.73721	validation_1-rmse:131.23278
[4]	validation_0-rmse:114.78175	validation_1-rmse:110.23419
[5]	validation_0-rmse:96.99916	validation_1-rmse:93.54743
[6]	validation_0-rmse:82.20273	validation_1-rmse:79.43215
[7]	validation_0-rmse:70.13002	validation_1-rmse:68.20079
[8]	validation_0-rmse:60.39423	validation_1-rmse:58.81875
[9]	validation_0-rmse:52.03824	validation_1-rmse:51.41832
[10]	validation_0-rmse:45.29911	validation_1-rmse:46.16288
[11]	validation_0-rmse:39.87963	validation_1-rmse:41.48260
[12]	validation_0-rmse:35.63489	validation_1-rmse:37.55399
[13]	validation_0-rmse:32.02101	validation_1-rmse:34.99907
[14]	validation_0-rmse:29.16011	validation_1-rmse:33.00086
[15]	validation_0-rmse:26.90481	validation_1-rmse:31.62772
[16]	validation_0-rmse:25.18702	validation_1-rmse:30.435

ntree_limit is deprecated, use `iteration_range` or model slicing instead.
ntree_limit is deprecated, use `iteration_range` or model slicing instead.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


In [71]:
pd_aggr_y = pd.concat(aggr_y, join="inner")
pd_aggr_y['Obs'] = pd_aggr_y.index

pd_aggr_y_pred = pd.concat(aggr_y_pred, join="inner")

results_y = pd.concat([pd_aggr_y.reset_index(), pd_aggr_y_pred.reset_index()], axis=1)

print(pd_aggr_y)
print(pd_aggr_y_pred)
print(results_y)

     GS_cub_m  Obs
709     282.4  709
762     169.4  762
600     399.5  600
80      121.0   80
344     300.0  344
..        ...  ...
144      69.0  144
798     204.5  798
415     549.0  415
280     118.8  280
665     329.9  665

[3440 rows x 2 columns]
              0
0    280.628754
1    163.887863
2    371.982513
3    120.444695
4    348.378845
..          ...
167   77.107063
168  167.320953
169  548.312317
170  111.425316
171  330.085052

[3440 rows x 1 columns]
      index  GS_cub_m  Obs  index           0
0       709     282.4  709      0  280.628754
1       762     169.4  762      1  163.887863
2       600     399.5  600      2  371.982513
3        80     121.0   80      3  120.444695
4       344     300.0  344      4  348.378845
...     ...       ...  ...    ...         ...
3435    144      69.0  144    167   77.107063
3436    798     204.5  798    168  167.320953
3437    415     549.0  415    169  548.312317
3438    280     118.8  280    170  111.425316
3439    665     329.9  6

In [None]:
# Intermedia ratio for calculation CI
CIR = 2/((n_test + n_train)/n_test)**0.5

In [18]:
# Statistic for SHAP-values

number_iteration = len(aggr_Shap_values.groupby(['Feature']))
shap_list = aggr_Shap_values.groupby(['Feature'])


for i in range(number_iteration):
    itr_mean = round(np.mean(list(shap_list)[i][1].iloc[0:, 1]), 3)
    itr_Q50 = round(np.quantile(list(shap_list)[i][1].iloc[0:, 1], 0.50), 3)
    itr_Q16 = round(np.quantile(list(shap_list)[i][1].iloc[0:, 1], 0.16), 3)
    itr_Q84 = round(np.quantile(list(shap_list)[i][1].iloc[0:, 1], 0.84), 3)
    itr_l = (itr_Q50 - itr_Q16)*CIR
    l_CI = itr_Q50 - itr_l
    itr_t = (itr_Q84 - itr_Q50)*CIR
    t_CI = itr_Q50 + itr_t
    print(f"{list(shap_list)[i][0]}: mean - {itr_mean}, median - {itr_Q50}, CI - [{round(l_CI, 3)}-{round(t_CI, 3)}]\n")
print("\n")          

BA_sq_m_ha: mean - 72.72, median - 73.411, CI - [71.49-74.074]

DBH_cm: mean - 5.934, median - 5.532, CI - [5.041-6.648]

H_m: mean - 61.469, median - 61.719, CI - [60.342-63.142]

Origin_ID: mean - 2.762, median - 2.777, CI - [2.356-3.099]

Species_ID: mean - 4.524, median - 4.58, CI - [4.148-4.887]





In [None]:
sns.set(font_scale=1.2)
sns.violinplot(x="Importance", y="Feature", data=aggr_Shap_values, inner="box", palette="Spectral", order=['BA_sq_m_ha', 'H_m', 'DBH_cm', 'Species_ID', 'Origin_ID'])
sns.set(rc={'figure.figsize':(15,9)})
plt.show()

In [None]:
# Aggregate stat
# optimal number of iterations
aggr_optimal_n_mean = np.mean(aggr_optimal_n)
aggr_optimal_n_Q025 = np.quantile(aggr_optimal_n, 0.025)
aggr_optimal_n_Q16 = np.quantile(aggr_optimal_n, 0.16)
aggr_optimal_n_Q50 = np.quantile(aggr_optimal_n, 0.50)
aggr_optimal_n_Q84 = np.quantile(aggr_optimal_n, 0.84)
aggr_optimal_n_Q975 = np.quantile(aggr_optimal_n, 0.975)

# bias
aggr_bias_mean = np.mean(aggr_bias)
aggr_bias_Q025 = np.quantile(aggr_bias, 0.025)
aggr_bias_Q16 = np.quantile(aggr_bias, 0.16)
aggr_bias_Q50 = np.quantile(aggr_bias, 0.50)
aggr_bias_Q84 = np.quantile(aggr_bias, 0.84)
aggr_bias_Q975 = np.quantile(aggr_bias, 0.975)

aggr_rel_bias_mean = np.mean(aggr_rel_bias)
aggr_rel_bias_Q025 = np.quantile(aggr_rel_bias, 0.025)
aggr_rel_bias_Q16 = np.quantile(aggr_rel_bias, 0.16)
aggr_rel_bias_Q50 = np.quantile(aggr_rel_bias, 0.50)
aggr_rel_bias_Q84 = np.quantile(aggr_rel_bias, 0.84)
aggr_rel_bias_Q975 = np.quantile(aggr_rel_bias, 0.975)

# RMSE
aggr_rmse_mean = np.mean(aggr_rmse)
aggr_rmse_Q025 = np.quantile(aggr_rmse, 0.025)
aggr_rmse_Q16 = np.quantile(aggr_rmse, 0.16)
aggr_rmse_Q50 = np.quantile(aggr_rmse, 0.50)
aggr_rmse_Q84 = np.quantile(aggr_rmse, 0.84)
aggr_rmse_Q975 = np.quantile(aggr_rmse, 0.975)

aggr_rel_rmse_mean = np.mean(aggr_rel_rmse)
aggr_rel_rmse_Q025 = np.quantile(aggr_rel_rmse, 0.025)
aggr_rel_rmse_Q16 = np.quantile(aggr_rel_rmse, 0.16)
aggr_rel_rmse_Q50 = np.quantile(aggr_rel_rmse, 0.50)
aggr_rel_rmse_Q84 = np.quantile(aggr_rel_rmse, 0.84)
aggr_rel_rmse_Q975 = np.quantile(aggr_rel_rmse, 0.975)

# MSE
aggr_mse_mean = np.mean(aggr_mse)
aggr_mse_Q025 = np.quantile(aggr_mse, 0.025)
aggr_mse_Q16 = np.quantile(aggr_mse, 0.16)
aggr_mse_Q50 = np.quantile(aggr_mse, 0.50)
aggr_mse_Q84 = np.quantile(aggr_mse, 0.84)
aggr_mse_Q975 = np.quantile(aggr_mse, 0.975)

# R-square
aggr_R_square_mean = np.mean(aggr_R_square)
aggr_R_square_Q025 = np.quantile(aggr_R_square, 0.025)
aggr_R_square_Q16 = np.quantile(aggr_R_square, 0.16)
aggr_R_square_Q50 = np.quantile(aggr_R_square, 0.50)
aggr_R_square_Q84 = np.quantile(aggr_R_square, 0.84)
aggr_R_square_Q975 = np.quantile(aggr_R_square, 0.975)



In [None]:
# Output calculated stat
print(f"Aggregate optimal number of iterations statistic: \n mean: {round(aggr_optimal_n_mean, 3)}, median: {round(aggr_optimal_n_Q50, 3)},\n pred. interval (P(0.025)-P(0.975)): {round(aggr_optimal_n_Q025, 3)}-{round(aggr_optimal_n_Q975, 3)}, \n conf. interval (p-value=0.95): {round(aggr_optimal_n_Q50-((aggr_optimal_n_Q50-aggr_optimal_n_Q16)*CIR), 3)}-{round(aggr_optimal_n_Q50+((aggr_optimal_n_Q84-aggr_optimal_n_Q50)*CIR), 3)} \n***")
print(f"Aggregate bias statistic: \n mean: {round(aggr_bias_mean, 3)}, median: {round(aggr_bias_Q50, 3)},\n pred. interval (P(0.025)-P(0.975)): {round(aggr_bias_Q025, 3)}-{round(aggr_bias_Q975, 3)}, \n conf. interval (p-value=0.95): {round(aggr_bias_Q50-((aggr_bias_Q50-aggr_bias_Q16)*CIR), 3)}-{round(aggr_bias_Q50+((aggr_bias_Q84-aggr_bias_Q50)*CIR), 3)} \n***")
print(f"Aggregate RMSE statistic: \n mean: {round(aggr_rmse_mean, 3)}, median: {round(aggr_rmse_Q50, 3)}, pred. interval (P(0.025)-P(0.975)): {round(aggr_rmse_Q025, 3)}-{round(aggr_rmse_Q975, 3)}, \n conf. interval (p-value=0.95): {round(aggr_rmse_Q50-((aggr_rmse_Q50-aggr_rmse_Q16)*CIR), 3)}-{round(aggr_rmse_Q50+((aggr_rmse_Q84-aggr_rmse_Q50)*CIR), 3)} \n***")
print(f"Aggregate MSE statistic: \n mean: {round(aggr_mse_mean, 3)}, median: {round(aggr_mse_Q50, 3)}, pred. interval (P(0.025)-P(0.975)): {round(aggr_mse_Q025, 3)}-{round(aggr_mse_Q975, 3)}, \n conf. interval (p-value=0.95): {round(aggr_mse_Q50-((aggr_mse_Q50-aggr_mse_Q16)*CIR), 3)}-{round(aggr_mse_Q50+((aggr_mse_Q84-aggr_mse_Q50)*CIR), 3)} \n***")
print(f"Aggregate R^2 statistic: \n mean: {round(aggr_R_square_mean, 3)}, median: {round(aggr_R_square_Q50, 3)}, pred. interval (P(0.025)-P(0.975)): {round(aggr_R_square_Q025, 3)}-{round(aggr_R_square_Q975, 3)}, \n conf. interval (p-value=0.95): {round(aggr_R_square_Q50-((aggr_R_square_Q50-aggr_R_square_Q16)*CIR), 3)}-{round(aggr_R_square_Q50+((aggr_R_square_Q84-aggr_R_square_Q50)*CIR), 3)} \n***")
print(f"Aggregate bias(%) statistic: \n mean: {round(aggr_rel_bias_mean, 3)}, median: {round(aggr_bias_Q50, 3)}, pred. interval (P(0.025)-P(0.975)): {round(aggr_rel_bias_Q025, 3)}-{round(aggr_rel_bias_Q975, 3)}, \n conf. interval (p-value=0.95): {round(aggr_rel_bias_Q50-((aggr_rel_bias_Q50-aggr_rel_bias_Q16)*CIR), 3)}-{round(aggr_rel_bias_Q50+((aggr_rel_bias_Q84-aggr_rel_bias_Q50)*CIR), 3)} \n***")
print(f"Aggregate RMSE(%) statistic: \n mean: {round(aggr_rel_rmse_mean, 3)}, median: {round(aggr_rel_rmse_Q50, 3)}, pred. interval (P(0.025)-P(0.975)): {round(aggr_rel_rmse_Q025, 3)}-{round(aggr_rel_rmse_Q975, 3)}, \n conf. interval (p-value=0.95): {round(aggr_rel_rmse_Q50-((aggr_rel_rmse_Q50-aggr_rel_rmse_Q16)*CIR), 3)}-{round(aggr_rel_rmse_Q50+((aggr_rel_rmse_Q84-aggr_rel_rmse_Q50)*CIR), 3)} \n***")