In [1]:
import tensorflow as tf
import tensorflow_decision_forests as tfdf
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pandas as pd

import shap

In [2]:
wine_qt_path = '../data/WineQT.csv'
RANDOM_SEED = 492

In [3]:
wine_df = pd.read_csv(wine_qt_path)

In [5]:
print(wine_df.head())

   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  Id  
0      9.4        5   0  
1      9.8        5   1  
2      9

In [6]:
y_series = wine_df['Id']
y = pd.DataFrame(y_series, columns=['Id'])
features = [col for col in wine_df.columns if col != 'Id']
X = wine_df[features]

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_SEED)

In [8]:
X_train = X_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

In [11]:
print(len(X_train_scaled))

914


In [10]:
subset_indices = np.random.choice(X_train.index, size=len(X_train), replace=False)

X_train_subset = X_train.loc[subset_indices]
y_train_subset = y_train.loc[subset_indices]

print(X_train_scaled.head())

   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0       0.105544         -1.379153     1.462888       -0.537610   0.087465   
1       0.621720         -1.046416     0.500012       -0.244623  -0.252294   
2       0.334956         -1.379153     0.905434       -0.610857  -0.412180   
3      -0.640043          0.229075    -1.121673       -0.171377  -0.132379   
4      -0.869454          0.173619    -1.223028       -0.610857  -0.072421   

   free sulfur dioxide  total sulfur dioxide   density        pH  sulphates  \
0             1.921184              1.727330  0.097116 -0.055335   0.537430   
1            -1.030261             -0.990201  0.667681 -0.564916  -0.225502   
2            -0.833498             -1.020395 -1.645700 -0.373823  -0.636312   
3             0.051936             -0.295721 -0.556440  0.454246  -0.342877   
4             0.248699             -0.325915  0.045247  0.836432  -0.166815   

    alcohol   quality  
0  0.042265  1.653380  
1 -1.146

In [17]:
subset_size = int(0.8 * len(X_train_scaled))
            
# Create a subset of the dataset using sampled indices
subset_indices = np.random.choice(X_train_scaled.index, size=subset_size, replace=False)
X_train_test4len = X_train_scaled.loc[subset_indices].reset_index(drop=True)
print(X_train_test4len.head())

   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0       1.309954          1.670935     0.500012        0.048364  -0.032450   
1      -1.213571          0.672724    -1.070995       -0.684104  -0.751939   
2      -0.984160         -0.769136     1.108144       -0.610857  -0.352223   
3       1.080542          0.783637     0.601368       -0.464363  -0.671996   
4      -0.525337          0.506356    -1.121673       -0.464363  -0.392194   

   free sulfur dioxide  total sulfur dioxide   density        pH  sulphates  \
0             1.035750              0.730902  0.719551 -0.947101  -0.812374   
1            -0.931879             -0.839227 -1.676821  1.664501  -0.694999   
2             0.740606              3.116290 -2.340751 -1.201892  -0.929748   
3            -0.931879             -0.869421  0.460203 -0.883404  -1.105809   
4             0.838987             -0.114552 -0.317840  0.709037  -0.518938   

    alcohol   quality  
0 -0.506550 -0.850662  
1  1.368

In [12]:
X_train_tensor = tf.constant(X_train_subset.values, dtype=tf.float32)
y_train_tensor = tf.constant(y_train_subset.values, dtype=tf.float32)
print(y_train_tensor.shape)

(914, 1)


In [18]:
class GradientBoostedTreesEnsembleClassifier(tf.keras.Model):
    def __init__(self, n_trees=100, max_depth=3, n_estimators=100, subportion = 0.8, sample_method='RANDOM'): 
        super(GradientBoostedTreesEnsembleClassifier, self).__init__()
        self.n_trees = n_trees
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.subportion = subportion
        self.sample_method = sample_method
        self.estimators = []

    def build_estimator(self):
        model = tfdf.keras.GradientBoostedTreesModel(
            task=tfdf.keras.Task.CLASSIFICATION,
            num_trees=np.random.randint(self.n_trees//5, self.n_trees+1), # self.n_trees, #
            max_depth=self.max_depth, #np.random.randint(self.max_depth/2, self.max_depth+1), # self.max_depth, # 
            validation_ratio=0.1,
            subsample=self.subportion,
            sampling_method=self.sample_method,
            random_seed=self.seed # try to set this first. Maybe this is enough
        )
        return model
    
    # if not, the number of trees/depth different for each estimator

    def fit(self, X, y):
        
        X_tensor = tf.constant(X.values, dtype=tf.float32)
        y_tensor = tf.constant(y.values, dtype=tf.float32)
        tot_dataset_size = len(X)
        
        @tf.function
        def train_single_estimator(seed):
            self.seed = int(seed)
            subset_size = int(self.subportion * tot_dataset_size)
            
            # Create a subset of the dataset using sampled indices
            subset_indices = np.random.choice(tot_dataset_size, size=subset_size, replace=False)
            
            X_subset = tf.gather(X_tensor, subset_indices)
            y_subset = tf.gather(y_tensor, subset_indices)
            
            # Building each estimator
            estimator = self.build_estimator()
            estimator.fit(X_subset, y_subset, verbose=0)
            self.estimators.append(estimator)
            
        for seed in np.random.randint(0, 1000000, size=self.n_estimators):
            train_single_estimator(seed)
    
    @tf.function
    def predict_mean(self, X): # use all estimators
        X_tensor = tf.constant(X.values, dtype=tf.float32)
        predictions = []
        
        for i, estimator in enumerate(self.estimators):
            estimator_predictions = estimator.predict(X_tensor)  # Get predictions from the current estimator
            print(f"Shape of prediction from estimator {i}: {estimator_predictions.shape}")  # Print the shape
            predictions.append(estimator_predictions)
        
        # predictions = [pred if len(pred.shape) == 2 else pred[:, np.newaxis] for pred in predictions]

        # Stack predictions for mean and standard deviation calculation
        # predictions_stacked = np.stack(predictions, axis=-1)
        
        
        # mean_prediction = np.mean(predictions_stacked, axis=-1)
        # sd_prediction = np.std(predictions_stacked, axis=-1)
        # return mean_prediction, sd_prediction
        return predictions


In [19]:
gbt_model = GradientBoostedTreesEnsembleClassifier(
    n_trees=50, 
    max_depth=5, 
    n_estimators=80, 
    subportion=0.8, 
    sample_method='RANDOM'
)

In [None]:
gbt_model.fit(X_train_scaled, y_train)

Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmph4p2v238 as temporary training directory


[INFO 24-06-05 18:23:56.4142 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmph4p2v238/model/ with prefix d522cd1bf8fd4d02
[INFO 24-06-05 18:23:57.0110 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 339034 node(s), and 12 input feature(s).
[INFO 24-06-05 18:23:57.0110 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:23:57.0111 CEST kernel.cc:1061] Use fast generic engine


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpa83fua_l as temporary training directory


[INFO 24-06-05 18:24:18.4261 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpa83fua_l/model/ with prefix 3dae0e753e6a4082


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmprf_21d9v as temporary training directory


[INFO 24-06-05 18:24:19.0746 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 345740 node(s), and 12 input feature(s).
[INFO 24-06-05 18:24:19.0747 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:24:19.0747 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:24:38.3212 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmprf_21d9v/model/ with prefix c473d0dbc8784fa6


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmplms69szt as temporary training directory


[INFO 24-06-05 18:24:38.9439 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 330002 node(s), and 12 input feature(s).
[INFO 24-06-05 18:24:38.9439 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:24:38.9439 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:24:58.1973 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmplms69szt/model/ with prefix 52af7baffe1547b3


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmplux5i38b as temporary training directory


[INFO 24-06-05 18:24:58.7667 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 327348 node(s), and 12 input feature(s).
[INFO 24-06-05 18:24:58.7667 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:24:58.7667 CEST kernel.cc:1061] Use fast generic engine




[INFO 24-06-05 18:25:18.2203 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmplux5i38b/model/ with prefix e2697805b20d4fea




[INFO 24-06-05 18:25:18.8084 CEST decision_forest.cc:734] Model loaded with 17534 root(s), 333360 node(s), and 12 input feature(s).
[INFO 24-06-05 18:25:18.8085 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:25:18.8085 CEST kernel.cc:1061] Use fast generic engine






Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpemvd2p2r as temporary training directory






[INFO 24-06-05 18:25:37.5782 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpemvd2p2r/model/ with prefix 136c007d290d4276




[INFO 24-06-05 18:25:38.1258 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 317136 node(s), and 12 input feature(s).
[INFO 24-06-05 18:25:38.1258 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:25:38.1258 CEST kernel.cc:1061] Use fast generic engine






Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp07c0yo25 as temporary training directory


[INFO 24-06-05 18:25:56.2776 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp07c0yo25/model/ with prefix 72c6d6c9c7e74835


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp2gj9qs6a as temporary training directory


[INFO 24-06-05 18:25:56.8596 CEST decision_forest.cc:734] Model loaded with 17556 root(s), 354272 node(s), and 12 input feature(s).
[INFO 24-06-05 18:25:56.8597 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:25:56.8597 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:26:15.4379 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp2gj9qs6a/model/ with prefix 265a95d146a04bfd


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpbo2tnfrw as temporary training directory


[INFO 24-06-05 18:26:16.0109 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 336862 node(s), and 12 input feature(s).
[INFO 24-06-05 18:26:16.0109 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:26:16.0109 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:26:36.1391 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpbo2tnfrw/model/ with prefix 752986de35184c5b


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpapm3j_f1 as temporary training directory


[INFO 24-06-05 18:26:36.6912 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 308436 node(s), and 12 input feature(s).
[INFO 24-06-05 18:26:36.6912 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:26:36.6913 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:26:56.6837 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpapm3j_f1/model/ with prefix 3b3dd754b0ee409e


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp6gtdc8jh as temporary training directory


[INFO 24-06-05 18:26:57.2364 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 321286 node(s), and 12 input feature(s).
[INFO 24-06-05 18:26:57.2364 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:26:57.2364 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:27:18.3762 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp6gtdc8jh/model/ with prefix 19df1ae1aa424119
[INFO 24-06-05 18:27:18.9501 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 340650 node(s), and 12 input feature(s).
[INFO 24-06-05 18:27:18.9501 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:27:18.9501 CEST kernel.cc:1061] Use fast generic engine


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpk02siqsh as temporary training directory


[INFO 24-06-05 18:27:40.0659 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpk02siqsh/model/ with prefix 862b4c5078f24681


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp4x3wpome as temporary training directory


[INFO 24-06-05 18:27:40.6237 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 327524 node(s), and 12 input feature(s).
[INFO 24-06-05 18:27:40.6237 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:27:40.6237 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:28:03.1118 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp4x3wpome/model/ with prefix 0a7e5eca9c704eee
[INFO 24-06-05 18:28:03.6598 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 313148 node(s), and 12 input feature(s).
[INFO 24-06-05 18:28:03.6598 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:28:03.6598 CEST kernel.cc:1061] Use fast generic engine


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpbynikkf9 as temporary training directory


[INFO 24-06-05 18:28:29.9464 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpbynikkf9/model/ with prefix 48f21d1a48654ec7


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpp4_ajwne as temporary training directory


[INFO 24-06-05 18:28:30.4999 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 331820 node(s), and 12 input feature(s).
[INFO 24-06-05 18:28:30.5000 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:28:30.5000 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:28:56.1519 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpp4_ajwne/model/ with prefix 5f95a1100d9e4f55


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp1rpmj_om as temporary training directory


[INFO 24-06-05 18:28:56.7027 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 324728 node(s), and 12 input feature(s).
[INFO 24-06-05 18:28:56.7027 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:28:56.7027 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:29:18.4225 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp1rpmj_om/model/ with prefix f744339ed5594a14
[INFO 24-06-05 18:29:19.0046 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 339530 node(s), and 12 input feature(s).
[INFO 24-06-05 18:29:19.0046 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:29:19.0046 CEST kernel.cc:1061] Use fast generic engine


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp_40d_5qm as temporary training directory


[INFO 24-06-05 18:29:36.8081 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp_40d_5qm/model/ with prefix b1401a33271d4a43


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpd4yyz4k9 as temporary training directory


[INFO 24-06-05 18:29:37.3756 CEST decision_forest.cc:734] Model loaded with 17556 root(s), 338824 node(s), and 12 input feature(s).
[INFO 24-06-05 18:29:37.3756 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:29:37.3756 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:29:55.6834 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpd4yyz4k9/model/ with prefix fd401274a0044c24


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpubks1usj as temporary training directory


[INFO 24-06-05 18:29:56.2461 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 327974 node(s), and 12 input feature(s).
[INFO 24-06-05 18:29:56.2461 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:29:56.2461 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:30:15.1976 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpubks1usj/model/ with prefix 99198365180e4f01


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmprd9p73hg as temporary training directory


[INFO 24-06-05 18:30:15.7723 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 350046 node(s), and 12 input feature(s).
[INFO 24-06-05 18:30:15.7723 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:30:15.7723 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:30:36.8150 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmprd9p73hg/model/ with prefix 2aec26e5e95142f5


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpjh6jv3pj as temporary training directory


[INFO 24-06-05 18:30:37.3540 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 318332 node(s), and 12 input feature(s).
[INFO 24-06-05 18:30:37.3541 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:30:37.3541 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:30:59.5854 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpjh6jv3pj/model/ with prefix baec611fd1dd4005


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmppai42mas as temporary training directory


[INFO 24-06-05 18:31:00.1293 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 315808 node(s), and 12 input feature(s).
[INFO 24-06-05 18:31:00.1293 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:31:00.1293 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:31:21.6897 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmppai42mas/model/ with prefix c702500a5c894f6b
[INFO 24-06-05 18:31:22.2738 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 353818 node(s), and 12 input feature(s).
[INFO 24-06-05 18:31:22.2738 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:31:22.2739 CEST kernel.cc:1061] Use fast generic engine


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpomwuvu5u as temporary training directory


[INFO 24-06-05 18:31:41.4067 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpomwuvu5u/model/ with prefix ad90e20815314e82


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmppxefwyxi as temporary training directory


[INFO 24-06-05 18:31:41.9460 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 320342 node(s), and 12 input feature(s).
[INFO 24-06-05 18:31:41.9461 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:31:41.9461 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:32:04.3473 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmppxefwyxi/model/ with prefix c43b32142ab34d67


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpnqdw5t5d as temporary training directory


[INFO 24-06-05 18:32:04.8901 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 315278 node(s), and 12 input feature(s).
[INFO 24-06-05 18:32:04.8901 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:32:04.8901 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:32:25.3956 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpnqdw5t5d/model/ with prefix 17db70c2fdd74fb8


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp8gcehfaj as temporary training directory


[INFO 24-06-05 18:32:25.9474 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 328722 node(s), and 12 input feature(s).
[INFO 24-06-05 18:32:25.9474 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:32:25.9474 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:32:46.4480 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmp8gcehfaj/model/ with prefix 450f330706194f20
[INFO 24-06-05 18:32:46.9650 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 293456 node(s), and 12 input feature(s).
[INFO 24-06-05 18:32:46.9651 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:32:46.9651 CEST kernel.cc:1061] Use fast generic engine


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpjl463vx1 as temporary training directory


[INFO 24-06-05 18:33:07.9020 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpjl463vx1/model/ with prefix cbea3f7cbeba4a06
[INFO 24-06-05 18:33:08.5350 CEST decision_forest.cc:734] Model loaded with 17556 root(s), 356466 node(s), and 12 input feature(s).
[INFO 24-06-05 18:33:08.5350 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:33:08.5350 CEST kernel.cc:1061] Use fast generic engine


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpkhse6kha as temporary training directory


[INFO 24-06-05 18:33:30.0771 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpkhse6kha/model/ with prefix 5576411ee23c486e


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmprqzy4bym as temporary training directory


[INFO 24-06-05 18:33:30.6482 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 325402 node(s), and 12 input feature(s).
[INFO 24-06-05 18:33:30.6482 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:33:30.6482 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:33:51.1805 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmprqzy4bym/model/ with prefix 8a15c7d282614485
[INFO 24-06-05 18:33:51.7595 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 346478 node(s), and 12 input feature(s).
[INFO 24-06-05 18:33:51.7595 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:33:51.7595 CEST kernel.cc:1061] Use fast generic engine


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpe2ssuwqm as temporary training directory


[INFO 24-06-05 18:34:11.8590 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpe2ssuwqm/model/ with prefix 8656b48b6ec7483e


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpf9fhm_oj as temporary training directory


[INFO 24-06-05 18:34:12.4463 CEST decision_forest.cc:734] Model loaded with 17578 root(s), 333904 node(s), and 12 input feature(s).
[INFO 24-06-05 18:34:12.4464 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:34:12.4464 CEST kernel.cc:1061] Use fast generic engine
[INFO 24-06-05 18:34:32.8987 CEST kernel.cc:1233] Loading model from path /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpf9fhm_oj/model/ with prefix 13ee6bedc31249e6


Use /var/folders/3r/pzt2_p_x2xdfl2057nxkp3nm0000gn/T/tmpweam7b59 as temporary training directory


[INFO 24-06-05 18:34:33.4605 CEST decision_forest.cc:734] Model loaded with 17556 root(s), 313052 node(s), and 12 input feature(s).
[INFO 24-06-05 18:34:33.4605 CEST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-06-05 18:34:33.4605 CEST kernel.cc:1061] Use fast generic engine


In [None]:
prediction = gbt_model.predict_mean(X_test_scaled)

In [34]:
mean_predict, sd_predict = gbt_model.predict_mean(X_test_scaled)

2024-06-05 16:00:20.332044: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: Shapes of all inputs must match: values[0].shape = [229,1598] != values[6].shape = [229,1596]


InvalidArgumentError: {{function_node __wrapped__Pack_N_80_device_/job:localhost/replica:0/task:0/device:CPU:0}} Shapes of all inputs must match: values[0].shape = [229,1598] != values[6].shape = [229,1596] [Op:Pack] name: stack

In [None]:
y_test_eval = np.array(y_test).flatten()
y_pred_eval = mean_predict.numpy().flatten()

In [None]:
PLOT = 30

In [None]:
y_plot = y_test[:PLOT]
x_axis = np.arange(len(y_plot))
y_mean_predict = mean_predict[:PLOT].numpy()
y_var_predict = sd_predict[:PLOT].numpy()

In [None]:
plt.figure(figsize=(8, 6))
plt.xlim(-1,PLOT)
plt.scatter(x_axis, y_plot, color='lightblue', label="Ground Truth")
plt.scatter(x_axis, y_mean_predict.flatten(), color='pink', alpha=0.8, label='Mean Prediction')
# plt.errorbar(x_axis, y_mean_predict.flatten(), yerr=root_mse, fmt='none', ecolor='pink', label="Root MSE")
plt.errorbar(x_axis, y_mean_predict.flatten(), yerr=y_var_predict.flatten(), fmt='none', color='darkred', alpha=0.8, label="Uncertainty")
plt.ylabel('prices')
plt.legend()
plt.grid(True)  # Enable grid for better visualization
plt.show()

In [None]:
# predict gives the class number
# make a histogram of the class prediction
# entropy to measure the uncertainty