# Introduction

Since our decision tree also produced acceptable results, we also optimized a deep neural decision tree for this dataset.

A Deep Neural Decision Tree (DNDT) is a hybrid machine learning model that combines the interpretability of decision trees with the expressive power of deep neural networks. It leverages neural networks to mimic the hierarchical decision-making process of a decision tree, where decisions at each node are learned in a differentiable, end-to-end manner.

## Loading Libraries

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import KFold, StratifiedShuffleSplit
from sklearn.feature_selection import SelectKBest
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Layer
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.constraints import Constraint
from bayes_opt import BayesianOptimization

In [None]:
# Use Times New Roman for all text drawn by matplotlib in this notebook.
plt.rcParams.update({'font.family': 'Times New Roman'})

# constraint

In [None]:
class SetConstraint(Constraint):
    """Keras weight constraint that clips every weight into a fixed interval.

    Args:
        min_val: Lower bound applied to each weight (default -10).
        max_val: Upper bound applied to each weight (default 10).
    """

    def __init__(self, min_val=-10, max_val=10):
        self.min_val, self.max_val = min_val, max_val

    def __call__(self, w):
        """Return ``w`` with each entry clipped to ``[min_val, max_val]``."""
        lower, upper = self.min_val, self.max_val
        return tf.clip_by_value(w, clip_value_min=lower, clip_value_max=upper)

    def get_config(self):
        """Return the constructor arguments as a plain dict."""
        return dict(min_val=self.min_val, max_val=self.max_val)

In [None]:
# Loading Dataset

In [None]:
# Read the spreadsheet, using its first column as the row index.
filename = 'Dataset O.xlsx'
df = pd.read_excel(io=filename, index_col=0)

# The last column is the target; every column before it is a feature.
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]

In [None]:
# Feature selection

In [None]:
# Keep the 10 highest-scoring features and replace X with the reduced matrix.
# NOTE(review): the selector is fitted on the full dataset, before the
# train/test split further down, so test rows influence which features are
# kept. Consider fitting it on the training rows only.
# NOTE(review): no score_func is passed, so scikit-learn's default scoring
# function is used - confirm it is appropriate for this target.
selector = SelectKBest(k=10).fit(X, Y)
X = selector.transform(X)

In [None]:
# One stratified 80/20 split with a fixed seed so the partition is repeatable.
stratified_split = StratifiedShuffleSplit(n_splits=1, test_size=0.20, random_state=42)

# n_splits=1, so the splitter yields exactly one (train, test) pair of
# positional index arrays.
train_index, test_index = next(stratified_split.split(X, Y))

X_train, X_test = X[train_index], X[test_index]
# Y is a pandas Series whose index comes from the spreadsheet (index_col=0).
# Plain Y[...] would treat the integers as index labels, which can pick the
# wrong rows or raise KeyError; .iloc selects by position, matching X above.
Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]

print('X_train: {}     Y_train: {} \n X_test: {}     Y_test:{} '.format(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape))

In [None]:
class DNDTLayer(Layer):
    """Soft decision-tree layer with trainable cut points and leaf scores.

    Every input feature is soft-binned into ``n_cuts + 1`` bins by sigmoid
    gates placed at trainable cut points. A leaf is one combination of bins
    across all features, so the layer holds ``(n_cuts + 1) ** n_features``
    leaf scores. The output is the leaf-probability-weighted sum of those
    scores, one value per sample.

    Args:
        n_cuts: Number of cut points per feature.
        temperature: Factor multiplied onto ``input - cut_point`` before the
            sigmoid; larger values make the gates sharper.
        **kwargs: Forwarded to ``Layer.__init__``.
    """

    def __init__(self, n_cuts=1, temperature=10.0, **kwargs):
        super().__init__(**kwargs)
        self.n_cuts = n_cuts
        self.temperature = temperature
        # One constraint instance, applied to both trainable weights.
        self.set_constraint = SetConstraint()

    def build(self, input_shape):
        """Create the cut-point and leaf-score weights for ``input_shape``."""
        n_features = input_shape[-1]

        # One row of cut points per feature.
        self.cut_points = self.add_weight(
            name='cut_points',
            shape=(n_features, self.n_cuts),
            initializer='uniform',
            trainable=True,
            constraint=self.set_constraint
        )
        # One score per leaf; the leaf count grows exponentially with the
        # number of features.
        self.n_leaves = (self.n_cuts + 1) ** n_features
        self.leaf_scores = self.add_weight(
            name='leaf_scores',
            shape=(self.n_leaves, 1),
            initializer='zeros',
            trainable=True,
            constraint=self.set_constraint
        )

    def call(self, inputs):
        """Return the expected leaf score for each sample, shape [batch, 1]."""
        # gates[b, f, c] = sigmoid(temperature * (x[b, f] - cut_points[f, c]))
        offsets = tf.expand_dims(inputs, axis=-1) - self.cut_points
        gates = tf.sigmoid(offsets * self.temperature)  # [batch, features, cuts]

        # Start from a single pseudo-leaf with probability 1 and fold in one
        # feature at a time. The dtype follows the gates so the products below
        # never mix float types.
        leaf_probs = tf.ones([tf.shape(inputs)[0], 1], dtype=gates.dtype)
        for i in range(inputs.shape[-1]):
            gate = gates[:, i, :]  # [batch, n_cuts]
            unit = tf.ones_like(gate[:, :1])

            # Bin j is "passed gates 0..j-1 and did not pass gates j..end":
            #   P(bin j) = prod_{k<j} gate_k * prod_{k>=j} (1 - gate_k)
            # Cumulative products give a distinct tail product per bin. The
            # previous formula reused prod(1 - gate[1:]) for every middle bin,
            # which put gate_j * (1 - gate_j) into the second and later middle
            # bins and left them almost always inactive when n_cuts >= 3.
            # For n_cuts of 1 or 2 the result is unchanged.
            passed = tf.concat([unit, tf.math.cumprod(gate, axis=1)], axis=1)
            not_passed = tf.concat(
                [tf.math.cumprod(1 - gate, axis=1, reverse=True), unit], axis=1
            )
            feature_probs = passed * not_passed  # [batch, n_cuts + 1]

            # Outer product of the leaves so far with this feature's bins,
            # flattened back to [batch, (n_cuts + 1) ** (i + 1)].
            leaf_probs = tf.expand_dims(leaf_probs, axis=-1) * tf.expand_dims(feature_probs, axis=1)
            leaf_probs = tf.reshape(leaf_probs, [-1, (self.n_cuts + 1) ** (i + 1)])

        # Weighted sum of leaf scores.
        return tf.matmul(leaf_probs, self.leaf_scores)

    def compute_output_shape(self, input_shape):
        """One output value per sample."""
        return (input_shape[0], 1)

    def get_config(self):
        """Return the layer config including ``n_cuts`` and ``temperature``.

        Without these entries a saved model could not rebuild the layer with
        the hyperparameters it was created with.
        """
        config = super().get_config()
        config.update({'n_cuts': self.n_cuts, 'temperature': self.temperature})
        return config

In [None]:
def create_model(n_cuts, temperature, learning_rate):
    """Build and compile a model consisting of a single ``DNDTLayer``.

    The input width is read from the notebook-level ``X_train``.

    Args:
        n_cuts: Cut points per feature; truncated with ``int`` before use.
        temperature: Passed through to ``DNDTLayer``.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A Keras ``Model`` compiled with mean-squared-error loss.
    """
    n_features = X_train.shape[1]
    features_in = Input(shape=(n_features,))
    tree = DNDTLayer(n_cuts=int(n_cuts), temperature=temperature)
    model = Model(inputs=features_in, outputs=tree(features_in))
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
    )
    return model

In [None]:
def train_model(n_cuts, temperature, learning_rate):
    """Objective for the Bayesian optimiser: negative best validation MSE.

    A model is built with the given hyperparameters and fitted on the
    notebook-level ``X_train`` / ``Y_train``. Keras holds out 20% of those
    rows as a validation set, and early stopping monitors ``val_loss``.

    The score is taken from the held-out validation loss rather than from
    predictions on the rows the model was fitted on. Scoring on fitted rows
    rewards hyperparameters that merely memorise the training data.

    Args:
        n_cuts: Cut points per feature; truncated with ``int`` before use.
        temperature: Gate sharpness passed to the model.
        learning_rate: Adam learning rate.

    Returns:
        The negated minimum ``val_loss`` seen during training, as a float.
        It is negated because the optimiser maximises its objective.
    """
    model = create_model(int(n_cuts), temperature, learning_rate)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(
        X_train, Y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=0
    )
    # The model is compiled with mean-squared-error loss, so val_loss is the
    # validation MSE for each epoch.
    best_val_mse = min(history.history['val_loss'])
    return -float(best_val_mse)

In [None]:
# Search space: (lower, upper) bound for each hyperparameter of train_model.
pbounds = dict(
    n_cuts=(1, 4),
    temperature=(1.0, 20.0),
    learning_rate=(0.001, 0.01),
)

# Maximise train_model over the search space: 5 initial probes followed by
# 10 optimisation iterations, with a fixed seed.
optimizer = BayesianOptimization(f=train_model, pbounds=pbounds, random_state=42)
optimizer.maximize(init_points=5, n_iter=10)

# Hyperparameters of the best-scoring probe.
best_params = optimizer.max['params']

# Report them, heading first and the dict on the following line.
print("Best Hyperparameters:", best_params, sep="\n")

In [None]:
# Fit a fresh model built from the hyperparameters found by the search.
best_model = create_model(**best_params)
best_model.fit(
    X_train,
    Y_train,
    epochs=100,
    batch_size=32,
    verbose=0,
)

# Training-set predictions as a flat 1-D array.
y_pred_train = best_model.predict(X_train).flatten()

# Error metrics on the training set.
mse_train = mean_squared_error(Y_train, y_pred_train)
mae_train = mean_absolute_error(Y_train, y_pred_train)
rmse_train = np.sqrt(mse_train)
# R-squared computed as 1 - MSE / variance of the targets.
r2_train = 1 - mse_train / np.var(Y_train)

# Report the training-set results.
print('Training Set: ')
print(f'Mean Squared Error: {mse_train}')
print(f'Root Mean Squared Error: {rmse_train}')
print(f'Mean Absolute Error: {mae_train}')
print(f'R-squared: {r2_train}')

In [None]:
# Reuse the best_model fitted in the training-metrics cell instead of building
# and fitting a second one. A new fit starts from freshly initialised weights,
# so the test metrics would describe a different model than the training
# metrics reported above.

# Predict on the testing set
y_pred_test = best_model.predict(X_test).flatten()

# Calculate metrics on the testing set
mse_test = mean_squared_error(Y_test, y_pred_test)
rmse_test = np.sqrt(mse_test)
mae_test = mean_absolute_error(Y_test, y_pred_test)
# R-squared computed as 1 - MSE / variance of the test targets.
r2_test = 1 - mse_test / np.var(Y_test)

# Print the results for testing set
print('\nTesting Set: ')
print(f'Mean Squared Error: {mse_test}')
print(f'Root Mean Squared Error: {rmse_test}')
print(f'Mean Absolute Error: {mae_test}')
print(f'R-squared: {r2_test}')

In [None]:
# Refresh the predictions of the current best_model on both splits.
y_pred_train = best_model.predict(X_train).flatten()
y_pred_test = best_model.predict(X_test).flatten()

# Residual = actual value minus predicted value, for each split.
residuals_train = Y_train - y_pred_train
residuals_test = Y_test - y_pred_test

In [None]:
# Actual vs. predicted scatter for the training and testing sets.
plt.figure(figsize=(10, 6))

# Training points
plt.scatter(Y_train, y_pred_train, color='blue', label='Training Data')

# Testing points
plt.scatter(Y_test, y_pred_test, color='red', label='Testing Data')

# y = x reference line. It spans the actual values of BOTH sets, so test
# points outside the training target range still have the diagonal next to
# them.
diag_min = min(Y_train.min(), Y_test.min())
diag_max = max(Y_train.max(), Y_test.max())
plt.plot([diag_min, diag_max], [diag_min, diag_max], color='black', lw=2, linestyle='--')

plt.title('Actual vs. Predicted Plot')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.legend()
plt.grid(True)

# Save the figure with 600 DPI as a JPEG image
plt.savefig('F17.jpg', dpi=600, format='jpg', bbox_inches='tight')

plt.show()

In [None]:
# Residuals against predicted values for the training and testing sets.
fig, ax = plt.subplots(figsize=(10, 6))

# One scatter series per split.
ax.scatter(y_pred_train, residuals_train, color='blue', label='Training Data')
ax.scatter(y_pred_test, residuals_test, color='red', label='Testing Data')

# Zero-residual reference line.
ax.axhline(y=0, color='black', linestyle='-')

ax.set_title('Residual Plot')
ax.set_xlabel('Predicted Values')
ax.set_ylabel('Residuals')
ax.legend()
ax.grid(True)

# Write the figure to disk as a 600 DPI JPEG, then display it.
fig.savefig('F18.jpg', dpi=600, format='jpg', bbox_inches='tight')

plt.show()