In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from Utilities.utils import *
from Utilities.plot_functions import *
# from hyper_tuning import hyperparameter_selection_random_forest_n_estimators, hyperparameter_selection_random_forest_max_depth

# Select Regression Model & Prediction Variable 

In [None]:
# Target quantity to predict (e.g. 'CT_sys' or 'Z_compl') and the unit used when reporting it.
prediction_variable = 'Z_compl'
variable_unit = 'mmHg.s/mL'  # alternative kept from the original notebook: 'mL/mmHg'

# Regression method, e.g. 'RF' or 'ANN'.
regressor = 'RF'

# Model variant, defined by its input vector:
#   M1 -> brSBP, brDBP, HR, cfPWV, crPWV
#   M2 -> brSBP, brDBP, cfPWV, crPWV
#   M3 -> MAP, cfPWV, crPWV
#   M4 -> cfPWV, crPWV
model_selection = 'M1'

# Noise injection: on/off switch and noise level in %.
noise_mode = True
noise_level = 15

# Switch for saving figures.
save_figure_boolean = False

# Switch for printing results during training.
verbose = False

# Type of data being used in the analysis.
experiment_type = 'insilico'

# Load data

In [None]:
# Read the in-silico CSV and hand it straight to the noise helper.
# NOTE(review): noise_level fills two consecutive positional arguments of
# add_random_noise; the helper's signature is not visible here - confirm
# that both are meant to receive the same value.
dataset = (
    pd.read_csv('Data/insilico_Zao_CT_dataset.csv')
    .pipe(add_random_noise, noise_level, noise_level, noise_mode)
)

# Report the dataset shape, then preview the first rows as the cell output.
print('The dataset size is:', dataset.shape)
dataset.head()

# Regression Analysis

In [None]:
# Reduce the dataset according to the chosen model variant and target.
# Note: `dataset` is overwritten here, so re-running this cell passes the
# already-processed frame back into select_features.
dataset = select_features(dataset, model_selection, prediction_variable)

# Scale the selected data, then separate it into X and y.
scaled_dataset = scale_data(dataset)
X, y = split_features_target(scaled_dataset)

# First split: 20 % of the rows are held out as the test set. `indices`
# carries the row positions through the split.
indices = np.arange(len(X))
(
    X_train_val, X_test,
    y_train_val, y_test,
    indices_train_val, indices_test,
) = train_test_split(X, y, indices, test_size=0.20, random_state=42)

# Second split: 25 % of the remaining pool becomes the validation set
# (0.25 x 0.8 = 0.20 of all rows). `indices2` counts positions inside the
# train+validation pool, not inside the full dataset.
indices2 = np.arange(len(X_train_val))
(
    X_train, X_val,
    y_train, y_val,
    indices_train, indices_val,
) = train_test_split(X_train_val, y_train_val, indices2, test_size=0.25, random_state=42)

print('The train set size is:', X_train.shape)
print('The test set size is:', X_test.shape)
print('The validation set size is:', X_val.shape)

## Hyperparameter Selection

In [None]:
# Hyperparameter tuning is currently disabled; uncomment the relevant call below to run it.
# optimal_max_depth = tune_random_forest_max_depth(X_train_val, y_train_val)
# optimal_epochs = tune_ann_epochs(X_train, y_train, X_val, y_val)

## Model Testing

In [None]:
# Build the selected regressor from the train/test splits; the helper returns
# the model together with its predictions.
model, y_pred = select_regression_model(X_train, X_test, y_train, y_test, prediction_variable, regressor)
# Disabled alternative that also returns the tuned hyper-parameters:
# model, y_pred, hyper_parameters = hyperparameter_tuning(X_train, X_test,y_train, y_test,regressor)

# Rescale targets and predictions, using the feature-selected `dataset` as the
# reference. NOTE(review): presumably this maps the scaled values back to the
# original units of prediction_variable - confirm against rescale_values.
y_test_scaled = rescale_values(y_test, prediction_variable, dataset)
y_pred_scaled = rescale_values(y_pred, prediction_variable, dataset)

# For the ANN the predictions are flattened to 1-D; other regressors are left untouched.
y_pred_scaled = np.ravel(y_pred_scaled) if regressor == 'ANN' else y_pred_scaled

### Print results

In [None]:
# Report the rescaled test targets against the rescaled predictions.
# NOTE(review): print_results comes from a star import; presumably it prints
# error metrics expressed in `variable_unit` - confirm in Utilities.
print_results(y_test_scaled, y_pred_scaled, variable_unit)

### Plot results

In [None]:
# Plot the rescaled test targets against the rescaled predictions.
# experiment_type, prediction_variable and regressor are passed through to the
# helper, and save_figure_boolean is the figure-saving switch from the
# configuration cell. NOTE(review): plot_data comes from a star import -
# confirm how it uses these arguments.
plot_data(y_test_scaled,y_pred_scaled,experiment_type,prediction_variable,regressor,save_figure_boolean)

## Permutation feature importances

In [None]:
# Define the model.
# NOTE(review): this builds a fresh model instead of reusing the one evaluated
# in the "Model Testing" cell; if select_regression_model is not seeded, the
# importances below describe a slightly different fit - confirm this is intended.
model, y_pred = select_regression_model(X_train, X_test, y_train, y_test, prediction_variable, regressor)

# Define parameters
num_iterations = 20

# Error of the unpermuted model on the training data. Neither the model nor
# the data change between repetitions, so this is computed once rather than
# inside the loop.
# NOTE(review): variables and printed labels say "RMSE", but the metric used is
# `mean_squared_error`; if that name resolves to scikit-learn's function with
# default arguments the values are MSE, not RMSE - confirm and relabel if so.
baseline_rmse = mean_squared_error(y_train, model.predict(X_train))

# Run the permutation feature importance calculation multiple times.
# The original code treated each returned importance as the increase in error
# over the baseline (new = baseline + importance), so the per-feature
# difference is the importance itself; it is used directly instead of being
# added to and subtracted from the baseline, which only added rounding error.
# np.array(...) takes a copy so every repetition owns its values.
# NOTE(review): importances are measured on the training split; consider the
# validation or test split if generalisation is what should be explained.
rmse_differences = [
    np.array(permutation_importances(model, X_train, y_train, mean_squared_error), dtype=float)
    for _ in range(num_iterations)
]

# Calculate mean and standard deviation of RMSE differences
# (pooled over all features and all repetitions).
mean_rmse_difference = np.mean(rmse_differences)
std_dev_rmse_difference = np.std(rmse_differences)

# Calculate average importance per feature across the repetitions.
importance_sum = np.sum(rmse_differences, axis=0)
average_importance = importance_sum / num_iterations

# Pair every feature index with its average importance, sorted ascending.
feature_importance_tuples = sorted(enumerate(average_importance), key=lambda pair: pair[1])

# Print the importances in ascending order (features are numbered from 1).
for i, importance in feature_importance_tuples:
    print(f'Feature {i+1}: {importance}')

# Print the results
print(f"\nMean Increase in RMSE: {np.round(mean_rmse_difference,6)}")
print(f"Standard Deviation of Increase in RMSE: {np.round(std_dev_rmse_difference,6)}")