In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import sklearn
import pickle 

from Utilities.utils import *
from Utilities.plot_functions import *

# Define Experiment Characteristics

In [None]:
# --- Target -----------------------------------------------------------------
# Quantity to predict (e.g., 'Ees', 'dead_volume') and the unit used when
# reporting results.
prediction_variable = 'Ees'
variable_unit = 'mmHg/mL'

# --- Model and inputs -------------------------------------------------------
# Regression method: XGB.
regressor = 'XGB'

# Identifier of the input feature set.
input_selection = 'M3'

# --- Noise injection --------------------------------------------------------
# noise_mode switches noise addition on/off, noise_level is the noise
# percentage (e.g., 10, 20, 30) and noisy_features names the feature group
# that receives the noise (e.g., 'STI' or 'BP').
noise_mode = False
noise_level = 10
noisy_features = 'STI'

# --- Output -----------------------------------------------------------------
# Set to True to save the generated figures.
save_figure_boolean = False

# Label for the type of data being used in the analysis.
experiment_type = f'insilico_{input_selection}'

# Load & Prepare Data for Analysis

In [None]:
# Read the in-silico CSV and hand it straight to the project helper, which is
# given the chosen input set and the prediction variable.
dataset = select_columns_based_on_input(
    pd.read_csv('Data/insilico_elastance_data.csv'),
    input_selection,
    prediction_variable,
)

# Scale the selected columns, then pass the scaled frame through the noise
# helper (presumably a no-op when noise_mode is False -- confirm in
# Utilities.utils).
dataset_scaled = scale_data(dataset)
noisy_dataset = add_random_noise(
    dataset_scaled,
    noise_level,
    noisy_features,
    input_selection,
    noise_mode,
)

# Quick sanity check on the unscaled selection: dimensions and first rows.
print(dataset.shape)
dataset.head()

# Regression Analysis

In [None]:
# Separate the prepared dataset into features (X) and target (y).
X, y = split_features_target(noisy_dataset)

# Alternative: reuse previously saved indices instead of drawing a new split.
# train_indices, test_indices = load_train_test_indices()

# Draw a reproducible 70/30 split over row positions only. The split arrays
# that train_test_split used to build from X and y were overwritten right
# away by the index-based selection below, so only the index array is split
# now. For a fixed random_state and sample count the resulting indices are the
# same as before.
indices = np.arange(len(X))
train_indices, test_indices = train_test_split(indices, test_size=0.3, random_state=42)

# Select the train/test rows with the split indices.
X_train, y_train = X[train_indices], y[train_indices]
X_test, y_test = X[test_indices], y[test_indices]

print('The train set size is:', X_train.shape)
print('The test set size is:', X_test.shape)

# Alternative: tune hyperparameters instead of using the preset regressor.
# model, y_pred, best_parameters = hyperparameter_tuning(X_train, X_test, y_train, y_test,regressor)
model, y_pred = elastance_xgb_regressor(
    X_train, X_test, y_train, y_test, prediction_variable, input_selection
)

# Convert reference and predicted values with the project helper, using the
# unscaled dataset (presumably back to original units -- confirm in
# Utilities.utils).
rescaled_y_test = rescale_values(y_test, prediction_variable, dataset)
rescaled_y_pred = rescale_values(y_pred, prediction_variable, dataset)

# Report the results and plot reference against predicted values.
print_results(rescaled_y_test, rescaled_y_pred, variable_unit)
plot_data(
    rescaled_y_test,
    rescaled_y_pred,
    experiment_type,
    prediction_variable,
    regressor,
    save_figure_boolean,
)

# Calculate Learning Curves

In [None]:
# XGBoost model built by the project helper for the selected target and input
# set (using optimized hyperparameters, per the original notebook).
xgb_model = set_xgboost_params(prediction_variable, input_selection)

# Scoring metric for the learning curve (e.g., 'neg_mean_squared_error').
scoring = 'neg_mean_squared_error'

# Cross-validation setting, forwarded to the plotting helper as `cv`.
cv = 5

# 50 evenly spaced train-size values between 0.01 and 1.0.
train_sizes = np.linspace(0.01, 1.0, num=50)

# Draw the learning curve on the full feature/target arrays.
plot_learning_curve(
    xgb_model,
    "Learning Curve",
    X,
    y,
    cv=cv,
    train_sizes=train_sizes,
    scoring=scoring,
)

# Render the figure.
plt.show()