In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

from ppg_project.training_pipeline import XGBoostTrainingPipeline
from ppg_project.dtypes import XGBoostTrainingPipelineConfig

<h1> Define pipeline and load data </h1>

In [None]:
# Settings handed to the training pipeline: where the data lives, how the
# model is validated, and which feature groups are used.
# NOTE(review): the keyword `appply_segmentation` (three p's) is kept exactly
# as written; confirm it matches the field name declared on
# XGBoostTrainingPipelineConfig before renaming it.
config = XGBoostTrainingPipelineConfig(
    data_path="../data",
    validation_strategy="kfold",
    num_folds=5,
    features_list=(
        "features",
        "peak",
        "spectrogram",
    ),
    appply_segmentation=False,
)

# Build the pipeline object from that configuration.
pipe = XGBoostTrainingPipeline(config)

<h1> Run process and evaluate model </h1>

In [None]:
# Execute the pipeline. `results` is indexed below like a DataFrame that has
# one column per evaluation metric (presumably one row per validation fold --
# TODO confirm against XGBoostTrainingPipeline.run).
results = pipe.run()

# Mean and standard deviation of every metric across the rows of `results`.
# The aggregators are given by name rather than as np.mean / np.std: pandas
# 2.1+ emits a FutureWarning for NumPy callables and will eventually call them
# directly, which would silently switch the std from pandas' sample estimate
# (ddof=1) to NumPy's population estimate (ddof=0). The string names keep the
# current output unchanged.
results[["rmse", "rmsle", "mae", "r2"]].aggregate(["mean", "std"])

<h1> Look at feature importance </h1>

In [None]:
import matplotlib.pyplot as plt

# Flatten the stored feature importances into a table with one column per
# feature name (presumably one record per validation fold -- TODO confirm).
df_feat = pd.json_normalize(results.feature_importances)

# Each of these name fragments identifies a family of related columns. Every
# family is collapsed into a single row-wise average so the chart shows one
# bar per family instead of one bar per individual column.
list_col_features = ["spectrogram_mean", "spectrogram_std"]
for feature in list_col_features:
    # Substring match: every column whose name contains the fragment.
    list_col = [col for col in df_feat.columns if feature in col]
    print(feature, list_col)
    if list_col:
        df_feat[f"mean_{feature}"] = df_feat[list_col].mean(axis=1)
        # Reassign instead of dropping in place, so the frame is never
        # mutated behind another reference.
        df_feat = df_feat.drop(columns=list_col)

# Per-feature mean and spread across the rows of the table.
mean_values = df_feat.mean()
std_values = df_feat.std()

fig, ax = plt.subplots()

# Horizontal bars: importance runs along the x-axis, so the spread must be
# passed as `xerr`. With `yerr` the error bars would be drawn vertically,
# along the category axis, and would not represent the std of the bar length.
mean_values.plot(
    kind='barh',
    xerr=std_values,
    ax=ax,
    capsize=4,
    color='lightblue',
    edgecolor='black',
)

# On a horizontal bar chart the values are on x and the feature names on y.
ax.set_xlabel('Mean Value')
ax.set_ylabel('Feature')
ax.set_title('Feature importance with Mean and Std')

plt.show()