# Panic Project (DHLAB) - Multiclass Classification PyCaret Model for Panic Severity Prediction

author:  `@cyshin971`  

date:    `2025-07-xx`  

Instructions:
- Scrape data (see `README` - `Instructions` - `Data Scraping`)  
- Run `data_preprocessing.ipynb`
- Run `data_imputation.ipynb`
- Run `data_analysis.ipynb`
- Under ⚙️|Settings, specify name of the `scraped_data_filename` you want to use
  - Specify how many days prior to panic (`dbp`) you want to use (`1`, `2`, `3`)
  - (Optional) specify the type of imputation you want to use via `use_growing_avg` and `null_default_zero`

version: `3-1`

In [None]:
# Version tag of this notebook; it is embedded in the names of the processed-data
# CSV and the saved model file written further down.
version = "3-1"

# 📚 | Import Libraries 

Required Packages:
- `python` (`3.10`)
- `pandas`  
- `numpy`
- `json`
- `matplotlib`
- `pycaret`
- `shap`
- `sklearn`

In [None]:
import config as cfg
import logging

import os
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.4f' % x)

import numpy as np
import matplotlib.pyplot as plt
logging.getLogger('matplotlib').setLevel(logging.WARNING)

from library.pandas_utils import move_column, remove_columns, create_empty_df, read_csv, aggregate_by_column
from library.text_utils import save_as_csv
from library.json_utils import load_dict_from_file
from library.path_utils import get_file_path
from library.matplotlib_utils import plot_histogram_of_counts

from pycaret.classification import *
import shap
from sklearn.ensemble import VotingClassifier, StackingClassifier

# ⚙️ | Settings

In [None]:
# NOTE(review): the Path Variables cell resets this name and re-reads it from
# `current_config`, so the value typed here is not the one used in output file names.
scraped_data_filename = "final_result_20250626_360_no_ffill" # Name of the scraped data file without extension (.csv)
# Leave the two imputation flags as None to fall back to the values stored in the features dict.
use_growing_avg = None # Set to True/False to retrieve growing average '_grw' OR average '_avg' data
null_default_zero = None # Set to True/False to retrieve zero filled '_zero' OR global average filled '_global' data

# Number of days before panic (DBP) whose data are used as model input.
dbp_param = 2 # from 1 to 3, depending on the DBP model you want to use.

# When enabled, `top_10_features` must be a list whose 10 names all exist as columns
# of the processed data (the construction cell raises otherwise).
only_top_10_features = False # If True, only the top 10 features will be used for the model training and evaluation.
top_10_features = None
# Top 10 features for 1 DBP 360 model
# top_10_features = ['STAI_X2', 'age', 'PHQ_9', 'smoking', 'CTQ_4', 'annoying', 'gender', 'drinkHx', 'SLT4', 'step_delta2']
# Top 10 features for 2 DBP 360 model
# top_10_features = ['smoking(1)', 'step_mean_delta(2)', 'step_hvar_mean_delta(2)', 'step_delta2(1)', 'suicideHx', 'steps_variance(1)', 'marriage', 'age', 'gender', 'step_delta2(2)']
# Top 10 features for 3 DBP 360 model
# top_10_features = ['HR_mean(3)', 'HR_acrophase(2)', 'steps_mean(3)', 'age', 'steps_maximum(2)', 'smoking(1)', 'HR_var(3)', 'gender', 'suicideHx', 'steps_maximum(3)']

# 📁 | Path Variables 

In [None]:
# Directory layout used throughout the notebook.
DATA_PATH = "./data"
TMP_PATH = "./_tmp"
OUT_PATH = TMP_PATH
OUTPUT_PATH = "./panic_severity_model/_results"

# Load the run configuration from OUT_PATH; the error message points the user at
# data_preprocessing.ipynb when it is missing.
try:
    current_config = load_dict_from_file(OUT_PATH, 'current_config')
except FileNotFoundError as err:
    raise FileNotFoundError(f"File not found: {get_file_path(OUT_PATH, 'current_config.csv')}.\nPlease run data_preprocessing.ipynb first.") from err

print(f"Loaded current config with {len(current_config)} keys:")
for k, v in current_config.items():
    print(f"  {k}: {v}")

# Remember what the Settings cell asked for so that an override is reported
# instead of happening silently. `globals().get` keeps this cell runnable on its own.
requested_scraped_data_filename = globals().get('scraped_data_filename')

# The configuration is authoritative for the data provenance; the version keys
# default to None so later references never hit an undefined name.
scraped_data_filename = current_config.get('scraped_data_filename')
preproc_version = current_config.get('preproc_version')
imputation_version = current_config.get('imputation_version')

if scraped_data_filename is None:
    raise ValueError("scraped_data_filename not found in current_config. Please ensure that data_preprocessing.ipynb has been run successfully before running this notebook.")

if requested_scraped_data_filename not in (None, scraped_data_filename):
    logging.warning(
        f"scraped_data_filename from Settings ('{requested_scraped_data_filename}') differs from "
        f"current_config ('{scraped_data_filename}'); using the current_config value."
    )

# 🌐 | Global Variables

In [None]:
class OUTPUT:
    """Static lookup tables for the three-level panic severity target.

    All tables are class attributes; the two helpers are thin dictionary
    lookups and raise ``KeyError`` for unknown labels or names.
    """

    # Ordered class names; the position in this list is the integer label.
    class_names = ['Mild', 'Moderate', 'Severe']
    num_classes = len(class_names)

    # Integer label <-> class name.
    label2name = {idx: name for idx, name in enumerate(class_names)}
    name2label = {name: idx for idx, name in enumerate(class_names)}

    # 'class_<idx>' key (as used for the per-class SHAP tables) <-> class name.
    plot_label2name = {f'class_{idx}': name for idx, name in enumerate(class_names)}
    plot_name2label = {name: f'class_{idx}' for idx, name in enumerate(class_names)}

    # Plot colour per class name.
    color_name2color = dict(zip(class_names, ('skyblue', 'orange', 'lightcoral')))

    # Raw 1-5 severity score -> class name (1-2 Mild, 3 Moderate, 4-5 Severe).
    output_dict = {
        1: 'Mild',
        2: 'Mild',
        3: 'Moderate',
        4: 'Severe',
        5: 'Severe',
    }
    # The forward map is many-to-one, so the inverse keeps only the LAST score
    # seen for each name: Mild -> 2, Moderate -> 3, Severe -> 5.
    output_dict_inv = {name: score for score, name in output_dict.items()}

    @staticmethod
    def get_label_name(label):
        """Return the class name for an integer label (0, 1 or 2)."""
        return OUTPUT.label2name[label]

    @staticmethod
    def get_label_from_name(name):
        """Return the integer label for a class name ('Mild', 'Moderate', 'Severe')."""
        return OUTPUT.name2label[name]

# ⚒️ | Preprocessed Data

In [None]:
# Load the feature-group dictionary from DATA_PATH; the error message points the
# user at data_preprocessing.ipynb when it is missing.
try:
    features_dict = load_dict_from_file(DATA_PATH, 'panic_features_dict')
except FileNotFoundError as err:
    # Report the directory that was actually searched (DATA_PATH, not OUT_PATH).
    raise FileNotFoundError(f"File not found: {get_file_path(DATA_PATH, 'panic_features_dict.csv')}.\nPlease run data_preprocessing.ipynb first.") from err

print(f"Loaded features dict with {len(features_dict)} keys:")
for k, v in features_dict.items():
    print(f"  {k}: {v}")

# Imputation flags left as None in Settings fall back to the values recorded
# in the features dict (False when the key is absent).
if use_growing_avg is None:
    use_growing_avg = features_dict.get('use_growing_avg', False)
if null_default_zero is None:
    null_default_zero = features_dict.get('null_default_zero', False)

In [None]:
# NOTE: The following lines can be used in development mode to select data files generated through development mode
# spec = f"_{('grw' if use_growing_avg else 'avg')}_{('zero' if null_default_zero else 'global')}"
# data_filename = f'panic_pre_data_filled_{imputation_version}({scraped_data_filename}){spec}'

def _load_and_preview(directory, filename, n_rows=3):
    """Read one CSV through the project helpers, display its first rows, return the frame."""
    frame = read_csv(get_file_path(directory, filename))
    display(frame.head(n_rows))
    return frame

# NOTE: Make sure to change the file names below to match the actual files you have
pre_data = _load_and_preview(DATA_PATH, 'panic_pre_data_filled.csv')
metadata = _load_and_preview(DATA_PATH, 'panic_metadata.csv')
demography_data = _load_and_preview(DATA_PATH, 'panic_demography_data.csv')
patient_data = _load_and_preview(DATA_PATH+'/analysis', 'panic_patient_analysis.csv')

# Report how many features each group of the features dict contains.
for _group_label, _group_key in (
    ('Demographic', 'demography'),
    ('Daily', 'dailylog'),
    ('Life Log', 'lifelog'),
    ('Questionnaire', 'questionnaire'),
):
    print(f"Number of {_group_label} Features: {len(features_dict[_group_key])}")

# 🔄️ | Data Processing

## Filter Relevant Data

In [None]:
# Start from empty frames before rebuilding them below.
# NOTE(review): presumably a reset so a failed re-run leaves no stale frames — confirm.
filtered_metadata = create_empty_df()
filtered_pre_data = create_empty_df()
proc_data_init = create_empty_df()

# --- Panic events that have a valid dbp_param-day look-back window ---
_is_panic = metadata['panic_label'] == 1
print(f"Found {int(_is_panic.sum())} entries with panic label.")
_has_valid_window = metadata[f'valid_entry_{dbp_param}'] == 1
proc_data_init = metadata.loc[_is_panic & _has_valid_window].copy()
filtered_panic_metadata_entry_ids = proc_data_init['entry_id'].unique()

# --- Metadata rows that reference those events and lie within the window ---
_refs_valid_panic = metadata['ref_event_id'].isin(filtered_panic_metadata_entry_ids)
_within_window = metadata['dbp'] <= dbp_param
filtered_metadata = metadata.loc[_refs_valid_panic & _within_window].copy()
print(f"Found {len(filtered_panic_metadata_entry_ids)} entries with panic label and at least {dbp_param} days of prior data.")

# Sanity check: exactly dbp_param distinct DBP values must be present.
unique_dbp = filtered_metadata['dbp'].unique()
if len(unique_dbp) != dbp_param:
    raise ValueError(f"Expected {dbp_param} unique DBP values, found {len(unique_dbp)}: {unique_dbp}")
del unique_dbp

# --- Matching rows of the imputed feature table ---
filtered_entry_ids = filtered_metadata['entry_id'].unique()
filtered_panic_entry_ids = filtered_metadata['ref_event_id'].unique()
filtered_pre_data = pre_data.loc[pre_data['entry_id'].isin(filtered_entry_ids)].copy()

# Sanity check: one feature row per selected metadata row.
if len(filtered_pre_data) != len(filtered_metadata):
    raise ValueError(f"Filtered pre_data length {len(filtered_pre_data)} does not match filtered_metadata length {len(filtered_metadata)}")
print(f"Filtered data contains {len(filtered_panic_entry_ids)} unique panic events and {len(filtered_entry_ids)} unique entry IDs.")
print(f"Filtered pre_data contains {len(filtered_pre_data['ID'].unique())} unique IDs.")
del filtered_entry_ids

# Keep only the identifier and label columns as the skeleton of the processed data.
_skeleton_columns = features_dict['id'] + features_dict['label']
proc_data_init = proc_data_init[_skeleton_columns].copy()
print(f"Initial processed data contains {len(proc_data_init)} entries with {len(proc_data_init.columns)} columns.")
display(proc_data_init.head(5))

## 🧱 | Construct Processed Data

In [None]:
# Rebuild the intermediate table from the skeleton (IDs + labels) on every run.
proc_data_int = create_empty_df()
proc_data_int = proc_data_init.copy()

# 'severity' is the prediction target, so drop it from features_dict['dailylog']
# to keep it out of the daily-log input features.
features_dict['dailylog'] = [f for f in features_dict['dailylog'] if f != 'severity']

# Work on a local view of the demography table so the loaded `demography_data`
# frame is not narrowed in place (keeps the cell re-runnable with other settings).
demography_features = demography_data
if only_top_10_features:
    if not top_10_features:
        raise ValueError("only_top_10_features is True but top_10_features is not set. "
                         "Please specify the list of top 10 feature names under Settings.")
    top_10_demo = [f for f in top_10_features if f in features_dict['demography']]
    demography_features = demography_data[['ID'] + top_10_demo].copy()
# Demography is per patient, so it is broadcast to every panic event via 'ID'.
proc_data_int = pd.merge(proc_data_int, demography_features, on='ID', how='left')

for i in range(1, dbp_param + 1):
    # Map 'entry_id' -> 'ref_event_id' for the rows that lie i days before a panic event.
    dbp_dict = filtered_metadata[filtered_metadata['dbp'] == i].set_index('entry_id')['ref_event_id'].to_dict()
    print(f"Processing data for {i} days before panic.")

    entry_ids = list(dbp_dict)
    filtered_pre_data_i = filtered_pre_data[filtered_pre_data['entry_id'].isin(entry_ids)].copy()
    if len(filtered_pre_data_i) != len(dbp_dict):
        raise ValueError(f"Filtered pre_data length {len(filtered_pre_data_i)} does not match filtered_metadata length {len(dbp_dict)} for {i} days before panic")
    # Re-key each row by the panic event it precedes so it can be merged onto that event.
    filtered_pre_data_i['entry_id'] = filtered_pre_data_i['entry_id'].map(dbp_dict)

    features_list = ['entry_id'] + features_dict['dailylog'] + features_dict['lifelog']
    # Questionnaire features are only taken from the earliest day in the window.
    if i == dbp_param:
        features_list += features_dict['questionnaire']

    filtered_pre_data_i = filtered_pre_data_i[features_list].copy()
    # With more than one day, suffix every non-key column with "(i)" to keep days apart.
    if dbp_param > 1:
        rename_map = {c: f"{c}({i})" for c in filtered_pre_data_i.columns if c != 'entry_id'}
        filtered_pre_data_i = filtered_pre_data_i.rename(columns=rename_map)

    proc_data_int = pd.merge(proc_data_int, filtered_pre_data_i, on='entry_id', how='left', suffixes=('', f'_{i}'))

# Collapse the raw 1-5 severity score into the three classes of OUTPUT.output_dict.
mapped_severity = proc_data_int['severity'].map(OUTPUT.output_dict)
# Scores outside the mapping become NaN; surface that instead of hiding it.
n_unmapped = int((proc_data_int['severity'].notna() & mapped_severity.isna()).sum())
if n_unmapped:
    logging.warning(f"{n_unmapped} severity value(s) are not covered by OUTPUT.output_dict and were mapped to NaN.")
proc_data_int['severity'] = mapped_severity

if only_top_10_features:
    # Keep only the requested features that actually exist after suffixing.
    top_10_features = [f for f in top_10_features if f in proc_data_int.columns]
    if len(top_10_features) != 10:
        raise ValueError(f"No top 10 features found in proc_data_int ({len(top_10_features)} found). Expected 10.\n{top_10_features}")
    proc_data_int = proc_data_int[features_dict['id'] + ['panic', 'severity', 'panic_label'] + top_10_features].copy()

display(proc_data_int.head(3))

## 💾 | Save Processed Data

In [None]:
# Rebuild the final table from the intermediate one on every run.
proc_data = create_empty_df()
proc_data = proc_data_int.copy()

# Drop the label columns that are not the target, then move the target to the end.
# The helpers' return values are not used, so they are relied on to edit `proc_data` in place.
r_cols = ['panic', 'panic_label']
remove_columns(proc_data, r_cols)
move_column(proc_data, 'severity', -1)
display(proc_data.head(3))

# File name encodes the look-back window, notebook version and data provenance.
_proc_data_basename = f'panic_severity_multi_proc_data_{dbp_param}days_{version}({scraped_data_filename})'
save_as_csv(proc_data, DATA_PATH, _proc_data_basename, index=False)

## 🔍 | Processed Data Analysis

In [None]:
# Preview the per-patient analysis table loaded from DATA_PATH/analysis.
display(patient_data.head(2))

In [None]:
def _count_equal_to(level):
    """Return a reducer that counts how many values of a Series equal `level`."""
    return lambda x: (x == level).sum()

# (output column, source column, reducer) triples consumed by aggregate_by_column.
agg_matrix = [('n_valid_panic', 'entry_id', 'count')] + [
    (f'n_{severity_name.lower()}', 'severity', _count_equal_to(severity_name))
    for severity_name in ('Mild', 'Moderate', 'Severe')
]

# One row per patient: number of valid panic events and their severity breakdown.
proc_data_agg = aggregate_by_column(proc_data, 'ID', agg_matrix)

# Attach the per-patient analysis columns to the aggregate (left join on 'ID').
proc_data_agg = pd.merge(proc_data_agg, patient_data, on='ID', how='left')
display(proc_data_agg.head(3))

In [None]:
# Distribution of the 'n_entries' column across the patients in the aggregate.
_entries_per_patient = proc_data_agg['n_entries']
plot_histogram_of_counts(
    _entries_per_patient,
    title='Number of Entries per Patient',
    figsize=(8, 3),
    xlabel='Number of Entries',
    ylabel='Number of Patients',
    ymax=20,
    bins_step=50,
)

# Split the cohort at 100 entries (missing values fall in neither group).
_n_above = int((_entries_per_patient > 100).sum())
_n_at_or_below = int((_entries_per_patient <= 100).sum())
print(f"Number of patients > 100 entries: {_n_above}")
print(f"Number of patients with <= 100 entries: {_n_at_or_below}")

In [None]:
# Bar chart of the severity class distribution with count and share per bar.
severity_counts = proc_data['severity'].value_counts().sort_index()
total_count = severity_counts.sum()
# Look each colour up by the class actually present, so a missing class cannot
# shift the colours of the remaining bars.
colors = [OUTPUT.color_name2color[name] for name in severity_counts.index]

fig, ax = plt.subplots(figsize=(5, 3))
severity_counts.plot(kind='bar', color=colors, ax=ax)
ax.set_title('Severity Distribution')
ax.set_ylabel('Count')
ax.tick_params(axis='x', rotation=0)
ax.grid(axis='y')

# Add labels with counts and percentages at the center of each bar
for p in ax.patches:
    count = p.get_height()
    percentage = f"{(count / total_count * 100):.1f}%"
    # Format as a whole number regardless of whether the height is int or float.
    ax.annotate(f'{count:.0f}\n{percentage}',
                (p.get_x() + p.get_width() / 2., count / 2.),
                ha='center', va='center', fontsize=10, color='black', xytext=(0, 0),
                textcoords='offset points')

# Remove the 'severity' label from the bottom
ax.set_xlabel('')

fig.tight_layout()
plt.show()

# 🤖 | Modeling

In [None]:
# Model input: every processed column except the identifier columns.
data = proc_data.copy()
# The helper's return value is not used, so it is relied on to edit `data` in place.
remove_columns(data, features_dict['id'])
_n_rows, _n_cols = data.shape
print(f"Processed data contains {_n_rows} entries with {_n_cols} columns after removing ID columns.")
display(data.head(3))

In [None]:
# PyCaret experiment configuration, kept in one place so it is easy to inspect.
# NOTE(review): the split and folds are stratified at event level; the ID columns
# were removed above, so events of one patient can land in both train and
# hold-out sets — confirm this is intended.
setup_options = dict(
    target='severity',                 # three-class severity label
    session_id=123,                    # fixed seed for reproducibility
    normalize=True,                    # scale numeric features
    transformation=False,              # no power transformation
    train_size=0.8,                    # 80/20 train / hold-out split
    fold=5,                            # 5-fold cross-validation
    fold_strategy='stratifiedkfold',
    numeric_imputation='mean',
    remove_multicollinearity=True,     # drop one of each highly correlated feature pair
    multicollinearity_threshold=0.9,   # correlation threshold for that removal
    # html=False,                      # do not generate HTML report (use plain-text output)
    verbose=True,
)
clf = setup(data=data, **setup_options)

# 🚂 | Training

In [None]:
# Compare the baseline models and keep the best one as ranked by Accuracy.
# `best_model` is reused below for hold-out evaluation, saving and SHAP analysis.
best_model = compare_models(sort='Accuracy')

# 🧪 | Test

In [None]:
results = pull()  # Get the latest PyCaret output table as a DataFrame
# Cross-Validation results (uncomment to show the table again)
# print("Cross-Validation Results:")
# display(results)  # Jupyter display (can further style if you want)

In [None]:
# Evaluate on hold-out set (20% test split).
# `holdout_results` is reused by the SHAP cell to pick the feature columns.
holdout_results = predict_model(best_model)
print(f"Hold-out Set Results (20% test split) for {OUTPUT.num_classes} classes (test_size={len(holdout_results)}):")

In [None]:
# Persist the best model; the file name encodes the look-back window, notebook
# version and data provenance.
# NOTE(review): OUTPUT_PATH from the Path Variables cell is not used here — confirm
# which results directory is intended.
_results_dir = './_results'
os.makedirs(_results_dir, exist_ok=True)  # make sure the folder exists
_model_basename = f'panic_severity_multi_best_model_{dbp_param}days_{version}({scraped_data_filename})'
save_model(best_model, f'{_results_dir}/{_model_basename}')

In [None]:
# Optional: set to a model id accepted by create_model (the SHAP cell suggests
# 'et', 'rf', 'gbc', 'lightgbm', 'dt') to train that model as well; when set, the
# SHAP analysis below explains `spec_model` instead of `best_model`.
spec_model_name = None
if spec_model_name:
	# Train the requested model with 5-fold cross-validation
	spec_model = create_model(spec_model_name, fold=5, cross_validation=True)
	print(f"Created specific model: {spec_model_name}")

	# Evaluate the specific model on the hold-out set
	spec_results = predict_model(spec_model)
	print(f"Specific Model Results ({spec_model_name}):")

# 🔍 | Analysis

## SHAP Values

In [None]:
# Reference table of the tree-based models suggested for SHAP TreeExplainer:
# (PyCaret model id, display name), kept as pairs so the two lists cannot drift apart.
_TREE_MODELS = (
    ('et', 'Extra Trees Classifier'),
    ('rf', 'Random Forest Classifier'),
    ('gbc', 'Gradient Boosting Classifier'),
    ('lightgbm', 'Light Gradient Boosting Machine'),
    ('dt', 'Decision Tree Classifier'),
)
tree_model_ids = [model_id for model_id, _display_name in _TREE_MODELS]
tree_model_names = [display_name for _model_id, display_name in _TREE_MODELS]

def is_tree_model(model):
    """Heuristically decide whether `model` can be explained with shap.TreeExplainer.

    The decision is based only on the lower-cased class name of `model`.

    Args:
        model: Any estimator object.

    Returns:
        bool: True when the class name looks like a supported tree ensemble.
    """
    model_name = model.__class__.__name__.lower()
    # AdaBoost contains 'boost' but is excluded here.
    # NOTE(review): believed unsupported by TreeExplainer — confirm against the SHAP docs.
    if 'adaboost' in model_name:
        return False
    # 'lgbm' / 'xgb' cover LGBMClassifier and XGBClassifier, whose class names
    # contain neither 'lightgbm' nor 'boost'.
    tree_keywords = ('forest', 'tree', 'boost', 'lightgbm', 'lgbm', 'xgb')
    return any(keyword in model_name for keyword in tree_keywords)


In [None]:
# Pick the model to explain: the explicitly requested one if set, else the best model.
if spec_model_name:
    logging.info(f"Using specific model: {spec_model_name}")
    shap_model = spec_model
else:
    logging.info("Using best model from PyCaret compare_models.")
    shap_model = best_model

# SHAP analysis only runs for TreeExplainer-compatible models; the later cells
# check this flag before touching `shap_dfs`.
is_compatible = is_tree_model(shap_model)

if is_compatible:
    print(f"Best model ({shap_model.__class__.__name__}) is compatible with SHAP TreeExplainer.")

    # Keep only the input feature columns (drops the target and prediction/score columns).
    # NOTE(review): these columns come from predict_model output; confirm they are in the
    # same (normalized / multicollinearity-filtered) space the model was trained on.
    feature_cols = [col for col in holdout_results.columns if col in data.columns and col != 'severity']
    X_holdout = holdout_results[feature_cols]

    # --- Estimator unwrapping for ensembles ---
    # NOTE(review): is_compatible is computed from the wrapper's class name before
    # unwrapping; confirm that Voting/Stacking wrappers can actually reach this branch.
    model_to_explain = shap_model
    base_estimator_key = None  # Track which base estimator is used
    if isinstance(model_to_explain, (VotingClassifier, StackingClassifier)):
        named_estimators = dict(model_to_explain.named_estimators_)
        # Try to select a tree-based model in order of preference
        for key in ['rf', 'et', 'gbc', 'lightgbm', 'dt']:
            if key in named_estimators and is_tree_model(named_estimators[key]):
                model_to_explain = named_estimators[key]
                base_estimator_key = key
                print(f"Selected base estimator '{key}' from Voting/Stacking ensemble.")
                break
        if base_estimator_key is None:
            print("Warning: No compatible tree model found in the ensemble; SHAP will use the full ensemble.")

    print("Model to explain:", type(model_to_explain))

    # Build the SHAP TreeExplainer
    explainer = shap.TreeExplainer(model_to_explain)
    shap_values = explainer.shap_values(X_holdout)

    # One DataFrame of SHAP values per class, keyed 'class_<i>' (rows and columns follow X_holdout).
    shap_dfs = {}

    if isinstance(shap_values, list):
        # Multiclass output as a list of [n_samples, n_features] arrays (one per class)
        for i, class_shap in enumerate(shap_values):
            shap_dfs[f"class_{i}"] = pd.DataFrame(class_shap, columns=X_holdout.columns, index=X_holdout.index)
    elif isinstance(shap_values, np.ndarray) and shap_values.ndim == 3:
        # Multiclass output as one array of shape (n_samples, n_features, n_classes)
        n_classes = shap_values.shape[2]
        for i in range(n_classes):
            shap_dfs[f"class_{i}"] = pd.DataFrame(shap_values[:, :, i], columns=X_holdout.columns, index=X_holdout.index)
    else:
        # Binary or regression: single 2D array
        shap_dfs["shap_values"] = pd.DataFrame(shap_values, columns=X_holdout.columns, index=X_holdout.index)
else:
    # Report the model that was actually checked (it may be spec_model, not best_model).
    logging.warning(f"Model ({shap_model.__class__.__name__}) is NOT compatible with SHAP TreeExplainer.")
    print("Please select one of the following tree models for SHAP analysis: 'et', 'rf', 'gbc', 'lightgbm', 'dt'")
    print("Example:")
    print("rf_model = create_model('rf')\nrf_model = finalize_model(rf_model)\n")


In [None]:
top_n = 10  # Number of features to show

if is_compatible:
    # Shared x-axis limit: the largest mean(|SHAP|) over all classes plus 25% headroom,
    # so the per-class bar charts are directly comparable.
    global_max = max(df.abs().mean(axis=0).max() for df in shap_dfs.values()) * 1.25

    # feature -> [sum of mean(|SHAP|) over the classes where it is in the top N,
    #             names of those classes]
    top_overall_features = {}

    for class_label in OUTPUT.plot_label2name.keys():
        df = shap_dfs[class_label]
        # Compute mean absolute SHAP value for each feature
        feature_importance = df.abs().mean(axis=0).sort_values(ascending=False)
        # Get top features
        top_features = feature_importance.head(top_n)
        for feature in top_features.index:
            if feature not in top_overall_features:
                top_overall_features[feature] = [0, []]  # running sum, list of class names
            top_overall_features[feature][0] += top_features[feature]
            top_overall_features[feature][1].append(OUTPUT.plot_label2name[class_label])

        # Horizontal bar plot, most important feature on top
        plt.figure(figsize=(6, 4))
        colors = [OUTPUT.color_name2color[OUTPUT.plot_label2name[class_label]]] * len(top_features)
        ax = top_features[::-1].plot(kind='barh', color=colors)
        plt.title(f"Top {top_n} Features by Mean(|SHAP|) for {OUTPUT.plot_label2name[class_label]}")
        plt.xlabel("Mean(|SHAP Value|)")
        plt.xlim(0, global_max)  # Set the x-axis limit to the global maximum
        # plt.xlim(0, .0325)  # Alternative: fixed x-axis limit

        # Add labels with values to the right of each bar
        for p in ax.patches:
            value = f"{p.get_width():.4f}"
            ax.annotate(value,
                        (p.get_width() + 0.001, p.get_y() + p.get_height() / 2),
                        ha='left', va='center', fontsize=10, color='black', xytext=(0, 0),
                        textcoords='offset points')

        plt.tight_layout()
        plt.show()

    # Create a DataFrame for the overall top features
    top_overall_df = pd.DataFrame.from_dict(
        {feature: {"Mean(|SHAP|)": values[0], "Class": values[1]} for feature, values in top_overall_features.items()},
        orient='index'
    )
    # Average over the classes in which the feature made the top N (not over all classes).
    top_overall_df['Mean(|SHAP|)'] = top_overall_df.apply(
        lambda row: row['Mean(|SHAP|)'] / len(row['Class']), axis=1
    )
    top_overall_df = top_overall_df.sort_values(by='Mean(|SHAP|)', ascending=False)
    # Display the overall top features
    print(f"Overall Top {top_n} Features by Mean(|SHAP|):")
    display(top_overall_df.head(top_n))
    # save_as_csv(top_overall_df, TMP_PATH, f'panic_severity_multi_top_features_{dbp_param}days_{version}({scraped_data_filename})', index=True)

    # === GLOBAL MEAN ACROSS ALL CLASSES AND SAMPLES ===
    # Stack all SHAP values (for all classes) vertically and compute mean across all samples and classes
    all_abs_shap = np.vstack([df.abs().values for df in shap_dfs.values()])
    global_feature_importance = pd.DataFrame({
        'Feature': X_holdout.columns,
        'Global Mean(|SHAP|)': all_abs_shap.mean(axis=0)
    }).set_index('Feature')
    global_top_features = global_feature_importance.sort_values(by='Global Mean(|SHAP|)', ascending=False).head(top_n)
    print(f"\nTop {top_n} Features by GLOBAL Mean(|SHAP|) Across All Classes and Samples:")
    display(global_top_features)
    # save_as_csv(global_top_features, TMP_PATH, f'panic_severity_multi_global_top_features_{dbp_param}days_{version}({scraped_data_filename})', index=True)
else:
    # Report the model that was actually checked (it may be spec_model, not best_model).
    logging.warning(f"Model ({shap_model.__class__.__name__}) is NOT compatible with SHAP TreeExplainer.")
    print(f"Model ({shap_model.__class__.__name__}) is NOT compatible with SHAP TreeExplainer.")
    print("Please select one of the following tree models for SHAP analysis: 'et', 'rf', 'gbc', 'lightgbm', 'dt'")
    print("Example:")
    print("rf_model = create_model('rf')\nrf_model = finalize_model(rf_model)\n")

In [None]:
top_n = 10  # Number of features per beeswarm plot

if is_compatible:
    for class_label, df in shap_dfs.items():
        # 1. Compute top N features by mean(|SHAP value|) for this class
        feature_importance = df.abs().mean(axis=0).sort_values(ascending=False)
        top_features = feature_importance.head(top_n).index.tolist()

        # 2. Subset SHAP values and features
        shap_values_top = df[top_features]
        X_top = X_holdout[top_features]

        # 3. Beeswarm plot (summary plot) for this class and top features only
        plt.figure(figsize=(8, 5))
        shap.summary_plot(
            shap_values_top.values,    # SHAP values: shape (n_samples, n_top_features)
            X_top,                     # Input features for those columns
            feature_names=top_features,
            show=False,                # So we can modify the plot
            plot_size=(8, 5)
        )
        # Fall back to the raw key (e.g. 'shap_values' in the non-multiclass case)
        # when there is no display name for it.
        class_title = OUTPUT.plot_label2name.get(class_label, class_label)
        plt.title(f"SHAP Beeswarm: Top {top_n} Features ({class_title})")
        plt.tight_layout()
        plt.show()
else:
    # Report the model that was actually checked (it may be spec_model, not best_model).
    logging.warning(f"Model ({shap_model.__class__.__name__}) is NOT compatible with SHAP TreeExplainer.")
    print(f"Model ({shap_model.__class__.__name__}) is NOT compatible with SHAP TreeExplainer.")
    print("Please select one of the following tree models for SHAP analysis: 'et', 'rf', 'gbc', 'lightgbm', 'dt'")
    print("Example:")
    print("rf_model = create_model('rf')\nrf_model = finalize_model(rf_model)\n")