In [2]:
# Models to try based on the results
# 1. Random Forest Classifier
# 2. LightGBM Classifier
# 3. XGBoost Classifier
# 4. LinearDiscriminantAnalysis
# 5. KNN?

import pandas as pd
import numpy as np
from time import time
from sklearn.model_selection import cross_validate, LeaveOneOut
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, make_scorer, precision_score, recall_score, f1_score

# Models
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier


In [None]:


def train_evaluate_classifiers(X_train, y_train, X_test, y_test, random_state=42):
    """
    Train and evaluate multiple classifiers using 5-fold cross validation
    
    Parameters:
    -----------
    X_train : pd.DataFrame
        Training features
    y_train : pd.Series
        Training labels
    X_test : pd.DataFrame
        Test features
    y_test : pd.Series
        Test labels
    random_state : int
        Random seed for reproducibility
        
    Returns:
    --------
    dict : Dictionary containing results for each classifier
    """
    
    # Define classifiers
    classifiers = {
        "RandomForest": {
            "model": RandomForestClassifier(random_state=random_state),
            "params": {
                'n_estimators': np.arange(100, 1000, 100),
                'max_depth': np.arange(3, 11, dtype=int),
                'min_samples_split': np.linspace(0.1, 1.0, 10),
                'min_samples_leaf': np.linspace(0.1, 0.5, 5),
                'max_features': ['sqrt', 'log2'],
                'bootstrap': [True, False],
            }
        },
        "GradientBoosting": {
            "model": GradientBoostingClassifier(random_state=random_state),
            "params": {
                'n_estimators': np.arange(100, 1000, 100),
                'learning_rate': np.linspace(0.01, 0.3, 10),
                'max_depth': np.arange(3, 11, dtype=int),
                'min_samples_split': np.linspace(0.1, 1.0, 10),
                'min_samples_leaf': np.linspace(0.1, 0.5, 5),
                'max_features': ['sqrt', 'log2'],
            }
        },
        "XGBoost": {
            "model": XGBClassifier(
                random_state=random_state, 
                eval_metric='mlogloss',

            ),
            "params": {
                'max_depth': np.linspace(3, 11, dtype=int),
                'learning_rate': np.linspace(0.1, 0.3, 10),
                'n_estimators': np.arange(100, 1000, 100),
                'gamma': np.linspace(0, 5, 6),
            }
        },
        "$k$NN": {
            "model": KNeighborsClassifier(),
            "params": {
                'n_neighbors': np.arange(1, 31),
                'weights': ['uniform', 'distance'],
                'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                'leaf_size': np.arange(10, 100, 10),
                "p": [1, 2]
            }
        },
        "LGBM": {
            "model": LGBMClassifier(random_state=random_state, verbosity=-1, verbose=-1),
            "params": {
                'num_leaves': np.arange(20, 130, 10),
                'reg_alpha': [0.1, 0.2, 0.3, 0.4, 0.5],
                'min_data_in_leaf': [5, 10, 20],
                'lambda_l1': [0, 1, 1.5],
                'lambda_l2': [0, 1],
                'data_sample_strategy': ["bagging", "goss"],
                }
        },
        "Decision Tree": {
            "model": DecisionTreeClassifier(random_state=random_state),
            "params": {
                'max_depth': np.arange(3, 11, dtype=int),
                'min_samples_split': np.linspace(0.05, 0.9, 20),
                'min_samples_leaf': np.linspace(0.05, 0.9, 20),
                'max_features': ['sqrt', 'log2', None],
                'criterion': ['gini', 'entropy', 'log-loss'],
            }
        },
    }
    
    # Define custom scoring functions with zero_division=0
    def precision_scorer(y_true, y_pred):
        return precision_score(y_true, y_pred, average='macro', zero_division=0)

    def recall_scorer(y_true, y_pred):
        return recall_score(y_true, y_pred, average='macro', zero_division=0)

    def f1_scorer(y_true, y_pred):
        return f1_score(y_true, y_pred, average='macro', zero_division=0)

    TRIALS = 10

    # Create scorers
    scoring = {
        'accuracy': 'accuracy',
        'precision_macro': make_scorer(precision_scorer),
        'recall_macro': make_scorer(recall_scorer),
        'f1_macro': make_scorer(f1_scorer)
    }

    results = {}
    
    for name, clf_dict in classifiers.items():
        print(f"Training {name} classifier...")
        # Start timing
        start_time = time()
        
        # Get the model from the dictionary
        clf = clf_dict['model']
        
        # For small datasets or datasets with tiny classes
        if X_train.shape[0] < 500:
            cv = LeaveOneOut()  # Use LOO for very small datasets
        else:
            # Count samples in smallest class
            class_counts = y_train.value_counts()
            min_class_samples = class_counts.min()
            

            n_splits = min(5, min_class_samples)  # Use at most 5 splits, but no more than samples in smallest class
            print(f"Using {n_splits} splits due to small class size ({min_class_samples} samples in smallest class)")
            cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

            # # Choose appropriate number of splits based on smallest class
            # if min_class_samples < 10:
            #     n_splits = min(5, min_class_samples)  # Use at most 5 splits, but no more than samples in smallest class
            #     print(f"Using {n_splits} splits due to small class size ({min_class_samples} samples in smallest class)")
            #     cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
            # else:
            #     cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=random_state)
        
        # Perform cross-validation with random search
        cv_results = RandomizedSearchCV(clf, 
                                        clf_dict['params'], 
                                        cv=cv, 
                                        scoring=scoring, 
                                        refit='f1_macro', 
                                        n_iter=TRIALS,
                                        n_jobs=-1,
                                        random_state=random_state, 
                                        verbose=4)
        cv_results.fit(X_train, y_train.values.ravel())
        
        # Fit on full training data and evaluate on test set
        best_clf = cv_results.best_estimator_
        test_pred_encoded = best_clf.predict(X_test)

        test_scores = precision_recall_fscore_support(y_test.values, test_pred_encoded, average='macro', zero_division=0)
        test_accuracy = accuracy_score(y_test.values.ravel(), test_pred_encoded)
        
        # Store results
        results[name] = {
            'cv_accuracy': cv_results.cv_results_['mean_test_accuracy'].mean(),
            'cv_accuracy_std': cv_results.cv_results_['std_test_accuracy'].mean(),
            'cv_precision': cv_results.cv_results_['mean_test_precision_macro'].mean(),
            'cv_recall': cv_results.cv_results_['mean_test_recall_macro'].mean(),
            'cv_f1': cv_results.cv_results_['mean_test_f1_macro'].mean(),
            'test_accuracy': test_accuracy,
            'test_precision': test_scores[0],
            'test_recall': test_scores[1],
            'test_f1': test_scores[2],
            'training_time': time() - start_time,
            'fitted_model': best_clf,
            'best_params': cv_results.best_params_
        }
        
        print(f"DONE: {results[name]}")
    
    return results

RESULTS = {}

# Train and test the model for all datasets
approaches = {'Initial': ['real_data', 'synth_data'], 'Extra': ['real_data', 'real_pseudoreal_data', 'real_pseudoreal_synth_data']}

for approach, datasets in approaches.items():
    for dataset in datasets:
        # Load the preprocessed data
        X_train = pd.read_csv(f"./datasets/preprocessed/{approach}/{dataset}/X_train.csv")
        y_train = pd.read_csv(f"./datasets/preprocessed/{approach}/{dataset}/y_train.csv")
        X_test = pd.read_csv(f"./datasets/preprocessed/{approach}/{dataset}/X_test.csv")
        y_test = pd.read_csv(f"./datasets/preprocessed/{approach}/{dataset}/y_test.csv")
        

        print(f"\nEvaluating on {approach}_{dataset} dataset:")

        # Train and evaluate
        results = train_evaluate_classifiers(X_train, y_train, X_test, y_test)
        
        # Store results
        RESULTS[f"{approach}_{dataset}"] = results

        # Print results
        for clf_name, metrics in results.items():
            print(f"\n{clf_name}:")
            print(f"CV Accuracy: {metrics['cv_accuracy']:.3f} (±{metrics['cv_accuracy_std']:.3f})")
            print(f"Test Accuracy: {metrics['test_accuracy']:.3f}")
            print(f"Test F1-Score: {metrics['test_f1']:.3f}")
            print(f"Training Time: {metrics['training_time']:.2f} seconds")

        results_df = pd.DataFrame.from_dict(results, orient='index')
        print(results_df)

# Save results to global variable
import pickle
with open("results.pkl", "wb") as f:
    pickle.dump(RESULTS, f)


Evaluating on Initial_real_data dataset:
Training RandomForest classifier...
Using 5 splits due to small class size (219 samples in smallest class)
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END bootstrap=False, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.8, n_estimators=800; accuracy: (test=0.392) f1_macro: (test=0.288) precision_macro: (test=0.331) recall_macro: (test=0.390) total time=   0.9s
[CV 3/5] END bootstrap=True, max_depth=3, max_features=log2, min_samples_leaf=0.5, min_samples_split=0.6, n_estimators=600; accuracy: (test=0.165) f1_macro: (test=0.041) precision_macro: (test=0.024) recall_macro: (test=0.143) total time=   0.8s
[CV 2/5] END bootstrap=False, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.8, n_estimators=800; accuracy: (test=0.395) f1_macro: (test=0.295) precision_macro: (test=0.318) recall_macro: (test=0.397) total time=   0.9s
[CV 5/5] END bootstrap=True, max_depth=3, max_feat

15 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3 fits failed with the following error:
Traceback (most recent call last):
  File "/home/iven/.pyenv/versions/3.12.9/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/iven/.pyenv/versions/3.12.9/lib/python3.12/site-packages/sklearn/base.py", line 1382, in wrapper
    estimator._validate_params()
  File "/home/iven/.pyenv/versions/3.12.9/lib/python3.12/site-packages/sklearn/base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "/home/iven/.pyenv/versions/3.12.9/lib/python3.12/site-packages/sklearn/utils/_param_valida

                  cv_accuracy  cv_accuracy_std  cv_precision  cv_recall  \
RandomForest         0.262006         0.007073      0.156217   0.244277   
GradientBoosting     0.428732         0.019694      0.358140   0.416762   
XGBoost              0.747021         0.013139      0.744817   0.743029   
$k$NN                0.719528         0.014621      0.718894   0.713661   
LGBM                 0.815575         0.015641      0.812625   0.812598   
Decision Tree             NaN              NaN           NaN        NaN   

                     cv_f1  test_accuracy  test_precision  test_recall  \
RandomForest      0.144061       0.454327        0.498457     0.434592   
GradientBoosting  0.363554       0.762019        0.754470     0.756634   
XGBoost           0.736935       0.824519        0.817325     0.818084   
$k$NN             0.693685       0.788462        0.774037     0.778879   
LGBM              0.811235       0.838942        0.830725     0.833158   
Decision Tree          NaN    

15 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
6 fits failed with the following error:
Traceback (most recent call last):
  File "/home/iven/.pyenv/versions/3.12.9/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/iven/.pyenv/versions/3.12.9/lib/python3.12/site-packages/sklearn/base.py", line 1382, in wrapper
    estimator._validate_params()
  File "/home/iven/.pyenv/versions/3.12.9/lib/python3.12/site-packages/sklearn/base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "/home/iven/.pyenv/versions/3.12.9/lib/python3.12/site-packages/sklearn/utils/_param_valida

                  cv_accuracy  cv_accuracy_std  cv_precision  cv_recall  \
RandomForest         0.261333         0.001149      0.143226   0.223751   
GradientBoosting     0.432502         0.004249      0.348625   0.403505   
XGBoost              0.726921         0.005961      0.705863   0.707172   
$k$NN                0.673960         0.005088      0.647647   0.651463   
LGBM                 0.732483         0.003351      0.714018   0.714952   
Decision Tree             NaN              NaN           NaN        NaN   

                     cv_f1  test_accuracy  test_precision  test_recall  \
RandomForest      0.120179       0.418269        0.464453     0.403980   
GradientBoosting  0.353572       0.728365        0.718477     0.721390   
XGBoost           0.704238       0.788462        0.785037     0.782660   
$k$NN             0.645156       0.754808        0.752834     0.746050   
LGBM              0.714036       0.814904        0.809672     0.810658   
Decision Tree          NaN    

In [7]:
for dataset_name, dataset_results in RESULTS.items():
    print(f"Results for {dataset_name.capitalize()} Dataset:\n")
    for clf_name, metrics in dataset_results.items():
        print(f"Classifier: {clf_name}")
        print(f"  CV Accuracy: {metrics['cv_accuracy']:.3f} (±{metrics['cv_accuracy_std']:.3f})")
        print(f"  CV Precision: {metrics['cv_precision']:.3f}")
        print(f"  CV Recall: {metrics['cv_recall']:.3f}")
        print(f"  CV F1-Score: {metrics['cv_f1']:.3f}")
        print(f"  Test Accuracy: {metrics['test_accuracy']:.3f}")
        print(f"  Test F1-Score: {metrics['test_f1']:.3f}")
        print(f"  Training Time: {metrics['training_time']:.2f} seconds")
        print(f"  Best Parameters: {metrics['best_params']}")
        print()

Results for Initial_real_data Dataset:

Classifier: RandomForest
  CV Accuracy: 0.210 (±0.004)
  CV Precision: 0.086
  CV Recall: 0.191
  CV F1-Score: 0.089
  Test Accuracy: 0.389
  Test F1-Score: 0.279
  Training Time: 3.82 seconds
  Best Parameters: {'n_estimators': np.int64(800), 'min_samples_split': np.float64(0.8), 'min_samples_leaf': np.float64(0.1), 'max_features': 'sqrt', 'max_depth': np.int64(3), 'bootstrap': False}

Classifier: GradientBoosting
  CV Accuracy: 0.325 (±0.014)
  CV Precision: 0.228
  CV Recall: 0.309
  CV F1-Score: 0.237
  Test Accuracy: 0.651
  Test F1-Score: 0.630
  Training Time: 5.27 seconds
  Best Parameters: {'n_estimators': np.int64(100), 'min_samples_split': np.float64(0.6), 'min_samples_leaf': np.float64(0.1), 'max_features': 'log2', 'max_depth': np.int64(4), 'learning_rate': np.float64(0.07444444444444444)}

Classifier: XGBoost
  CV Accuracy: 0.750 (±0.013)
  CV Precision: 0.748
  CV Recall: 0.746
  CV F1-Score: 0.740
  Test Accuracy: 0.825
  Test F1-S

In [5]:
markdown_text = "# Baseline Classifier Evaluation Results\n\n"
markdown_text += """
> The following table shows the results of the baseline classifiers on the real and synthetic datasets.
> The results are based on 5-fold cross-validation with default parameters.
> The results are displayed in descending order of F1-Score.

"""

# make a markdown table from the results
for dataset_name, results in RESULTS.items():
    markdown_text += f"## {dataset_name.capitalize()} Dataset\n\n"
    
    # Create table header
    markdown_text += "| Classifier | CV Accuracy | Test Accuracy | Test F1-Score | Training Time (s) | Efficiency (F1/s) |\n"
    markdown_text += "|------------|-------------|---------------|---------------|------------------| ------------------ |\n"
    
    # Order by f1-score
    results_ord = {k: v for k, v in sorted(results.items(), key=lambda item: item[1]['test_f1'], reverse=True)}

    # Add each classifier's results as a row
    for clf_name, metrics in results_ord.items():
        training_time_to_f1_ratio = metrics['test_f1'] / metrics['training_time']
        markdown_text += f"| {clf_name} | {metrics['cv_accuracy']:.3f} (±{metrics['cv_accuracy_std']:.3f}) | "
        markdown_text += f"{metrics['test_accuracy']:.3f} | {metrics['test_f1']:.3f} | {metrics['training_time']:.2f} | {training_time_to_f1_ratio:.2f} |\n"
    markdown_text += "\n"  # Add space between dataset tables

# Write to file
with open('tuning_results.md', 'w') as f:
    f.write(markdown_text)

print("Results have been written to tuning_results.md")

Results have been written to tuning_results.md


In [6]:
from sklearn.ensemble import VotingClassifier, StackingClassifier
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from time import time
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import seaborn as sns

# No need to reload RESULTS as it's already loaded in the notebook
with open("results.pkl", "rb") as f:
    RESULTS = pickle.load(f)

# Collect all models and their metrics across all datasets
all_models = []
for dataset_name, dataset_results in RESULTS.items():
    for clf_name, metrics in dataset_results.items():
        all_models.append({
            'dataset': dataset_name,
            'classifier': clf_name,
            'model': metrics['fitted_model'],
            'test_f1': metrics['test_f1'],
            'test_accuracy': metrics['test_accuracy'],
            'training_time': metrics['training_time']
        })

# Sort models by F1-score (descending)
all_models_sorted = sorted(all_models, key=lambda x: x['test_f1'], reverse=True)

# Select top 5 models 
top_models = all_models_sorted[:5]
print("Top 5 models selected for ensemble:")
for i, model in enumerate(top_models):
    print(f"{i+1}. {model['dataset']} - {model['classifier']}: F1={model['test_f1']:.3f}")

# Create estimators for Voting Classifier
estimators = [(f"{model['dataset']}_{model['classifier']}", model['model']) for model in top_models]

# Create ensemble - try soft voting for classifiers that support predict_proba
try:
    # Check which models support predict_proba
    support_proba = []
    for name, model in estimators:
        try:
            if hasattr(model, 'predict_proba'):
                support_proba.append(True)
            else:
                support_proba.append(False)
        except:
            support_proba.append(False)
    
    # Use soft voting if all models support it, otherwise hard
    voting_method = 'soft' if all(support_proba) else 'hard'
    print(f"Using {voting_method} voting method")
    
    # Create weighted ensemble
    weights = [model['test_f1'] for model in top_models]
    ensemble = VotingClassifier(estimators=estimators, voting=voting_method, weights=weights)
except:
    print("Falling back to hard voting without weights")
    ensemble = VotingClassifier(estimators=estimators, voting='hard')

# Prepare also a Stacking Classifier
stacking = StackingClassifier(
    estimators=estimators,
    final_estimator=RESULTS['real+synthetic']['KNN']['fitted_model'],
    cv=5
)

# Get test data from the real dataset
X_test = DATASETS["real"]["X"]["test"]
y_test = DATASETS["real"]["y"]["test"]
X_train = DATASETS["real"]["X"]["train"]
y_train = DATASETS["real"]["y"]["train"]

# Fit and evaluate VotingClassifier
start_time = time()
ensemble.fit(X_train, y_train)
ensemble_pred = ensemble.predict(X_test)
ensemble_time = time() - start_time

# Fit and evaluate StackingClassifier
start_time = time()
stacking.fit(X_train, y_train)
stacking_pred = stacking.predict(X_test)
stacking_time = time() - start_time

# Calculate metrics
ensemble_accuracy = accuracy_score(y_test, ensemble_pred)
ensemble_precision, ensemble_recall, ensemble_f1, _ = precision_recall_fscore_support(
    y_test, ensemble_pred, average='macro'
)

stacking_accuracy = accuracy_score(y_test, stacking_pred)
stacking_precision, stacking_recall, stacking_f1, _ = precision_recall_fscore_support(
    y_test, stacking_pred, average='macro'
)

print("\nVotingClassifier Performance:")
print(f"  Accuracy: {ensemble_accuracy:.3f}")
print(f"  F1-Score: {ensemble_f1:.3f}")
print(f"  Precision: {ensemble_precision:.3f}")
print(f"  Recall: {ensemble_recall:.3f}")

print("\nStackingClassifier Performance:")
print(f"  Accuracy: {stacking_accuracy:.3f}")
print(f"  F1-Score: {stacking_f1:.3f}")
print(f"  Precision: {stacking_precision:.3f}")
print(f"  Recall: {stacking_recall:.3f}")

# Display detailed classification reports
class_names = ['Insufficient_Weight', 'Normal_Weight', 'Overweight_Level_I',
               'Overweight_Level_II', 'Obesity_Type_I', 'Obesity_Type_II', 'Obesity_Type_III']

print("\nVotingClassifier Classification Report:")
print(classification_report(y_test, ensemble_pred, target_names=class_names))

print("\nStackingClassifier Classification Report:")
print(classification_report(y_test, stacking_pred, target_names=class_names))

# Compare performance with individual models
results_comparison = pd.DataFrame([
    {'Model': 'VotingClassifier', 'Accuracy': ensemble_accuracy, 'F1-Score': ensemble_f1, 
     'Precision': ensemble_precision, 'Recall': ensemble_recall, 'Time': ensemble_time},
    {'Model': 'StackingClassifier', 'Accuracy': stacking_accuracy, 'F1-Score': stacking_f1, 
     'Precision': stacking_precision, 'Recall': stacking_recall, 'Time': stacking_time}
])

# Use concat instead of append (which is deprecated)
for model in top_models:
    model_df = pd.DataFrame({
        'Model': [f"{model['dataset']}_{model['classifier']}"],
        'Accuracy': [model['test_accuracy']],
        'F1-Score': [model['test_f1']],
        'Precision': [None],  # We don't have this data readily available
        'Recall': [None]      # We don't have this data readily available
    })
    results_comparison = pd.concat([results_comparison, model_df], ignore_index=True)

print("\nModel Comparison:")
display(results_comparison)

# Save the best ensemble model
best_ensemble = ensemble if ensemble_f1 > stacking_f1 else stacking
best_name = "voting_ensemble" if ensemble_f1 > stacking_f1 else "stacking_ensemble"
with open(f'{best_name}_classifier.pkl', 'wb') as f:
    pickle.dump(best_ensemble, f)
print(f"\nBest ensemble classifier ({best_name}) saved to '{best_name}_classifier.pkl'")

# Plot performance comparison
plt.figure(figsize=(12, 6))
sns.barplot(x='Model', y='F1-Score', data=results_comparison)
plt.title('F1-Score Comparison Between Ensemble Models and Individual Models')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

Top 5 models selected for ensemble:
1. Initial_real_data - XGBoost: F1=0.815
2. Initial_real_data - LGBM: F1=0.810
3. Initial_synth_data - LGBM: F1=0.809
4. Initial_synth_data - XGBoost: F1=0.778
5. Initial_synth_data - $k$NN: F1=0.735
Using soft voting method


KeyError: 'real+synthetic'

In [None]:
import shap
import matplotlib.pyplot as plt

# Get the XGBoost model and sample data
xgb_model = RESULTS['real']['XGBoost']['fitted_model']

# Create a SHAP explainer for the model
explainer = shap.TreeExplainer(xgb_model)

# Calculate SHAP values for a subset of training data (100 samples for visualization)
shap_values = explainer.shap_values(X_train[:100])

# Create summary plot
plt.figure(figsize=(12, 8))
shap.summary_plot(shap_values, X_train[:100], feature_names=X_train.columns, show=False)
plt.title('SHAP Summary Plot for XGBoost Model')
plt.tight_layout()
plt.show()

# Bar plot of mean absolute SHAP values
plt.figure(figsize=(12, 6))
shap.summary_plot(shap_values, X_train[:100], feature_names=X_train.columns, plot_type="bar", show=False)
plt.title('Mean Impact on Model Output (Mean Absolute SHAP Values)')
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Create a dataframe from the results
data = []
for dataset_name, results in RESULTS.items():
    for clf_name, metrics in results.items():
        data.append({
            'Dataset': dataset_name.capitalize(),
            'Classifier': clf_name,
            'CV Accuracy': metrics['cv_accuracy'],
            'Test Accuracy': metrics['test_accuracy'],
            'Test F1-Score': metrics['test_f1']
        })

df_metrics = pd.DataFrame(data)

# Create a grouped bar plot
plt.figure(figsize=(15, 8))
metrics = ['CV Accuracy', 'Test Accuracy', 'Test F1-Score']
x = np.arange(len(df_metrics['Classifier'].unique()))
width = 0.25

for i, dataset in enumerate(df_metrics['Dataset'].unique()):
    dataset_data = df_metrics[df_metrics['Dataset'] == dataset]
    plt.bar(x + i*width, dataset_data['Test F1-Score'], 
            width, label=f'{dataset}', alpha=0.8)

plt.xlabel('Classifier')
plt.ylabel('F1-Score')
plt.title('Classifier Performance Comparison Across Datasets')
plt.xticks(x + width, df_metrics['Classifier'].unique(), ha='center')
plt.legend(loc='lower right')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Evaluate the RandomForest model on real+synthetic dataset
# Shows confusion matrix 
# Showsand per-class F1/precision/recall (imbalanced)

def evaluate_model_with_report(model, X_test, y_test, class_names):
    from sklearn.metrics import confusion_matrix, classification_report
    import matplotlib.pyplot as plt
    import seaborn as sns
    import pandas as pd

    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.tight_layout()
    plt.show()

    report = classification_report(y_test, y_pred, target_names=class_names, output_dict=True)
    return pd.DataFrame(report).transpose()

model = RESULTS["real+synthetic"]["RandomForest"]["fitted_model"]
X_test = DATASETS["real+synthetic"]["X"]["test"]
y_test = DATASETS["real+synthetic"]["y"]["test"]

class_names = ['Insufficient_Weight', 'Normal_Weight', 'Overweight_Level_I',
               'Overweight_Level_II', 'Obesity_Type_I', 'Obesity_Type_II', 'Obesity_Type_III']

report_df = evaluate_model_with_report(model, X_test, y_test, class_names)
display(report_df)



In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Load the test labels
y_test = DATASETS["real+synthetic"]["y"]["test"]

# LabelEncoder initialisieren und fitten
label_encoder = LabelEncoder()
label_encoder.fit([
    'Insufficient_Weight',
    'Normal_Weight',
    'Overweight_Level_I',
    'Overweight_Level_II',
    'Obesity_Type_I',
    'Obesity_Type_II',
    'Obesity_Type_III'
])

# Zielklassen im Testset dekodieren
y_test_named = pd.Series(label_encoder.inverse_transform(y_test), name="Klasse")

# Plot
plt.figure(figsize=(10, 5))
sns.countplot(x=y_test_named, order=y_test_named.value_counts().index)
plt.title("Verteilung der Zielklassen im Testset")
plt.xticks(rotation=45)
plt.xlabel("Zielklasse")
plt.ylabel("Anzahl")
plt.tight_layout()
plt.show()




<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=a441f35e-4b4c-4c50-b56a-1aea6b800ed8' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>
