In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from imblearn.metrics import specificity_score, sensitivity_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier


features = pd.read_csv('reg_features.csv')
labels = pd.read_csv('labeled_bgl.csv')



In [20]:
print(labels.shape, features.shape)

(559, 2) (559, 60)


In [21]:
print(labels.head())
print(labels.columns)


     BGL  Label
0   92.0      0
1  301.0      2
2  156.0      1
3   94.0      0
4   90.0      0
Index(['BGL', 'Label'], dtype='object')


In [22]:
labels = pd.read_csv('labeled_bgl.csv')['Label']

In [24]:
print(features.head())
print(features.columns)


    Age  Gender     HR  SPO2  maxBP  minBP  TGS2603_MEAN  TGS2603_IQR  \
0  26.0     0.0   93.0  98.0  120.0   80.0          2.73         0.09   
1  64.0     0.0  101.0  98.0  140.0   80.0          2.74         0.09   
2  79.0     0.0   71.0  99.0  150.0   70.0          2.74         0.41   
3  65.0     0.0   70.0  97.0  130.0   80.0          2.98         0.05   
4  45.0     0.0   56.0  99.0  110.0   70.0          2.41         0.95   

   TGS2603_PTP  TGS2603_RMS  ...  TGS822_BW  MQ138_MEAN  MQ138_IQR  MQ138_PTP  \
0         0.13         2.73  ...       1.17        2.48       0.54       0.75   
1         0.12         2.74  ...       1.04        2.41       0.11       0.17   
2         1.49         2.77  ...       1.87        0.00       0.00       0.00   
3         0.11         2.98  ...       0.10        2.36       0.15       0.20   
4         1.69         2.48  ...       0.00        2.19       0.14       0.23   

   MQ138_RMS  MQ138_INT  MQ138_SQ_INT  MQ138_ENERGY  MQ138_POWER  MQ138_BW

In [23]:
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [27]:
y_test

513    1
255    0
313    0
159    0
514    2
      ..
537    0
432    0
136    0
291    0
515    2
Name: Label, Length: 112, dtype: int64

In [33]:
models = {
    "SVM": {
        "model": SVC(),
        "params": {
            'C': np.logspace(-3, 3, 7),
            'gamma': ['scale', 'auto'] + list(np.logspace(-3, 3, 7)),
            'kernel': ['rbf', 'poly']
        }
    },
    "Decision Tree": {
        "model": DecisionTreeClassifier(),
        "params": {
            'max_depth': [None, 10, 30, 50],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }
    },
    "Random Forest": {
        "model": RandomForestClassifier(),
        "params": {
            'n_estimators': [100, 200],
            'max_depth': [None, 10, 30],
            'min_samples_split': [2, 5]
        }
    },
    "Gradient Boost": {
        "model": GradientBoostingClassifier(),
        "params": {
            'n_estimators': [50, 100],
            'learning_rate': [0.01, 0.1],
            'max_depth': [3, 5]
        }
    },
    "XGBoost": {
        "model": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
        "params": {
            'n_estimators': [50, 100],
            'learning_rate': [0.01, 0.1],
            'max_depth': [3, 6]
        }
    }
}


In [None]:

# Train/evaluate models
results = {}
for name, config in models.items():
    print(f"\n======= Training {name} =======")
    
    # Randomized search with 5-fold stratified CV
    search = RandomizedSearchCV(
        estimator=config["model"],
        param_distributions=config["params"],
        n_iter=10,
        cv=StratifiedKFold(n_splits=5, random_state=42, shuffle=True),
        scoring='f1_weighted',
        n_jobs=-1,
        random_state=42
    )
    search.fit(X_train_scaled, y_train)
    
    # Best model evaluation
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test_scaled)
    
    # Store results
    results[name] = {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred, average='weighted'),
        "recall": recall_score(y_test, y_pred, average='weighted'),
        "f1": f1_score(y_test, y_pred, average='weighted'),
        "confusion_matrix": confusion_matrix(y_test, y_pred),
        "best_params": search.best_params_
    }
    
    print(f"Best Parameters: {search.best_params_}")
    print(classification_report(y_test, y_pred))

# Compare model performances
print("\n===== Model Performance Summary =====")
for model, metrics in results.items():
    print(f"\n{model}:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1-Score: {metrics['f1']:.4f}")
    print(f"Confusion Matrix:\n{metrics['confusion_matrix']}")


Best Parameters: {'kernel': 'linear', 'gamma': 'scale', 'C': 0.1}
              precision    recall  f1-score   support

           0       0.80      0.89      0.84        72
           1       0.41      0.33      0.37        21
           2       0.40      0.32      0.35        19

    accuracy                           0.69       112
   macro avg       0.54      0.51      0.52       112
weighted avg       0.66      0.69      0.67       112


Best Parameters: {'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': None}
              precision    recall  f1-score   support

           0       0.84      0.81      0.82        72
           1       0.32      0.29      0.30        21
           2       0.42      0.53      0.47        19

    accuracy                           0.66       112
   macro avg       0.52      0.54      0.53       112
weighted avg       0.67      0.66      0.66       112


Best Parameters: {'n_estimators': 50, 'min_samples_split': 5, 'max_depth': None}
    



Best Parameters: {'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.1}
              precision    recall  f1-score   support

           0       0.87      0.92      0.89        72
           1       0.54      0.33      0.41        21
           2       0.43      0.53      0.48        19

    accuracy                           0.74       112
   macro avg       0.61      0.59      0.59       112
weighted avg       0.73      0.74      0.73       112




Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Best Parameters: {'subsample': 1.0, 'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.1}
              precision    recall  f1-score   support

           0       0.83      0.90      0.87        72
           1       0.64      0.33      0.44        21
           2       0.43      0.53      0.48        19

    accuracy                           0.73       112
   macro avg       0.63      0.59      0.59       112
weighted avg       0.73      0.73      0.72       112


===== Model Performance Summary =====

SVM:
Accuracy: 0.6875
Precision: 0.6593
Recall: 0.6875
F1-Score: 0.6703
Confusion Matrix:
[[64  4  4]
 [ 9  7  5]
 [ 7  6  6]]

Decision Tree:
Accuracy: 0.6607
Precision: 0.6703
Recall: 0.6607
F1-Score: 0.6640
Confusion Matrix:
[[58  7  7]
 [ 8  6  7]
 [ 3  6 10]]

Random Forest:
Accuracy: 0.7500
Precision: 0.7348
Recall: 0.7500
F1-Score: 0.7249
Confusion Matrix:
[[68  2  2]
 [11  8  2]
 [ 9  2  8]]

Gradient Boost:
Accuracy: 0.7411
Precision: 0.7330
Recall: 0.7411
F1-Score: 0.73

In [30]:

# Split data with stratification (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

In [31]:
y_train

425    0
264    1
481    0
261    0
187    0
      ..
5      0
390    0
431    0
449    0
128    2
Name: Label, Length: 447, dtype: int64

In [32]:
y_train =pd.read_csv('bgl_train.csv')['Label']

In [33]:
y_train

0      1
1      0
2      1
3      1
4      0
      ..
922    1
923    1
924    0
925    2
926    1
Name: Label, Length: 927, dtype: int64

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report,balanced_accuracy_score 
from imblearn.metrics import specificity_score, sensitivity_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier

# Load features and labels
features = pd.read_csv('reg_features_new.csv')
labels = pd.read_csv('labeled_bgl.csv')['Label']



# Split data with stratification (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

X_train.drop(columns=['Gender'], inplace=True)
X_test


.drop(columns=['Gender'], inplace=True)


# Standardize features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Initialize models and parameter grids
models = {
    "SVM": {
        "model": SVC(),
        "params": {
            'C':      [0.1, 0.5, 1, 5, 10, 100],
            'gamma':  ['scale', 'auto'],
            'kernel': ['poly', 'rbf', 'sigmoid','linear']
            # 'random_state': [10]
            
            }
    },
    "Decision Tree": {
        "model": DecisionTreeClassifier(),
        "params": {
            'max_depth': [None, 2, 3, 5, 7, 8, 10],
            'min_samples_split': np.arange(2, 9),
            'min_samples_leaf': [1, 2, 3, 4],
            'criterion': ['gini', 'entropy'],
            'splitter': ['best', 'random']
            # 'random_state': [10]

        }
    },
    "Random Forest": {
        "model": RandomForestClassifier(),
        "params": {
            'n_estimators': [50,100, 200, 300],
            'max_depth': [None, 2, 3, 5, 7, 8, 10],
            'min_samples_split': np.arange(2, 9),
            'criterion': ['gini', 'entropy'],
            'max_features': ['sqrt', 'log2', 1.0]
            # 'random_state': [10]
            

        }
    },
    "Gradient Boost": {
        "model": GradientBoostingClassifier(),
        "params": {
            'n_estimators': [50, 100, 200, 300],
            'learning_rate': [0.01, 0.1],
            'max_depth':[None, 2, 3, 5, 7, 8, 10],
            'min_samples_leaf': [1, 2, 3, 4],
            'subsample': [0.8, 1.0],
            'min_samples_split': np.arange(2, 9)
            # 'random_state': [10]  

        }
    },
    "XGBoost": {
        "model": XGBClassifier(),
        "params": {
            'n_estimators': np.arange(50, 250, 50),
            'learning_rate': [0.01, 0.1, 0.05, 0.5, 1.0],
            'max_depth': np.arange(2, 11),
            'subsample': [0.1, 0.5, 0.8, 1.0],
            'colsample_bytree': [0.1, 0.5, 0.8, 1.0],
            
            'reg_lambda': [0.1, 0.5, 0.8, 1.0],
           
            # 'random_state': [10],
            'min_child_weight': [1, 2, 3, 4]
        }
    }

}

# Train/evaluate models
results = {}
for name, config in models.items():
    print(f"\n======= Training {name} =======")
    
    # Randomized search with 5-fold stratified CV
    search = RandomizedSearchCV(
        estimator=config["model"],
        param_distributions=config["params"],
        n_iter=10,
        cv=StratifiedKFold(n_splits=5 , random_state=10, shuffle=True),
        scoring='f1_weighted',
        n_jobs=-1,
        random_state=10
    )
    search.fit(X_train_scaled, y_train)
    
    # Best model evaluation
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test_scaled)
    
    # Store results
    results[name] = {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred, average='weighted'),
        "recall": recall_score(y_test, y_pred, average='weighted'),
        "f1": f1_score(y_test, y_pred, average='weighted'),
        "confusion_matrix": confusion_matrix(y_test, y_pred),
        # "balanced_accuracy": balanced_accuracy_score(y_test, y_pred),
        "balanced_accuracy": (sensitivity_score(y_test, y_pred , average='weighted') + specificity_score(y_test, y_pred, average='weighted')) / 2,

        "best_params": search.best_params_
    }
    
    print(f"Best Parameters: {search.best_params_}")
    print(classification_report(y_test, y_pred))

# Compare model performances
print("\n===== Model Performance Summary =====")
for model, metrics in results.items():
    print(f"\n{model}:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1-Score: {metrics['f1']:.4f}")
    print(f"Confusion Matrix:\n{metrics['confusion_matrix']}")
    # print(f"Balanced Accuracy: {metrics['balanced_accuracy']:.4f}")
    print(f"balanced_accuracy", metrics['balanced_accuracy'])
    print(f"Best Parameters: {metrics['best_params']}")


Best Parameters: {'kernel': 'poly', 'gamma': 'scale', 'C': 100}
              precision    recall  f1-score   support

           0       0.85      0.89      0.87        72
           1       0.53      0.48      0.50        21
           2       0.67      0.63      0.65        19

    accuracy                           0.77       112
   macro avg       0.68      0.67      0.67       112
weighted avg       0.76      0.77      0.76       112


Best Parameters: {'splitter': 'random', 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_depth': None, 'criterion': 'gini'}
              precision    recall  f1-score   support

           0       0.74      0.82      0.78        72
           1       0.38      0.29      0.32        21
           2       0.38      0.32      0.34        19

    accuracy                           0.63       112
   macro avg       0.50      0.47      0.48       112
weighted avg       0.61      0.63      0.62       112


Best Parameters: {'n_estimators': 100, 'min_



Best Parameters: {'subsample': 1.0, 'reg_lambda': 1.0, 'n_estimators': 50, 'min_child_weight': 1, 'max_depth': 8, 'learning_rate': 0.05, 'colsample_bytree': 0.8}
              precision    recall  f1-score   support

           0       0.84      0.90      0.87        72
           1       0.47      0.33      0.39        21
           2       0.55      0.58      0.56        19

    accuracy                           0.74       112
   macro avg       0.62      0.61      0.61       112
weighted avg       0.72      0.74      0.73       112


===== Model Performance Summary =====

SVM:
Accuracy: 0.7679
Precision: 0.7604
Recall: 0.7679
F1-Score: 0.7636
Confusion Matrix:
[[64  5  3]
 [ 8 10  3]
 [ 3  4 12]]
balanced_accuracy 0.7807913860333215
Best Parameters: {'kernel': 'poly', 'gamma': 'scale', 'C': 100}

Decision Tree:
Accuracy: 0.6339
Precision: 0.6080
Recall: 0.6339
F1-Score: 0.6180
Confusion Matrix:
[[59  7  6]
 [11  6  4]
 [10  3  6]]
balanced_accuracy 0.6287915041947298
Best Parameter

In [31]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report,balanced_accuracy_score 
from imblearn.metrics import specificity_score, sensitivity_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier

# Load features and labels
# features = pd.read_csv('reg_features.csv')
# labels = pd.read_csv('labeled_bgl.csv')['Label']


# # Split data with stratification (80% train, 20% test)
# X_train, X_test, y_train, y_test = train_test_split(
#     features, labels, test_size=0.2, stratify=labels, random_state=42
# )

X_train = pd.read_csv('X_train_augmented1.csv')
y_train = pd.read_csv('y_train_augmented.csv')['Label']
X_test= pd.read_csv('X_test_original.csv')
y_test = pd.read_csv('y_test_original.csv')['Label']

# X_train.drop(columns=['Gender'], inplace=True)
# X_test.drop(columns=['Gender'], inplace=True)


# Standardize features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



# Initialize models and parameter grids
models = {
    "SVM": {
        "model": SVC(),
        "params": {
            'C':      [0.1, 0.5, 1, 5, 10, 100],
            'gamma':  ['scale', 'auto'],
            'kernel': ['poly', 'rbf', 'sigmoid','linear']
            
            
            }
    },
    "Decision Tree": {
        "model": DecisionTreeClassifier(),
        "params": {
            'max_depth': [None, 2, 3, 5, 7, 8, 10],
            'min_samples_split': np.arange(2, 9),
            'min_samples_leaf': [1, 2, 3, 4],
            'criterion': ['gini', 'entropy'],
            'splitter': ['best', 'random']


        }
    },
    "Random Forest": {
        "model": RandomForestClassifier(),
        "params": {
            'n_estimators': [50,100, 200, 300],
            'max_depth': [None, 2, 3, 5, 7, 8, 10],
            'min_samples_split': np.arange(2, 9),
            'criterion': ['gini', 'entropy'],
            'max_features': ['sqrt', 'log2', 1.0]
            
            

        }
    },
    "Gradient Boost": {
        "model": GradientBoostingClassifier(),
        "params": {
            'n_estimators': [50, 100, 200, 300],
            'learning_rate': [0.01, 0.1],
            'max_depth':[ None,2,3, 5, 7, 8, 10],
            'min_samples_leaf': [1, 2, 3, 4],
            'subsample': [0.8, 1.0],
            'min_samples_split': np.arange(2, 9)

        }
    },
    "XGBoost": {
        "model": XGBClassifier(),
        "params": {
            'n_estimators': np.arange(50, 250, 50),
            'learning_rate': [0.01, 0.1, 0.05, 0.5, 1.0],
            'max_depth': np.arange(2, 11),
            'subsample': [0.1, 0.5, 0.8, 1.0],
            'colsample_bytree': [0.1, 0.5, 0.8, 1.0],
            
            'reg_lambda': [0.1, 0.5, 0.8, 1.0],
            'min_child_weight': [1, 2, 3, 4]
            



        }
    }
}

# Train/evaluate models
results = {}
for name, config in models.items():
    print(f"\n======= Training {name} =======")
    
    # Randomized search with 5-fold stratified CV
    search = RandomizedSearchCV(
        estimator=config["model"],
        param_distributions=config["params"],
        n_iter=20,
        cv=StratifiedKFold(n_splits=5 , random_state=10, shuffle=True),
        scoring='f1_weighted',
        n_jobs=-1,
        random_state=10
    )
    search.fit(X_train_scaled, y_train)
    
    # Best model evaluation
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test_scaled)
    
    # Store results
    results[name] = {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred, average='weighted'),
        "recall": recall_score(y_test, y_pred, average='weighted'),
        "f1": f1_score(y_test, y_pred, average='weighted'),
        "confusion_matrix": confusion_matrix(y_test, y_pred),
        # "balanced_accuracy": balanced_accuracy_score(y_test, y_pred),
        "balanced_accuracy": (sensitivity_score(y_test, y_pred , average='weighted') + specificity_score(y_test, y_pred, average='weighted')) / 2,

        "best_params": search.best_params_
    }
    
    print(f"Best Parameters: {search.best_params_}")
    print(classification_report(y_test, y_pred))

# Compare model performances
print("\n===== Model Performance Summary =====")
for model, metrics in results.items():
    print(f"\n{model}:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1-Score: {metrics['f1']:.4f}")
    print(f"Confusion Matrix:\n{metrics['confusion_matrix']}")
    # print(f"Balanced Accuracy: {metrics['balanced_accuracy']:.4f}")
    print(f"balanced_accuracy", metrics['balanced_accuracy'])
    print(f"Best Parameters: {metrics['best_params']}")


Best Parameters: {'kernel': 'poly', 'gamma': 'scale', 'C': 10}
              precision    recall  f1-score   support

           0       0.71      0.97      0.82        72
           1       0.71      0.24      0.36        21
           2       0.71      0.26      0.38        19

    accuracy                           0.71       112
   macro avg       0.71      0.49      0.52       112
weighted avg       0.71      0.71      0.66       112


Best Parameters: {'splitter': 'best', 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_depth': 2, 'criterion': 'gini'}
              precision    recall  f1-score   support

           0       0.65      0.94      0.77        72
           1       0.29      0.10      0.14        21
           2       0.00      0.00      0.00        19

    accuracy                           0.62       112
   macro avg       0.31      0.35      0.31       112
weighted avg       0.47      0.62      0.52       112


Best Parameters: {'n_estimators': 300, 'min_sample

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report,balanced_accuracy_score 
from imblearn.metrics import specificity_score, sensitivity_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier

# Load features and labels
# features = pd.read_csv('reg_features.csv')
# labels = pd.read_csv('labeled_bgl.csv')['Label']


# # Split data with stratification (80% train, 20% test)
# X_train, X_test, y_train, y_test = train_test_split(
#     features, labels, test_size=0.2, stratify=labels, random_state=42
# )

X_train = pd.read_csv('X_train_augmented.csv')
y_train = pd.read_csv('y_train_augmented.csv')['Label']
X_test= pd.read_csv('X_test_original.csv')
y_test = pd.read_csv('y_test_original.csv')['Label']
# X_train.drop(columns=['Gender'], inplace=True)
# X_test.drop(columns=['Gender'], inplace=True)




# Standardize features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



# Initialize models and parameter grids
models = {
    "SVM": {
        "model": SVC(),
        "params": {
            'C':      [0.1, 0.5, 1, 5, 10, 100],
            'gamma':  ['scale', 'auto'],
            'kernel': ['poly', 'rbf', 'sigmoid','linear']
            
            
            }
    },
    "Decision Tree": {
        "model": DecisionTreeClassifier(),
        "params": {
            'max_depth': [None, 2, 3, 5, 7, 8, 10],
            'min_samples_split': np.arange(2, 9),
            'min_samples_leaf': [1, 2, 3, 4],
            'criterion': ['gini', 'entropy'],
            'splitter': ['best', 'random']


        }
    },
    "Random Forest": {
        "model": RandomForestClassifier(),
        "params": {
            'n_estimators': [50,100, 200, 300],
            'max_depth': [None, 2, 3, 5, 7, 8, 10],
            'min_samples_split': np.arange(2, 9),
            'criterion': ['gini', 'entropy'],
            'max_features': ['sqrt', 'log2', 1.0]
            
            

        }
    },
    "Gradient Boost": {
        "model": GradientBoostingClassifier(),
        "params": {
            'n_estimators': [50, 100, 200, 300],
            'learning_rate': [0.01, 0.1],
            'max_depth':[ 3, 5, 7, 8, 10],
            'min_samples_leaf': [1, 2, 3, 4],
            'subsample': [0.8, 1.0],
            'min_samples_split': np.arange(2, 9)

        }
    },
    "XGBoost": {
        "model": XGBClassifier(),
        "params": {
            'n_estimators': np.arange(50, 250, 50),
            'learning_rate': [0.01, 0.1, 0.05, 0.5, 1.0],
            'max_depth': np.arange(2, 11),
            'subsample': [0.1, 0.5, 0.8, 1.0],
            'colsample_bytree': [0.1, 0.5, 0.8, 1.0],
            
            'reg_lambda': [0.1, 0.5, 0.8, 1.0],
            'min_child_weight': [1, 2, 3, 4]
            



        }
    }
}

# Train/evaluate models
results = {}
for name, config in models.items():
    print(f"\n======= Training {name} =======")
    
    # Randomized search with 5-fold stratified CV
    search = RandomizedSearchCV(
        estimator=config["model"],
        param_distributions=config["params"],
        n_iter=10,
        cv=StratifiedKFold(n_splits=5 , random_state=10, shuffle=True),
        scoring='f1_weighted',
        n_jobs=-1,
        random_state=10
    )
    search.fit(X_train_scaled, y_train)
    
    # Best model evaluation
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test_scaled)
    
    # Store results
    results[name] = {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred, average='weighted'),
        "recall": recall_score(y_test, y_pred, average='weighted'),
        "f1": f1_score(y_test, y_pred, average='weighted'),
        "confusion_matrix": confusion_matrix(y_test, y_pred),
        # "balanced_accuracy": balanced_accuracy_score(y_test, y_pred),
        "balanced_accuracy": (sensitivity_score(y_test, y_pred , average='weighted') + specificity_score(y_test, y_pred, average='weighted')) / 2,

        "best_params": search.best_params_
    }
    
    print(f"Best Parameters: {search.best_params_}")
    print(classification_report(y_test, y_pred))

# Compare model performances
print("\n===== Model Performance Summary =====")
for model, metrics in results.items():
    print(f"\n{model}:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1-Score: {metrics['f1']:.4f}")
    print(f"Confusion Matrix:\n{metrics['confusion_matrix']}")
    # print(f"Balanced Accuracy: {metrics['balanced_accuracy']:.4f}")
    print(f"balanced_accuracy", metrics['balanced_accuracy'])
    print(f"Best Parameters: {metrics['best_params']}")


Best Parameters: {'kernel': 'poly', 'gamma': 'scale', 'C': 100}
              precision    recall  f1-score   support

           0       0.76      0.96      0.85        72
           1       0.54      0.33      0.41        21
           2       0.88      0.37      0.52        19

    accuracy                           0.74       112
   macro avg       0.72      0.55      0.59       112
weighted avg       0.74      0.74      0.71       112


Best Parameters: {'splitter': 'random', 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_depth': None, 'criterion': 'gini'}
              precision    recall  f1-score   support

           0       0.81      0.88      0.84        72
           1       0.54      0.67      0.60        21
           2       0.88      0.37      0.52        19

    accuracy                           0.75       112
   macro avg       0.74      0.64      0.65       112
weighted avg       0.77      0.75      0.74       112


Best Parameters: {'n_estimators': 50, 'min_s

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report,balanced_accuracy_score
from imblearn.metrics import specificity_score, sensitivity_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from xgboost import XGBClassifier

# Load features and labels
X_train = pd.read_csv('X_train_augmented1.csv')
y_train = pd.read_csv('y_train_augmented1.csv')['Label']
X_test= pd.read_csv('X_test_original.csv')
y_test = pd.read_csv('y_test_original.csv')['Label']

# Drop 'Gender' if it's an issue and not handled as a categorical feature
# If 'Gender' is categorical and needs encoding, do it before scaling.
# For simplicity in this example, let's assume it's already handled or dropped.
# X_train.drop(columns=['Gender'], inplace=True, errors='ignore')
# X_test.drop(columns=['Gender'], inplace=True, errors='ignore')


# Standardize features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert scaled arrays back to DataFrames with original column names
# This is useful if you intend to pass DataFrames to models (though most sklearn accept arrays)
# And especially if you want to inspect features later.
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=X_test.columns)


# Initialize models and parameter grids for individual models
# It's crucial to find good individual models before combining them.
# I've slightly adjusted some parameter ranges for more exploration.
models = {
    "SVM": {
        "model": SVC(probability=True, random_state=10), # probability=True is needed for 'soft' voting
        "params": {
            'C':        [0.01, 0.1, 0.5, 1, 5, 10, 100],
            'gamma':    ['scale', 'auto', 0.01, 0.1, 0.5],
            'kernel':   ['rbf', 'poly', 'sigmoid','linear'] # rbf is generally a good default
        }
    },
    "Decision Tree": {
        "model": DecisionTreeClassifier(random_state=10),
        "params": {
            'max_depth': [None, 3, 5, 7, 10, 15, 20],
            'min_samples_split': np.arange(2, 15),
            'min_samples_leaf': [1, 2, 3, 4, 5, 10],
            'criterion': ['gini', 'entropy'],
            'splitter': ['best', 'random']
        }
    },
    "Random Forest": {
        "model": RandomForestClassifier(random_state=10),
        "params": {
            'n_estimators': [100, 200, 300, 500],
            'max_depth': [None, 5, 10, 15, 20],
            'min_samples_split': np.arange(2, 10),
            'criterion': ['gini', 'entropy'],
            'max_features': ['sqrt', 'log2', 0.8, 1.0] # 0.8 is a proportion
        }
    },
    "Gradient Boost": {
        "model": GradientBoostingClassifier(random_state=10),
        "params": {
            'n_estimators': [100, 200, 300, 500],
            'learning_rate': [0.01, 0.05, 0.1, 0.2],
            'max_depth':[3, 5, 7, 10], # Typically smaller for boosting
            'min_samples_leaf': [1, 2, 3, 4, 5],
            'subsample': [0.7, 0.8, 0.9, 1.0],
            'min_samples_split': np.arange(2, 8)
        }
    },
    "XGBoost": {
        "model": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=10), # Suppress warning and set eval_metric
        "params": {
            'n_estimators': np.arange(100, 500, 100),
            'learning_rate': [0.01, 0.05, 0.1, 0.2],
            'max_depth': np.arange(3, 10),
            'subsample': [0.7, 0.8, 0.9, 1.0],
            'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
            'reg_lambda': [0.1, 0.5, 1.0, 2.0],
            'reg_alpha': [0.0, 0.1, 0.5, 1.0], # L1 regularization
            'min_child_weight': [1, 2, 3, 4]
        }
    }
}

# --- Step 1: Tune Individual Models and Store Best Estimators ---
best_estimators = {}
results = {}

for name, config in models.items():
    print(f"\n======= Training {name} =======")

    search = RandomizedSearchCV(
        estimator=config["model"],
        param_distributions=config["params"],
        n_iter=20, # Increased n_iter for more thorough search
        cv=StratifiedKFold(n_splits=5 , random_state=10, shuffle=True),
        scoring='f1_weighted',
        n_jobs=-1,
        random_state=10,
        verbose=1 # Added verbose to see progress
    )
    search.fit(X_train_scaled_df, y_train) # Using df here, but array is fine

    best_model = search.best_estimator_
    best_estimators[name] = best_model # Store the best individual model

    y_pred = best_model.predict(X_test_scaled_df)

    results[name] = {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred, average='weighted'),
        "recall": recall_score(y_test, y_pred, average='weighted'),
        "f1": f1_score(y_test, y_pred, average='weighted'),
        "confusion_matrix": confusion_matrix(y_test, y_pred),
        "balanced_accuracy": (sensitivity_score(y_test, y_pred , average='weighted') + specificity_score(y_test, y_pred, average='weighted')) / 2,
        "best_params": search.best_params_
    }

    print(f"Best Parameters for {name}: {search.best_params_}")
    print(classification_report(y_test, y_pred))

# --- Step 2: Implement Voting Classifier ---
print("\n======= Training Voting Classifier =======")

# Select the best performing individual models for the ensemble
# You might choose all, or a subset based on individual performance and diversity.
# For 'soft' voting, models must have 'predict_proba' method (SVM needs probability=True).
# You can adjust weights if some models are more trustworthy than others.
ensemble_estimators = [
    ('svm', best_estimators['SVM']),
    ('dt', best_estimators['Decision Tree']),
    ('rf', best_estimators['Random Forest']),
    ('gb', best_estimators['Gradient Boost']),
    ('xgb', best_estimators['XGBoost'])
]

# Hard Voting (majority class vote)
voting_clf_hard = VotingClassifier(estimators=ensemble_estimators, voting='hard', n_jobs=-1)
voting_clf_hard.fit(X_train_scaled_df, y_train)
y_pred_hard_voting = voting_clf_hard.predict(X_test_scaled_df)

# Soft Voting (average of predicted probabilities, requires predict_proba)
# This often performs better as it considers confidence.
# Make sure SVM has probability=True when initialized.
voting_clf_soft = VotingClassifier(estimators=ensemble_estimators, voting='soft', n_jobs=-1)
voting_clf_soft.fit(X_train_scaled_df, y_train)
y_pred_soft_voting = voting_clf_soft.predict(X_test_scaled_df)


# --- Step 3: Evaluate Voting Classifier ---
print("\n===== Voting Classifier Performance (Hard Voting) =====")
results['Voting_Hard'] = {
    "accuracy": accuracy_score(y_test, y_pred_hard_voting),
    "precision": precision_score(y_test, y_pred_hard_voting, average='weighted'),
    "recall": recall_score(y_test, y_pred_hard_voting, average='weighted'),
    "f1": f1_score(y_test, y_pred_hard_voting, average='weighted'),
    "confusion_matrix": confusion_matrix(y_test, y_pred_hard_voting),
    "balanced_accuracy": (sensitivity_score(y_test, y_pred_hard_voting , average='weighted') + specificity_score(y_test, y_pred_hard_voting, average='weighted')) / 2,
    "best_params": "N/A (Ensemble)"
}
print(classification_report(y_test, y_pred_hard_voting))


print("\n===== Voting Classifier Performance (Soft Voting) =====")
results['Voting_Soft'] = {
    "accuracy": accuracy_score(y_test, y_pred_soft_voting),
    "precision": precision_score(y_test, y_pred_soft_voting, average='weighted'),
    "recall": recall_score(y_test, y_pred_soft_voting, average='weighted'),
    "f1": f1_score(y_test, y_pred_soft_voting, average='weighted'),
    "confusion_matrix": confusion_matrix(y_test, y_pred_soft_voting),
    "balanced_accuracy": (sensitivity_score(y_test, y_pred_soft_voting , average='weighted') + specificity_score(y_test, y_pred_soft_voting, average='weighted')) / 2,
    "best_params": "N/A (Ensemble)"
}
print(classification_report(y_test, y_pred_soft_voting))

# --- Step 4: Compare All Model Performances ---
print("\n===== Model Performance Summary =====")
# Convert to a DataFrame for easier comparison and sorting
results_df = pd.DataFrame.from_dict(results, orient='index')
results_df['balanced_accuracy'] = results_df['balanced_accuracy'].astype(float) # Ensure it's numeric for sorting
print(results_df[['accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy']].sort_values(by='balanced_accuracy', ascending=False))


# Print detailed results for all models including ensembles
for model, metrics in results.items():
    print(f"\n{model}:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1-Score: {metrics['f1']:.4f}")
    print(f"Balanced Accuracy: {metrics['balanced_accuracy']:.4f}")
    print(f"Confusion Matrix:\n{metrics['confusion_matrix']}")
    if model not in ['Voting_Hard', 'Voting_Soft']: # Best params only for individual models
        print(f"Best Parameters: {metrics['best_params']}")


Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Parameters for SVM: {'kernel': 'rbf', 'gamma': 0.1, 'C': 100}
              precision    recall  f1-score   support

           0       0.74      0.97      0.84        72
           1       0.55      0.29      0.38        21
           2       0.71      0.26      0.38        19

    accuracy                           0.72       112
   macro avg       0.67      0.51      0.53       112
weighted avg       0.70      0.72      0.68       112


Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Parameters for Decision Tree: {'splitter': 'best', 'min_samples_split': 11, 'min_samples_leaf': 10, 'max_depth': 5, 'criterion': 'entropy'}
              precision    recall  f1-score   support

           0       0.69      0.86      0.77        72
           1       0.36      0.24      0.29        21
           2       0.38      0.16      0.22        19

    accuracy                           0.62       112
   macro 



Best Parameters for Random Forest: {'n_estimators': 300, 'min_samples_split': 7, 'max_features': 'sqrt', 'max_depth': 15, 'criterion': 'entropy'}
              precision    recall  f1-score   support

           0       0.84      0.99      0.90        72
           1       0.76      0.62      0.68        21
           2       1.00      0.53      0.69        19

    accuracy                           0.84       112
   macro avg       0.87      0.71      0.76       112
weighted avg       0.85      0.84      0.83       112


Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Parameters for Gradient Boost: {'subsample': 1.0, 'n_estimators': 300, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_depth': 10, 'learning_rate': 0.2}
              precision    recall  f1-score   support

           0       0.86      0.97      0.92        72
           1       0.68      0.62      0.65        21
           2       0.92      0.58      0.71        19

    accuracy                  

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Best Parameters for XGBoost: {'subsample': 0.7, 'reg_lambda': 0.5, 'reg_alpha': 0.5, 'n_estimators': 300, 'min_child_weight': 2, 'max_depth': 9, 'learning_rate': 0.2, 'colsample_bytree': 1.0}
              precision    recall  f1-score   support

           0       0.85      0.94      0.89        72
           1       0.76      0.62      0.68        21
           2       0.80      0.63      0.71        19

    accuracy                           0.83       112
   macro avg       0.80      0.73      0.76       112
weighted avg       0.83      0.83      0.82       112



===== Voting Classifier Performance (Hard Voting) =====
              precision    recall  f1-score   support

           0       0.85      0.99      0.91        72
           1       0.76      0.62      0.68        21
           2       1.00      0.58      0.73        19

    accuracy                           0.85       112
   macro avg       0.87      0.73      0.78       112
weighted avg       0.86      0.85      0.84

In [12]:
X_train.drop(columns=['Gender'], inplace=True)

In [13]:
X_train

Unnamed: 0,Age,HR,SPO2,maxBP,minBP,TGS2603_MEAN,TGS2603_IQR,TGS2603_PTP,TGS2603_RMS,TGS2603_INT,...,TGS822_BW,MQ138_MEAN,MQ138_IQR,MQ138_PTP,MQ138_RMS,MQ138_INT,MQ138_SQ_INT,MQ138_ENERGY,MQ138_POWER,MQ138_BW
0,40.00,86.00,98.00,126.00,84.00,1.48,0.18,0.27,1.48,1.45,...,0.02,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
1,58.00,90.00,98.00,137.00,87.00,1.74,0.25,0.41,1.74,1.68,...,0.55,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2,22.00,68.00,96.00,116.00,68.00,2.63,0.65,1.58,2.68,2.57,...,2.62,2.53,0.58,0.80,2.55,2.47,6.33,270.73,11370.55,0.44
3,68.00,90.00,96.00,116.00,74.00,1.88,0.45,0.72,1.90,1.82,...,0.03,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
4,22.00,68.00,96.00,116.00,68.00,2.78,0.09,0.12,2.78,2.50,...,0.64,2.80,0.04,0.06,2.80,2.58,7.23,101.87,1324.31,0.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1942,17.01,52.55,97.98,82.23,85.50,2.17,1.25,1.82,0.27,1.36,...,1.20,0.01,0.00,1.56,0.06,4.66,21.30,1718.70,135848.19,0.03
1943,17.02,51.35,98.76,81.43,102.66,2.93,0.94,0.35,0.09,2.61,...,23.34,0.01,0.02,1.55,1.49,4.00,14.19,1680.73,23753.07,0.08
1944,73.95,66.56,91.87,202.15,114.79,3.15,0.00,0.28,0.00,2.65,...,0.15,0.30,0.27,0.66,4.46,1.59,21.81,659.18,2314.41,0.06
1945,29.46,51.30,99.00,82.00,115.00,3.15,0.00,0.00,0.06,1.59,...,31.49,0.29,0.03,0.00,4.68,2.58,0.27,634.42,11091.71,5.47


In [None]:
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
# from sklearn.preprocessing import StandardScaler,MinMaxScaler
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report,balanced_accuracy_score 
# from imblearn.metrics import specificity_score, sensitivity_score
# from sklearn.svm import SVC
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# from xgboost import XGBClassifier
        
# # # Load features and labels
# # features = pd.read_csv('reg_features.csv')
# # labels = pd.read_csv('labeled_bgl.csv')['Label']


# # # Split data with stratification (80% train, 20% test)
# # X_train, X_test, y_train, y_test = train_test_split(
# #     features, labels, test_size=0.2, stratify=labels, random_state=42
# # )

# X_train_scaled = pd.read_csv('X_train_augmented1.csv')
# y_train = pd.read_csv('y_train_augmented1.csv')['Label']
# X_test_scaled= pd.read_csv('X_test_original.csv')
# y_test = pd.read_csv('y_test_original.csv')['Label']


# # # Standardize features
# scaler = MinMaxScaler(feature_range=(-1, 1))
# # X_train_scaled = scaler.fit_transform(X_train)
# # X_test_scaled = scaler.transform(X_test)



# # Initialize models and parameter grids
# models = {
#     # "SVM": {
#     #     "model": SVC(),
#     #     "params": {
#     #         'C':      [0.1, 0.5, 1, 5, 10, 100],
#     #         'gamma':  ['scale', 'auto'],
#     #         'kernel': ['poly', 'rbf', 'sigmoid','linear'],
#     #         'random_state': [10]
            
#     #         }
#     # },
#     # "Decision Tree": {
#     #     "model": DecisionTreeClassifier(),
#     #     "params": {
#     #         'max_depth': [None, 2, 3, 5, 7, 8, 10],
#     #         'min_samples_split': np.arange(2, 9),
#     #         'min_samples_leaf': [1, 2, 3, 4],
#             'criterion': ['gini', 'entropy'],
#     #         'splitter': ['best', 'random'],
#     #         'random_state': [10]

#     #     }
#     # },
#     "Random Forest": {
#         "model": RandomForestClassifier(),
#         "params": {
#             'n_estimators': [50,100, 200, 300],
#             'max_depth': [None, 2, 3, 5, 7, 8, 10],
#             'min_samples_split': np.arange(2, 9),
#             'criterion': ['gini', 'entropy'],
#             'max_features': ['sqrt', 'log2', 1.0],
#             'random_state': [10]
            

#         }
#     },
#     "Gradient Boost": {
#         "model": GradientBoostingClassifier(),
#         "params": {
#             'n_estimators': [50, 100, 200, 300],
#             'learning_rate': [0.01, 0.1],
#             'max_depth':[None, 2, 3, 5, 7, 8, 10],
#             'min_samples_leaf': [1, 2, 3, 4],
#             'subsample': [0.8, 1.0],
#             'min_samples_split': np.arange(2, 9),
#             'random_state': [10]  

#         }
#     },
#     "XGBoost": {
#         "model": XGBClassifier(),
#         "params": {
#             'n_estimators': [50, 100, 200, 300],
#             'learning_rate': [0.01, 0.05, 0.1, 0.5],
#             'max_depth': [None, 2, 3, 5, 7, 8, 10],
#             'subsample':      [0.5, 1.0],
#             'colsample_bytree': [0.5, 0.8, 1.0],
#             'gamma': [0, 0.1, 0.2],
#             # 'scale_pos_weight': [1, 2, 5],  # Handles class imbalance
            
#             # 'eval_metric': ['mlogloss'],
#             # 'min_samples_split': np.arange(2, 9),
#             'random_state': [10]
#         }
#     }
# }

# # Train/evaluate models
# results = {}
# for name, config in models.items():
#     print(f"\n======= Training {name} =======")
    
#     # Randomized search with 5-fold stratified CV
#     search = RandomizedSearchCV(
#         estimator=config["model"],
#         param_distributions=config["params"],
#         n_iter=10,
#         cv=StratifiedKFold(n_splits=5 , random_state=42, shuffle=True),
#         scoring='f1_weighted',
#         n_jobs=-1,
#         random_state=42
#     )
#     search.fit(X_train_scaled, y_train)
    
#     # Best model evaluation
#     best_model = search.best_estimator_
#     y_pred = best_model.predict(X_test_scaled)
    
#     # Store results
#     results[name] = {
#         "accuracy": accuracy_score(y_test, y_pred),
#         "precision": precision_score(y_test, y_pred, average='weighted'),
#         "recall": recall_score(y_test, y_pred, average='weighted'),
#         "f1": f1_score(y_test, y_pred, average='weighted'),
#         "confusion_matrix": confusion_matrix(y_test, y_pred),
#         # "balanced_accuracy": balanced_accuracy_score(y_test, y_pred),
#         "balanced_accuracy": (sensitivity_score(y_test, y_pred , average='weighted') + specificity_score(y_test, y_pred, average='weighted')) / 2,

#         "best_params": search.best_params_
#     }
    
#     print(f"Best Parameters: {search.best_params_}")
#     print(classification_report(y_test, y_pred))

# # Compare model performances
# print("\n===== Model Performance Summary =====")
# for model, metrics in results.items():
#     print(f"\n{model}:")
#     print(f"Accuracy: {metrics['accuracy']:.4f}")
#     print(f"Precision: {metrics['precision']:.4f}")
#     print(f"Recall: {metrics['recall']:.4f}")
#     print(f"F1-Score: {metrics['f1']:.4f}")
#     print(f"Confusion Matrix:\n{metrics['confusion_matrix']}")
#     # print(f"Balanced Accuracy: {metrics['balanced_accuracy']:.4f}")
#     print(f"balanced_accuracy", metrics['balanced_accuracy'])
#     print(f"Best Parameters: {metrics['best_params']}")


Best Parameters: {'random_state': 10, 'n_estimators': 300, 'min_samples_split': 2, 'max_features': 'sqrt', 'max_depth': 10, 'criterion': 'entropy'}
              precision    recall  f1-score   support

           0       0.80      0.92      0.85        72
           1       0.64      0.43      0.51        21
           2       0.67      0.53      0.59        19

    accuracy                           0.76       112
   macro avg       0.70      0.62      0.65       112
weighted avg       0.74      0.76      0.74       112


Best Parameters: {'subsample': 0.8, 'random_state': 10, 'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 4, 'max_depth': 10, 'learning_rate': 0.1}
              precision    recall  f1-score   support

           0       0.86      0.92      0.89        72
           1       0.53      0.38      0.44        21
           2       0.55      0.58      0.56        19

    accuracy                           0.76       112
   macro avg       0.65      0.63 