In [2]:
from sklearn.utils import all_estimators
from sklearn.base import ClassifierMixin
from sklearn.linear_model import LogisticRegression
from utilities import *
from sklearn.ensemble import RandomForestClassifier
from inspect import signature
import csv
from sklearn.metrics import accuracy_score, mean_squared_error, confusion_matrix

In [3]:
X_train, X_test, y_train, y_test, pd_passthrough_train, pd_passthrough_test = prepare_data(data_subdir="ZMUMU_EGZ_extended_iso_vars", format_mode="iso_vars")

found preprepared data in ..\data\ZMUMU_EGZ_extended_iso_vars


In [4]:
all_estimators_list = all_estimators(type_filter="classifier")

binary_classifiers = {}
default_base_estimator = LogisticRegression()

for name, Classifier in all_estimators_list:
    try:
        # Handle meta-estimators requiring base estimators or parameters
        if name in [
            "ClassifierChain",
            "FixedThresholdClassifier",
            "MultiOutputClassifier",
            "OneVsOneClassifier",
            "OneVsRestClassifier",
            "OutputCodeClassifier",
            "StackingClassifier",
            "TunedThresholdClassifierCV",
            "VotingClassifier",
        ]:
            if "ClassifierChain" == name:
                clf = Classifier(base_estimator=default_base_estimator)
            elif name in [
                "MultiOutputClassifier",
                "OneVsOneClassifier",
                "OneVsRestClassifier",
                "OutputCodeClassifier",
                "FixedThresholdClassifier",
                "TunedThresholdClassifierCV",
            ]:
                clf = Classifier(estimator=default_base_estimator)
            elif name == "StackingClassifier":
                clf = Classifier(estimators=[("lr", LogisticRegression())])
            elif name == "VotingClassifier":
                clf = Classifier(estimators=[("lr", LogisticRegression())])
        else:
            # Instantiate directly for non-meta classifiers
            clf = Classifier()

        if hasattr(clf, "predict"):  # Check for predict method
            binary_classifiers[name] = Classifier
            globals()[name] = Classifier  # Dynamically add to global namespace
    except Exception as e:
        print(f"Could not import {name}: {e}")

# Verify imported classifiers
print(f"Imported {len(binary_classifiers)} binary classifiers:")
print(list(binary_classifiers.keys()))


Imported 42 binary classifiers:
['AdaBoostClassifier', 'BaggingClassifier', 'BernoulliNB', 'CalibratedClassifierCV', 'CategoricalNB', 'ClassifierChain', 'ComplementNB', 'DecisionTreeClassifier', 'DummyClassifier', 'ExtraTreeClassifier', 'ExtraTreesClassifier', 'FixedThresholdClassifier', 'GaussianNB', 'GaussianProcessClassifier', 'GradientBoostingClassifier', 'HistGradientBoostingClassifier', 'KNeighborsClassifier', 'LabelPropagation', 'LabelSpreading', 'LinearDiscriminantAnalysis', 'LinearSVC', 'LogisticRegression', 'LogisticRegressionCV', 'MLPClassifier', 'MultiOutputClassifier', 'MultinomialNB', 'NearestCentroid', 'NuSVC', 'OneVsOneClassifier', 'OneVsRestClassifier', 'OutputCodeClassifier', 'PassiveAggressiveClassifier', 'Perceptron', 'QuadraticDiscriminantAnalysis', 'RadiusNeighborsClassifier', 'RandomForestClassifier', 'RidgeClassifier', 'RidgeClassifierCV', 'SGDClassifier', 'SVC', 'TunedThresholdClassifierCV', 'VotingClassifier']


In [4]:

# binary_classifiers_short = dict(list(binary_classifiers.items())[:10])
results = []
for name, Classifier in binary_classifiers.items():
    try:
        if name in [
            "ClassifierChain",
            "FixedThresholdClassifier",
            "MultiOutputClassifier",
            "OneVsOneClassifier",
            "OneVsRestClassifier",
            "OutputCodeClassifier",
            "StackingClassifier",
            "TunedThresholdClassifierCV",
            "VotingClassifier",
        ]:
            if name == "ClassifierChain":
                clf = Classifier(base_estimator=LogisticRegression())
            elif name in [
                "MultiOutputClassifier",
                "OneVsOneClassifier",
                "OneVsRestClassifier",
                "OutputCodeClassifier",
                "FixedThresholdClassifier",
                "TunedThresholdClassifierCV",
            ]:
                clf = Classifier(estimator=LogisticRegression())
            elif name == "StackingClassifier":
                clf = Classifier(estimators=[("lr", LogisticRegression()), ("rf", RandomForestClassifier())])
            elif name == "VotingClassifier":
                clf = Classifier(estimators=[("lr", LogisticRegression()), ("rf", RandomForestClassifier())])
        else:
            # Check if random_state is a parameter and set it if available
            params = signature(Classifier).parameters
            if "random_state" in params:
                clf = Classifier(random_state=42)
            else:
                clf = Classifier()

        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        
        accuracy = accuracy_score(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

        # Save results
        results.append({
            "Classifier": name,
            "Accuracy": accuracy,
            "MSE": mse,
            "TN": tn,
            "FP": fp,
            "FN": fn,
            "TP": tp,
        })

        evaluate_sklearn_model(y_test, y_pred, show_CR=False, show_MSE=True, model_name=f'{name}')
    except Exception as e:
        # On error, save classifier name with NULL values
        print(f"Could not evaluate {name}: {e}\n")
        results.append({
            "Classifier": name,
            "Accuracy": "NULL",
            "MSE": "NULL",
            "TN": "NULL",
            "FP": "NULL",
            "FN": "NULL",
            "TP": "NULL",
            })




Evaluation of AdaBoostClassifier
Accuracy: 0.9568
Confusion Matrix:
 [[27334  1276]
 [  718 16854]]
Mean Squared Error: 0.0432

Evaluation of BaggingClassifier
Accuracy: 0.9571
Confusion Matrix:
 [[27453  1157]
 [  823 16749]]
Mean Squared Error: 0.0429

Evaluation of BernoulliNB
Accuracy: 0.7140
Confusion Matrix:
 [[20795  7815]
 [ 5394 12178]]
Mean Squared Error: 0.2860

Evaluation of CalibratedClassifierCV
Accuracy: 0.9487
Confusion Matrix:
 [[27195  1415]
 [  953 16619]]
Mean Squared Error: 0.0513

Evaluation of CategoricalNB
Accuracy: 0.9453
Confusion Matrix:
 [[27018  1592]
 [  936 16636]]
Mean Squared Error: 0.0547

Could not evaluate ClassifierChain: tuple index out of range

Evaluation of ComplementNB
Accuracy: 0.7485
Confusion Matrix:
 [[17188 11422]
 [  195 17377]]
Mean Squared Error: 0.2515

Evaluation of DecisionTreeClassifier
Accuracy: 0.9421
Confusion Matrix:
 [[27249  1361]
 [ 1314 16258]]
Mean Squared Error: 0.0579

Evaluation of DummyClassifier
Accuracy: 0.6195
Confus

In [5]:
output_file = "sklearn_all_results_iso.csv"
with open(output_file, mode="w", newline="") as file:
    writer = csv.DictWriter(file, fieldnames=["Classifier", "Accuracy", "MSE", "TN", "FP", "FN", "TP"])
    writer.writeheader()
    writer.writerows(results)

print(f"Results saved to {output_file}")

Results saved to sklearn_all_results_iso.csv
