# Notebook used for analysis
Obs: on a Macbook Pro 13" M1 2020 with 16 GB memory and Sonoma 14.1, it took approximately 9.5-10 hours to run this code.

In [254]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, precision_recall_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import xgboost as xgb
import tensorflow as tf
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from pathlib import Path
from sklearn.dummy import DummyClassifier
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

In [255]:
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

In [256]:
# set default font size
plt.rcParams.update({'font.size': 16})
# set default legend font size
plt.rcParams.update({'legend.fontsize': 16})

In [257]:
plt.rcParams['font.family'] = 'serif'  # similar to Times New Roman
#plt.rcParams['font.family'] = 'sans-serif'  # similar to Libertine

In [258]:
all_roc_data = {}
all_prc_data = {}

In [259]:
# decorator for running a function on multiple dataset splits
def run_on_splits(func):
    def _run_loop(model, splits, **kwargs):
        results = {}
        roc_data = {}
        prc_data = {}
        test_roc_data = {}
        test_prc_data = {}
        model_name = kwargs.get('model_name', 'model')
        for split in splits:
            X, y, nsplit = split
            result, roc_info, prc_info = func(model, X, y, nsplit, **kwargs)
            results[nsplit] = result
            roc_data[nsplit] = roc_info
            prc_data[nsplit] = prc_info
            if nsplit == 'test':
                test_roc_data[model_name] = roc_info
                test_prc_data[model_name] = prc_info
        return results, roc_data, prc_data, test_roc_data, test_prc_data
    return _run_loop

@run_on_splits
def evaluate_classification(model, X, y, nsplit, model_name, best_params=None):
    preds = model.predict(X)
    pred_probs = model.predict_proba(X)[:, 1]
    accuracy = accuracy_score(y, preds)
    roc_auc = roc_auc_score(y, pred_probs)
    fpr, tpr, _ = roc_curve(y, pred_probs)
    precision, recall, _ = precision_recall_curve(y, pred_probs)
    prc_auc = auc(recall, precision)
    report = classification_report(y, preds, output_dict=True)
    print(f"{model_name} - {nsplit} - Accuracy: {accuracy}, ROC_AUC: {roc_auc}, PRC_AUC: {prc_auc}\n{report}")
    return (accuracy, report), (fpr, tpr, roc_auc), (precision, recall, prc_auc)

def save_model_results(results, model_name, results_dir):
    directory = results_dir
    os.makedirs(directory, exist_ok=True)
    filepath = os.path.join(directory, f'{model_name}_results.txt')
    with open(filepath, 'w') as f:
        for split, (accuracy, report) in results.items():
            f.write(f"{model_name} - {split} - Accuracy: {accuracy}\n")
            f.write("Classification Report:\n")
            for key, value in report.items():
                f.write(f"{key}: {value}\n")
            f.write("\n")           

def save_roc_auc_scores(roc_data, results_dir, filename='roc_auc_scores.txt'):
    with open(os.path.join(results_dir, filename), 'w') as f:
        for model_name, (fpr, tpr, roc_auc) in roc_data.items():
            f.write(f"{model_name}: ROC AUC = {roc_auc:.2f}\n")

def plot_feature_importances(model, model_name, feature_names, results_dir, filename='feature_importances.png'):
    feature_importances = model.feature_importances_
    indices = np.argsort(feature_importances)[-10:]
    plt.figure(figsize=(10, 6))
    plt.title('Feature Importances')
    plt.barh(range(len(indices)), feature_importances[indices], color='b', align='center')
    plt.yticks(range(len(indices)), [feature_names[i] for i in indices])
    plt.xlabel('Relative Importance')
    full_path = os.path.join(results_dir, f'{model_name}_{filename}')
    plt.savefig(full_path)
    plt.close()
    
def plot_roc_curves(roc_data, model_name, results_dir, filename='roc_curves.png'):
    plt.figure(figsize=(10, 8))
    for split, (fpr, tpr, roc_auc) in roc_data.items():
        plt.plot(fpr, tpr, label=f'{model_name} - {split} (ROC AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves')
    plt.legend(loc="lower right")
    full_path = os.path.join(results_dir, f'{model_name}_{filename}')
    plt.savefig(full_path)
    plt.close()

def plot_prc_curves(prc_data, model_name, results_dir, filename='prc_curves.png'):
    plt.figure(figsize=(10, 8))
    for split, (precision, recall, prc_auc) in prc_data.items():
        plt.plot(recall, precision, label=f'{model_name} - {split} (PRC AUC = {prc_auc:.2f})')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curves')
    plt.legend(loc="lower left")
    full_path = os.path.join(results_dir, f'{model_name}_{filename}')
    plt.savefig(full_path)
    plt.close()

def plot_combined_roc_curves(all_roc_data, results_dir, filename='all_roc_curves.png'):
    plt.figure(figsize=(10, 8))
    for model_name, (fpr, tpr, roc_auc) in all_roc_data.items():
        plt.plot(fpr, tpr, label=f'{model_name} (ROC AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Combined ROC Curves')
    plt.legend(loc="lower right")
    full_path = os.path.join(results_dir, filename)
    plt.savefig(full_path)
    plt.close()

def plot_selected_roc_curves(selected_roc_data, results_dir, filename='selected_roc_curves.png'):
    plt.figure(figsize=(10, 8))
    for model_name, (fpr, tpr, roc_auc) in selected_roc_data.items():
        plt.plot(fpr, tpr, label=f'{model_name} (ROC AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Selected Models ROC Curves')
    plt.legend(loc="lower right")
    full_path = os.path.join(results_dir, filename)
    plt.savefig(full_path)
    plt.close()

def plot_combined_prc_curves(all_prc_data, results_dir, filename='all_prc_curves.png'):
    plt.figure(figsize=(10, 8))
    for model_name, prc_data in all_prc_data.items():
        precision, recall, prc_auc = prc_data
        plt.plot(recall, precision, label=f'{model_name} (PRC AUC = {prc_auc:.2f})')
    plt.xlabel('Recall',fontsize=12)
    plt.ylabel('Precision')
    plt.title('Combined Precision-Recall Curves')
    plt.legend(loc="lower left")
    full_path = os.path.join(results_dir, filename)
    plt.savefig(full_path)
    plt.close()
    
    
def save_best_params(model_name, best_params, results_dir):
    filepath = os.path.join(results_dir, 'best_params.txt')
    with open(filepath, 'a') as f:
        f.write(f"{model_name}:\n")
        for param, value in best_params.items():
            f.write(f"  {param}: {value}\n")
        f.write("\n")

def load_data(data_dir):
    train_data_path = data_dir / "train.csv"
    val_data_path = data_dir / "val.csv"
    test_data_path = data_dir / "test.csv"
    train_data = pd.read_csv(train_data_path)
    val_data = pd.read_csv(val_data_path)
    test_data = pd.read_csv(test_data_path)
    X_train = train_data.iloc[:, :-1].values
    y_train = train_data.iloc[:, -1].values
    X_val = val_data.iloc[:, :-1].values
    y_val = val_data.iloc[:, -1].values
    X_test = test_data.iloc[:, :-1].values
    y_test = test_data.iloc[:, -1].values
    feature_names = train_data.columns[:-1]
    return X_train, y_train, X_val, y_val, X_test, y_test, feature_names


In [260]:
def evaluate_deterministic_model(X_train, y_train, X_val, y_val, X_test, y_test, results_dir):
    
    # calculate predictions for the deterministic model
    def deterministic_predict(X):
        sum_non_outcome = np.sum(X, axis=1)
        difference = 1 - sum_non_outcome
        return (difference >= 0.01).astype(int)

    splits = [
        (X_train, y_train, 'train'),
        (X_val, y_val, 'val'),
        (X_test, y_test, 'test')
    ]

    results = {}
    roc_data = {}
    prc_data = {}
    test_roc_data = {}
    test_prc_data = {}

    for X, y, nsplit in splits:
        preds = deterministic_predict(X)
        pred_probs = preds  # since it's deterministic, we use the binary predictions
        accuracy = accuracy_score(y, preds)
        roc_auc = roc_auc_score(y, pred_probs)
        fpr, tpr, _ = roc_curve(y, pred_probs)
        precision, recall, _ = precision_recall_curve(y, pred_probs)
        prc_auc = auc(recall, precision)
        report = classification_report(y, preds, output_dict=True)
        print(f"Deterministic - {nsplit} - Accuracy: {accuracy}, ROC_AUC: {roc_auc}, PRC_AUC: {prc_auc}\n{report}")
        results[nsplit] = (accuracy, report)
        roc_data[nsplit] = (fpr, tpr, roc_auc)
        prc_data[nsplit] = (precision, recall, prc_auc)
        if nsplit == 'test':
            test_roc_data = {"Deterministic": (fpr, tpr, roc_auc)}
            test_prc_data = {"Deterministic": (precision, recall, prc_auc)}

    save_model_results(results, "Deterministic", results_dir)
    plot_roc_curves(roc_data, "Deterministic", results_dir, filename='roc_curves.png')
    plot_prc_curves(prc_data, "Deterministic", results_dir, filename='prc_curves.png')

    all_roc_data["Deterministic"] = test_roc_data["Deterministic"]
    all_prc_data["Deterministic"] = test_prc_data["Deterministic"]

    return results, roc_data, prc_data


In [261]:
def tune_and_evaluate_rf(X_train, y_train, X_val, y_val, X_test, y_test, feature_names, results_dir):
    # basic random foorest model
    basic_rfc = RandomForestClassifier(random_state=42)
    basic_rfc.fit(X_train, y_train)
    
    splits = [(X_train, y_train, 'train'), (X_val, y_val, 'val'), (X_test, y_test, 'test')]
    basic_results, basic_roc_data, basic_prc_data, test_roc_data, test_prc_data = evaluate_classification(basic_rfc, splits, model_name="Random_Forest_Basic")
    save_model_results(basic_results, "Random_Forest_Basic", results_dir)
    
    plot_roc_curves(basic_roc_data, "Random_Forest_Basic", results_dir, filename='roc_curves.png')
    plot_prc_curves(basic_prc_data, "Random_Forest_Basic", results_dir, filename='prc_curves.png')
    plot_feature_importances(basic_rfc, "Random_Forest_Basic", feature_names, results_dir, filename='feature_importances.png')

    all_roc_data["Random_Forest_Basic"] = test_roc_data["Random_Forest_Basic"]
    all_prc_data["Random_Forest_Basic"] = test_prc_data["Random_Forest_Basic"]

    # hyperparameter-tuned random forest model
    rfc = RandomForestClassifier(random_state=42)
    param_grid = {
        'n_estimators': [10, 50, 80, 100, 120, 200, 300, 400],
        'max_depth': [None, 3, 5, 10, 15, 20],
        'min_samples_split': [2, 5, 10, 15, 20],
        'max_features': ['sqrt', 'log2', None]
    }
    #cv_rfc = RandomizedSearchCV(estimator=rfc, param_distributions=param_grid, scoring='accuracy', n_iter=20, cv=3, random_state=42)
    cv_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, scoring='balanced_accuracy', cv=StratifiedKFold(n_splits=5), n_jobs=-1)
    cv_rfc.fit(X_train, y_train)
    best_params = cv_rfc.best_params_
    save_best_params('Random_Forest_Optimized', best_params, results_dir)
    print("Best parameters:", best_params)

    results, roc_data, prc_data, test_roc_data, test_prc_data = evaluate_classification(cv_rfc.best_estimator_, splits, model_name="Random_Forest_Optimized")
    save_model_results(results, "Random_Forest_Optimized", results_dir)

    plot_roc_curves(roc_data, "Random_Forest_Optimized", results_dir, filename='roc_curves.png')
    plot_prc_curves(prc_data, "Random_Forest_Optimized", results_dir, filename='prc_curves.png')
    plot_feature_importances(cv_rfc.best_estimator_, "Random_Forest_Optimized", feature_names, results_dir, filename='feature_importances.png')

    all_roc_data["Random_Forest_Optimized"] = test_roc_data["Random_Forest_Optimized"]
    all_prc_data["Random_Forest_Optimized"] = test_prc_data["Random_Forest_Optimized"]

    return results, roc_data, prc_data


In [262]:
def tune_clf_hyperparameters(clf, param_grid, X_train, y_train):
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    clf_grid = GridSearchCV(clf, param_grid, cv=cv, scoring='balanced_accuracy', n_jobs=-1)
    clf_grid.fit(X_train, y_train)
    print("Best hyperparameters:\n", clf_grid.best_params_)
    return clf_grid.best_estimator_

def tune_and_evaluate_xgboost(X_train, y_train, X_val, y_val, X_test, y_test, results_dir):
    # basic XGBoost model
    basic_model = xgb.XGBClassifier(objective="binary:logistic", random_state=42)
    basic_model.fit(X_train, y_train)
    splits = [(X_train, y_train, 'train'), (X_val, y_val, 'val'), (X_test, y_test, 'test')]
    basic_results, basic_roc_data, basic_prc_data, test_roc_data, test_prc_data = evaluate_classification(basic_model, splits, model_name="XGBoost_Basic")
    save_model_results(basic_results, "XGBoost_Basic", results_dir)
    
    plot_roc_curves(basic_roc_data, "XGBoost_Basic", results_dir, filename='roc_curves.png')
    plot_prc_curves(basic_prc_data, "XGBoost_Basic", results_dir, filename='prc_curves.png')
    
    all_roc_data["XGBoost_Basic"] = test_roc_data["XGBoost_Basic"]
    all_prc_data["XGBoost_Basic"] = test_prc_data["XGBoost_Basic"]

    # hyperparameter-tuned XGBoost model
    xgb_param_grid = {
        'max_depth': range(3, 10, 2),
        'min_child_weight': range(1, 6, 2),
        'learning_rate': [0.0001, 0.01, 0.1],
        'n_estimators': [50, 200]
    }
    
    xgb_clf = xgb.XGBClassifier(random_state=0)
    xgb_opt = tune_clf_hyperparameters(xgb_clf, xgb_param_grid, X_train, y_train)
    # identify the best hyperparameters
    best_params = xgb_opt.get_params()
    save_best_params('XGBoost_Optimized', best_params, results_dir)

    results, roc_data, prc_data, test_roc_data, test_prc_data = evaluate_classification(xgb_opt, splits, model_name="XGBoost_Optimized")
    save_model_results(results, "XGBoost_Optimized", results_dir)

    plot_roc_curves(roc_data, "XGBoost_Optimized", results_dir, filename='roc_curves.png')
    plot_prc_curves(prc_data, "XGBoost_Optimized", results_dir, filename='prc_curves.png')

    all_roc_data["XGBoost_Optimized"] = test_roc_data["XGBoost_Optimized"]
    all_prc_data["XGBoost_Optimized"] = test_prc_data["XGBoost_Optimized"]

    return results, roc_data, prc_data

In [263]:
from sklearn.calibration import CalibratedClassifierCV

def tune_and_evaluate_linear_svc(X_train, y_train, X_val, y_val, X_test, y_test, results_dir):
    # define the LinearSVC model
    linear_svc = LinearSVC(random_state=42, dual=False)  # dual=False when n_samples > n_features

    # define the parameter grid for GridSearchCV
    param_grid = {
        'penalty': ['l1', 'l2'],
        'C': [0.001, 0.01, 0.1, 1, 10],
        'loss': ['hinge', 'squared_hinge'],
        'tol': [1e-4, 1e-3, 1e-2]
    }

    # set up the GridSearchCV
    grid_search = GridSearchCV(linear_svc, param_grid, cv=5, n_jobs=-1, verbose=1)

    # fit GridSearchCV
    grid_search.fit(X_train, y_train)

    # best estimator
    best_linear_svc = grid_search.best_estimator_

    # wrap the best LinearSVC model with CalibratedClassifierCV
    calibrated_svc = CalibratedClassifierCV(best_linear_svc, method='sigmoid', cv=5)
    calibrated_svc.fit(X_train, y_train)

    # save the best parameters
    save_best_params('Linear_SVC_Optimized', grid_search.best_params_, results_dir)

    # define splits
    splits = [(X_train, y_train, 'train'), (X_val, y_val, 'val'), (X_test, y_test, 'test')]

    # evaluate the calibrated model using the existing evaluate_classification function
    results, roc_data, prc_data, test_roc_data, test_prc_data = evaluate_classification(calibrated_svc, splits, model_name="Linear_SVC_Optimized")

    # save and plot results
    save_model_results(results, "Linear_SVC_Optimized", results_dir)
    plot_roc_curves(roc_data, "Linear_SVC_Optimized", results_dir)
    plot_prc_curves(prc_data, "Linear_SVC_Optimized", results_dir)

    # store results for combined plotting
    all_roc_data["Linear_SVC_Optimized"] = test_roc_data["Linear_SVC_Optimized"]
    all_prc_data["Linear_SVC_Optimized"] = test_prc_data["Linear_SVC_Optimized"]

    return results, roc_data, prc_data


In [264]:
def tune_and_evaluate_neural_network(X_train, y_train, X_val, y_val, X_test, y_test, results_dir):
    # define the neural network model
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    
    # compile the model, i.e., define the loss function and the optimizer
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # train the model
    model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_val, y_val))

    # evaluate the model on the test set
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print('Neural Network Test accuracy:', test_acc)

    # prepare results for consistency, this step is to compare with other models
    test_predictions = (model.predict(X_test) > 0.5).astype("int32")
    test_pred_probs = model.predict(X_test).flatten()
    test_report = classification_report(y_test, test_predictions, output_dict=True)

    # calculate ROC and PRC data
    fpr, tpr, _ = roc_curve(y_test, test_pred_probs)
    precision, recall, _ = precision_recall_curve(y_test, test_pred_probs)
    roc_auc = roc_auc_score(y_test, test_pred_probs)
    prc_auc = auc(recall, precision)

    results = {
        'train': ('Not Evaluated', {}),
        'val': ('Not Evaluated', {}),
        'test': (test_acc, test_report)
    }
    save_model_results(results, "Neural_Network", results_dir)

    # store ROC and PRC data for the test set
    test_roc_data = {"Neural_Network": (fpr, tpr, roc_auc)}
    test_prc_data = {"Neural_Network": (precision, recall, prc_auc)}

    all_roc_data["Neural_Network"] = test_roc_data["Neural_Network"]
    all_prc_data["Neural_Network"] = test_prc_data["Neural_Network"]

    # plot ROC and PRC curves
    plot_roc_curves(test_roc_data, "Neural_Network", results_dir, filename='roc_curves.png')
    plot_prc_curves(test_prc_data, "Neural_Network", results_dir, filename='prc_curves.png')

    return results, test_roc_data, test_prc_data


In [265]:
def evaluate_logistic_regression(X_train, y_train, X_val, y_val, X_test, y_test, results_dir):
    
    # grid search for hyperparameter tuning
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'solver': ['newton-cg', 'lbfgs', 'liblinear']
    }
    grid_search = GridSearchCV(LogisticRegression(random_state=42, max_iter=10000), param_grid, cv=5, scoring='balanced_accuracy')
    grid_search.fit(X_train, y_train)

    # identify best hyperparameters
    print("Best hyperparameters:", grid_search.best_params_)
    # save the best parameters
    save_best_params('Logistic_Regression_Best', grid_search.best_params_, results_dir)        
    best_lr = grid_search.best_estimator_
    
    splits = [(X_train, y_train, 'train'), (X_val, y_val, 'val'), (X_test, y_test, 'test')]
    best_results, best_roc_data, best_prc_data, test_roc_data, test_prc_data = evaluate_classification(best_lr, splits, model_name="Logistic_Regression_Best")
    save_model_results(best_results, "Logistic_Regression_Best", results_dir)
    
    plot_roc_curves(best_roc_data, "Logistic_Regression_Best", results_dir, filename='roc_curves_best.png')
    plot_prc_curves(best_prc_data, "Logistic_Regression_Best", results_dir, filename='prc_curves_best.png')

    all_roc_data["Logistic_Regression_Best"] = test_roc_data["Logistic_Regression_Best"]
    all_prc_data["Logistic_Regression_Best"] = test_prc_data["Logistic_Regression_Best"]

    return best_results, best_roc_data, best_prc_data


In [266]:
def evaluate_elastic_net_logistic_regression(X_train, y_train, X_val, y_val, X_test, y_test, results_dir):
    
    # grid search for hyperparameter tuning with Elastic Net penalty
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'l1_ratio': [0.1, 0.5, 0.7, 0.9],
        'solver': ['saga'],
        'penalty': ['elasticnet']
    }
    grid_search = GridSearchCV(LogisticRegression(random_state=42, max_iter=10000), param_grid, cv=5, scoring='balanced_accuracy')
    grid_search.fit(X_train, y_train)
    
    best_enet_lr = grid_search.best_estimator_
    
    # identify best hyperparameters
    print("Best hyperparameters:", grid_search.best_params_)
    
    # save the best hyperparameters
    save_best_params('Elastic_Net_Logistic_Regression_Best', grid_search.best_params_, results_dir)
    
    splits = [(X_train, y_train, 'train'), (X_val, y_val, 'val'), (X_test, y_test, 'test')]
    enet_results, enet_roc_data, enet_prc_data, test_roc_data, test_prc_data = evaluate_classification(best_enet_lr, splits, model_name="Elastic_Net_Logistic_Regression_Best")
    save_model_results(enet_results, "Elastic_Net_Logistic_Regression_Best", results_dir)
    
    plot_roc_curves(enet_roc_data, "Elastic_Net_Logistic_Regression_Best", results_dir, filename='roc_curves_best.png')
    plot_prc_curves(enet_prc_data, "Elastic_Net_Logistic_Regression_Best", results_dir, filename='prc_curves_best.png')

    all_roc_data["Elastic_Net_Logistic_Regression_Best"] = test_roc_data["Elastic_Net_Logistic_Regression_Best"]
    all_prc_data["Elastic_Net_Logistic_Regression_Best"] = test_prc_data["Elastic_Net_Logistic_Regression_Best"]

    return enet_results, enet_roc_data, enet_prc_data

In [267]:
def tune_and_evaluate_knn(X_train, y_train, X_val, y_val, X_test, y_test, results_dir):
    # define the KNN model
    knn = KNeighborsClassifier()

    # define the parameter grid
    param_grid = {
        'n_neighbors': [3, 5, 7, 9, 11],
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean', 'manhattan', 'minkowski']
    }

    # perform Grid Search
    grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, scoring='balanced_accuracy', cv=5, n_jobs=-1)
    grid_search.fit(X_train, y_train)
    
    # get the best estimator
    best_knn = grid_search.best_estimator_
    print("Best parameters:", grid_search.best_params_)
    
    # save the best hyperparameters
    save_best_params('KNN_Optimized', grid_search.best_params_, results_dir)
    
    # define splits
    splits = [(X_train, y_train, 'train'), (X_val, y_val, 'val'), (X_test, y_test, 'test')]

    # evaluate the model
    results, roc_data, prc_data, test_roc_data, test_prc_data = evaluate_classification(best_knn, splits, model_name="KNN_Optimized")
    
    # save results
    save_model_results(results, "KNN_Optimized", results_dir)
    
    # plot ROC and PRC curves
    plot_roc_curves(roc_data, "KNN_Optimized", results_dir, filename='roc_curves.png')
    plot_prc_curves(prc_data, "KNN_Optimized", results_dir, filename='prc_curves.png')

    # store ROC and PRC data for the test set
    all_roc_data["KNN_Optimized"] = test_roc_data["KNN_Optimized"]
    all_prc_data["KNN_Optimized"] = test_prc_data["KNN_Optimized"]

    return results, roc_data, prc_data

In [268]:
def evaluate_majority_class_classifier(X_train, y_train, X_val, y_val, X_test, y_test, results_dir):
    
    # train a dummy classifier that predicts the majority class
    majority_class_clf = DummyClassifier(strategy='most_frequent', random_state=42)
    majority_class_clf.fit(X_train, y_train)
    
    splits = [(X_train, y_train, 'train'), (X_val, y_val, 'val'), (X_test, y_test, 'test')]
    majority_results, majority_roc_data, majority_prc_data, test_roc_data, test_prc_data = evaluate_classification(majority_class_clf, splits, model_name="Majority_Class_Classifier")
    save_model_results(majority_results, "Majority_Class_Classifier", results_dir)
    
    plot_roc_curves(majority_roc_data, "Majority_Class_Classifier", results_dir, filename='roc_curves.png')
    plot_prc_curves(majority_prc_data, "Majority_Class_Classifier", results_dir, filename='prc_curves.png')

    all_roc_data["Majority_Class_Classifier"] = test_roc_data["Majority_Class_Classifier"]
    all_prc_data["Majority_Class_Classifier"] = test_prc_data["Majority_Class_Classifier"]

    return majority_results, majority_roc_data, majority_prc_data

In [269]:
def evaluate_Chance_Class_Classifier(X_train, y_train, X_val, y_val, X_test, y_test, results_dir):
    
    # train a dummy classifier that predicts a random class
    random_class_clf = DummyClassifier(strategy='uniform', random_state=42)
    random_class_clf.fit(X_train, y_train)
    
    splits = [(X_train, y_train, 'train'), (X_val, y_val, 'val'), (X_test, y_test, 'test')]
    random_results, random_roc_data, random_prc_data, test_roc_data, test_prc_data = evaluate_classification(random_class_clf, splits, model_name="Chance_Class_Classifier")
    save_model_results(random_results, "Chance_Class_Classifier", results_dir)
    
    plot_roc_curves(random_roc_data, "Chance_Class_Classifier", results_dir, filename='roc_curves.png')
    plot_prc_curves(random_prc_data, "Chance_Class_Classifier", results_dir, filename='prc_curves.png')

    all_roc_data["Chance_Class_Classifier"] = test_roc_data["Chance_Class_Classifier"]
    all_prc_data["Chance_Class_Classifier"] = test_prc_data["Chance_Class_Classifier"]

    return random_results, random_roc_data, random_prc_data

In [270]:
def run_all_models(data_dir, results_dir):
    global all_roc_data, all_prc_data
    all_roc_data = {}
    all_prc_data = {}
    
    X_train, y_train, X_val, y_val, X_test, y_test, feature_names = load_data(data_dir)
    
    # create a separate directory for this dataset's results
    dataset_name = data_dir.stem
    dataset_results_dir = results_dir / dataset_name
    os.makedirs(dataset_results_dir, exist_ok=True)
    
    # SVC
    SVC_results, SVC_roc_data, SVC_prc_data = tune_and_evaluate_linear_svc(X_train, y_train, X_val, y_val, X_test, y_test, dataset_results_dir)

    # Random Forest
    results_rf, roc_data_rf, prc_data_rf = tune_and_evaluate_rf(X_train, y_train, X_val, y_val, X_test, y_test, feature_names, dataset_results_dir)
    
    # XGBoost
    results_xgb, roc_data_xgb, prc_data_xgb = tune_and_evaluate_xgboost(X_train, y_train, X_val, y_val, X_test, y_test, dataset_results_dir)
    
    # Logistic Regression
    basic_results, basic_roc_data, basic_prc_data = evaluate_logistic_regression(X_train, y_train, X_val, y_val, X_test, y_test, dataset_results_dir)
    
    # Elastic Net Logistic Regression
    enet_results, enet_roc_data, enet_prc_data = evaluate_elastic_net_logistic_regression(X_train, y_train, X_val, y_val, X_test, y_test, dataset_results_dir)
    
    # KNN Classifier
    knn_results, knn_roc_data, knn_prc_data = tune_and_evaluate_knn(X_train, y_train, X_val, y_val, X_test, y_test, dataset_results_dir)
    
    # Majority Class Classifier
    majority_results, majority_roc_data, majority_prc_data = evaluate_majority_class_classifier(X_train, y_train, X_val, y_val, X_test, y_test, dataset_results_dir)
    
    # Random Class Classifier
    random_results, random_roc_data, random_prc_data = evaluate_Chance_Class_Classifier(X_train, y_train, X_val, y_val, X_test, y_test, dataset_results_dir)
    
    # Neural Network
    results_nn, roc_data_nn, prc_data_nn = tune_and_evaluate_neural_network(X_train, y_train, X_val, y_val, X_test, y_test, dataset_results_dir)
    
    
    # plot combined PRC and ROC curves for all models for the current dataset
    plot_combined_prc_curves(all_prc_data, dataset_results_dir, filename='all_prc_curves.png')
    plot_combined_roc_curves(all_roc_data, dataset_results_dir, filename='all_roc_curves.png')
    save_roc_auc_scores(all_roc_data, dataset_results_dir)
    
    selected_roc_data = {
        "RF": all_roc_data["Random_Forest_Optimized"],
        "XG": all_roc_data["XGBoost_Optimized"],
        "SVC": all_roc_data["Linear_SVC_Optimized"],
        "LogReg": all_roc_data["Elastic_Net_Logistic_Regression_Best"],
        "KNN": all_roc_data["KNN_Optimized"],
        "NN": all_roc_data["Neural_Network"]
    }
    
    plot_selected_roc_curves(selected_roc_data, dataset_results_dir, filename='selected_roc_curves.png')



#root / "data" / "backup"]
#root / "data" / "CLR",
#root / "data" / "CLR_nonreduced",
#root / "data" / "CLR_PCA", 
#root / "data" / "CLR_SVD", 
#root / "data" / "reduced_0_1", 
#root / "data" / "reduced_0_1_PCA", 
#root / "data" / "reduced_0_1_SVD", 
#root / "data" / "baseline_demographic",
#root / "data" / "non_reduced"]

In [271]:
def main():
    root = Path.cwd().parents[1]
    data_dir = root / "data"
    results_dir = root / "results" / "model_reports"
    
    for current_dir in data_dir.iterdir():
        if current_dir.is_dir():
            # skip the directory if it's named 'raw'
            if current_dir.name == 'raw':
                print(f"Skipping {current_dir}")
                continue
            print(f"Now processing {current_dir}")
            run_all_models(current_dir, results_dir)

main()


Now processing /Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/data/baseline_smote
Fitting 5 folds for each of 60 candidates, totalling 300 fits


150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.6651296111665005, ROC_AUC: 0.7449925274028364, PRC_AUC: 0.7402756213197952
{'0': {'precision': 0.6784271478588743, 'recall': 0.6278664007976071, 'f1-score': 0.6521682847896441, 'support': 4012.0}, '1': {'precision': 0.6536766411505451, 'recall': 0.7023928215353938, 'f1-score': 0.6771596780007209, 'support': 4012.0}, 'accuracy': 0.6651296111665005, 'macro avg': {'precision': 0.6660518945047097, 'recall': 0.6651296111665005, 'f1-score': 0.6646639813951825, 'support': 8024.0}, 'weighted avg': {'precision': 0.6660518945047097, 'recall': 0.6651296111665005, 'f1-score': 0.6646639813951825, 'support': 8024.0}}
Linear_SVC_Optimized - val - Accuracy: 0.6564019448946515, ROC_AUC: 0.7290215769182937, PRC_AUC: 0.545418905690528
{'0': {'precision': 0.8273273273273273, 'recall': 0.6406976744186047, 'f1-score': 0.7221494102228048, 'support': 860.0}, '1': {'precision': 0.45598591549295775, 'recall': 0.6925133689839572, 'f1-score': 0.5498938428874734, 'support

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.5, ROC_AUC: 0.5, PRC_AUC: 0.75
{'0': {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666666666666666, 'support': 4012.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4012.0}, 'accuracy': 0.5, 'macro avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 8024.0}, 'weighted avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 8024.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 1234.0}, 'weighted avg': {'precision': 0.4856982996619288, 'recall': 0.6969205834683955, 'f1-score': 0.5724

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6069 - loss: 0.6621 - val_accuracy: 0.6499 - val_loss: 0.6032
Epoch 2/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6753 - loss: 0.5953 - val_accuracy: 0.6532 - val_loss: 0.5893
Epoch 3/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6843 - loss: 0.5781 - val_accuracy: 0.6653 - val_loss: 0.5761
Epoch 4/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6862 - loss: 0.5709 - val_accuracy: 0.6783 - val_loss: 0.5644
Epoch 5/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6879 - loss: 0.5660 - val_accuracy: 0.6572 - val_loss: 0.5779
Epoch 6/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6991 - loss: 0.5549 - val_accuracy: 0.6378 - val_loss: 0.5875
Epoch 7/10
[1m126/126[0m [32m━━━━━━━

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.5887421022400919, ROC_AUC: 0.6217573862262341, PRC_AUC: 0.5995068922392656
{'0': {'precision': 0.6393146979260595, 'recall': 0.40723721998851237, 'f1-score': 0.4975438596491228, 'support': 1741.0}, '1': {'precision': 0.5651074589127687, 'recall': 0.7702469844916715, 'f1-score': 0.6519202722411278, 'support': 1741.0}, 'accuracy': 0.5887421022400919, 'macro avg': {'precision': 0.6022110784194141, 'recall': 0.5887421022400919, 'f1-score': 0.5747320659451253, 'support': 3482.0}, 'weighted avg': {'precision': 0.602211078419414, 'recall': 0.5887421022400919, 'f1-score': 0.5747320659451254, 'support': 3482.0}}
Linear_SVC_Optimized - val - Accuracy: 0.4740680713128039, ROC_AUC: 0.5590878000248725, PRC_AUC: 0.3604177448452326
{'0': {'precision': 0.747072599531616, 'recall': 0.37093023255813956, 'f1-score': 0.49572649572649574, 'support': 860.0}, '1': {'precision': 0.32961586121437425, 'recall': 0.7112299465240641, 'f1-score': 0.4504657070279424, 'suppo

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.5, ROC_AUC: 0.5, PRC_AUC: 0.75
{'0': {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666666666666666, 'support': 1741.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1741.0}, 'accuracy': 0.5, 'macro avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 3482.0}, 'weighted avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 3482.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 1234.0}, 'weighted avg': {'precision': 0.4856982996619288, 'recall': 0.6969205834683955, 'f1-score': 0.5724

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5427 - loss: 0.6973 - val_accuracy: 0.5648 - val_loss: 0.7033
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5587 - loss: 0.6783 - val_accuracy: 0.5089 - val_loss: 0.7106
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 941us/step - accuracy: 0.5711 - loss: 0.6781 - val_accuracy: 0.5097 - val_loss: 0.6791
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5669 - loss: 0.6772 - val_accuracy: 0.4919 - val_loss: 0.6874
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5885 - loss: 0.6661 - val_accuracy: 0.4951 - val_loss: 0.7052
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 996us/step - accuracy: 0.5858 - loss: 0.6708 - val_accuracy: 0.4846 - val_loss: 0.6934
Epoch 7/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.7020684860073005, ROC_AUC: 0.6469826591449088, PRC_AUC: 0.42636493866330255
{'0': {'precision': 0.7056928034371643, 'recall': 0.9825523429710867, 'f1-score': 0.8214211294019588, 'support': 4012.0}, '1': {'precision': 0.5808383233532934, 'recall': 0.05571510626076967, 'f1-score': 0.10167714884696016, 'support': 1741.0}, 'accuracy': 0.7020684860073005, 'macro avg': {'precision': 0.6432655633952289, 'recall': 0.5191337246159282, 'f1-score': 0.4615491391244595, 'support': 5753.0}, 'weighted avg': {'precision': 0.6679087516683447, 'recall': 0.7020684860073005, 'f1-score': 0.6036088105863404, 'support': 5753.0}}
Linear_SVC_Optimized - val - Accuracy: 0.7058346839546191, ROC_AUC: 0.5984392488496456, PRC_AUC: 0.3926765622761981
{'0': {'precision': 0.707256046705588, 'recall': 0.986046511627907, 'f1-score': 0.8237008256435163, 'support': 860.0}, '1': {'precision': 0.6571428571428571, 'recall': 0.06149732620320856, 'f1-score': 0.11246943765281174, 'supp

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.6973752824613245, ROC_AUC: 0.5, PRC_AUC: 0.6513123587693377
{'0': {'precision': 0.6973752824613245, 'recall': 1.0, 'f1-score': 0.8217101894521249, 'support': 4012.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1741.0}, 'accuracy': 0.6973752824613245, 'macro avg': {'precision': 0.34868764123066226, 'recall': 0.5, 'f1-score': 0.41085509472606246, 'support': 5753.0}, 'weighted avg': {'precision': 0.4863322845880122, 'recall': 0.6973752824613245, 'f1-score': 0.5730403754705241, 'support': 5753.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 123

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5486 - loss: 1.3412 - val_accuracy: 0.6507 - val_loss: 0.6594
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 738us/step - accuracy: 0.6768 - loss: 0.6192 - val_accuracy: 0.6791 - val_loss: 0.6447
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 675us/step - accuracy: 0.6971 - loss: 0.5788 - val_accuracy: 0.6677 - val_loss: 0.6251
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 684us/step - accuracy: 0.7032 - loss: 0.5718 - val_accuracy: 0.6823 - val_loss: 0.6246
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 665us/step - accuracy: 0.7002 - loss: 0.5496 - val_accuracy: 0.6840 - val_loss: 0.6107
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 691us/step - accuracy: 0.7063 - loss: 0.5523 - val_accuracy: 0.6807 - val_loss: 0.6139
Epoch 7/10
[1m90/90[0m [32m━━━━━━━━━━━

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.6099942561746123, ROC_AUC: 0.6507444043890612, PRC_AUC: 0.6240213950259851
{'0': {'precision': 0.6236281471917366, 'recall': 0.5548535324526135, 'f1-score': 0.5872340425531914, 'support': 1741.0}, '1': {'precision': 0.5990688049663735, 'recall': 0.6651349798966112, 'f1-score': 0.6303756124115406, 'support': 1741.0}, 'accuracy': 0.6099942561746123, 'macro avg': {'precision': 0.6113484760790551, 'recall': 0.6099942561746123, 'f1-score': 0.6088048274823661, 'support': 3482.0}, 'weighted avg': {'precision': 0.6113484760790551, 'recall': 0.6099942561746123, 'f1-score': 0.608804827482366, 'support': 3482.0}}
Linear_SVC_Optimized - val - Accuracy: 0.5405186385737439, ROC_AUC: 0.5962162666335032, PRC_AUC: 0.37731686689075306
{'0': {'precision': 0.7470489038785835, 'recall': 0.5151162790697674, 'f1-score': 0.6097728836889195, 'support': 860.0}, '1': {'precision': 0.3494539781591264, 'recall': 0.5989304812834224, 'f1-score': 0.4413793103448276, 'support

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.5, ROC_AUC: 0.5, PRC_AUC: 0.75
{'0': {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666666666666666, 'support': 1741.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1741.0}, 'accuracy': 0.5, 'macro avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 3482.0}, 'weighted avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 3482.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 1234.0}, 'weighted avg': {'precision': 0.4856982996619288, 'recall': 0.6969205834683955, 'f1-score': 0.5724

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5223 - loss: 0.8559 - val_accuracy: 0.5057 - val_loss: 0.7602
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 759us/step - accuracy: 0.5977 - loss: 0.6780 - val_accuracy: 0.5284 - val_loss: 0.7165
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 697us/step - accuracy: 0.6383 - loss: 0.6340 - val_accuracy: 0.5729 - val_loss: 0.6919
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 707us/step - accuracy: 0.6586 - loss: 0.6175 - val_accuracy: 0.5705 - val_loss: 0.6854
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6674 - loss: 0.6085 - val_accuracy: 0.5916 - val_loss: 0.6767
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6723 - loss: 0.5854 - val_accuracy: 0.5794 - val_loss: 0.6857
Epoch 7/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.6740379092475589, ROC_AUC: 0.7476979335095301, PRC_AUC: 0.7378718635598087
{'0': {'precision': 0.687037037037037, 'recall': 0.6392877656519241, 'f1-score': 0.6623028860458197, 'support': 1741.0}, '1': {'precision': 0.6627282491944146, 'recall': 0.7087880528431936, 'f1-score': 0.684984734943103, 'support': 1741.0}, 'accuracy': 0.6740379092475589, 'macro avg': {'precision': 0.6748826431157258, 'recall': 0.6740379092475588, 'f1-score': 0.6736438104944613, 'support': 3482.0}, 'weighted avg': {'precision': 0.6748826431157259, 'recall': 0.6740379092475589, 'f1-score': 0.6736438104944613, 'support': 3482.0}}
Linear_SVC_Optimized - val - Accuracy: 0.6547811993517018, ROC_AUC: 0.7321057704265639, PRC_AUC: 0.548881983168559
{'0': {'precision': 0.8219584569732937, 'recall': 0.6441860465116279, 'f1-score': 0.7222946544980443, 'support': 860.0}, '1': {'precision': 0.45357142857142857, 'recall': 0.679144385026738, 'f1-score': 0.5438972162740899, 'support': 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.5, ROC_AUC: 0.5, PRC_AUC: 0.75
{'0': {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666666666666666, 'support': 1741.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1741.0}, 'accuracy': 0.5, 'macro avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 3482.0}, 'weighted avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 3482.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 1234.0}, 'weighted avg': {'precision': 0.4856982996619288, 'recall': 0.6969205834683955, 'f1-score': 0.5724

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5924 - loss: 0.6780 - val_accuracy: 0.6232 - val_loss: 0.6492
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 762us/step - accuracy: 0.6446 - loss: 0.6346 - val_accuracy: 0.6702 - val_loss: 0.6199
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 709us/step - accuracy: 0.6752 - loss: 0.6102 - val_accuracy: 0.6742 - val_loss: 0.5980
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 717us/step - accuracy: 0.6891 - loss: 0.5879 - val_accuracy: 0.6580 - val_loss: 0.5989
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 698us/step - accuracy: 0.6860 - loss: 0.5796 - val_accuracy: 0.6702 - val_loss: 0.5783
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 721us/step - accuracy: 0.6824 - loss: 0.5749 - val_accuracy: 0.6629 - val_loss: 0.5770
Epoch 7/10
[1m55/55[0m [32m━━━━━━━━━━━

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.7028465346534654, ROC_AUC: 0.7838718017841388, PRC_AUC: 0.7986288730124798
{'0': {'precision': 0.6846968672526482, 'recall': 0.751980198019802, 'f1-score': 0.7167630057803468, 'support': 4040.0}, '1': {'precision': 0.7249519626681307, 'recall': 0.6537128712871287, 'f1-score': 0.6874918651568398, 'support': 4040.0}, 'accuracy': 0.7028465346534654, 'macro avg': {'precision': 0.7048244149603895, 'recall': 0.7028465346534654, 'f1-score': 0.7021274354685934, 'support': 8080.0}, 'weighted avg': {'precision': 0.7048244149603894, 'recall': 0.7028465346534654, 'f1-score': 0.7021274354685932, 'support': 8080.0}}
Linear_SVC_Optimized - val - Accuracy: 0.646774193548387, ROC_AUC: 0.6453668597399069, PRC_AUC: 0.40100239811229016
{'0': {'precision': 0.7553699284009546, 'recall': 0.73094688221709, 'f1-score': 0.7429577464788732, 'support': 866.0}, '1': {'precision': 0.42039800995024873, 'recall': 0.45187165775401067, 'f1-score': 0.43556701030927836, 'support



Best hyperparameters:
 {'learning_rate': 0.1, 'max_depth': 9, 'min_child_weight': 5, 'n_estimators': 200}
XGBoost_Optimized - train - Accuracy: 0.9998762376237624, ROC_AUC: 1.0, PRC_AUC: 1.0
{'0': {'precision': 0.9997525365008662, 'recall': 1.0, 'f1-score': 0.9998762529389927, 'support': 4040.0}, '1': {'precision': 1.0, 'recall': 0.9997524752475248, 'f1-score': 0.9998762223047407, 'support': 4040.0}, 'accuracy': 0.9998762376237624, 'macro avg': {'precision': 0.9998762682504331, 'recall': 0.9998762376237624, 'f1-score': 0.9998762376218667, 'support': 8080.0}, 'weighted avg': {'precision': 0.9998762682504331, 'recall': 0.9998762376237624, 'f1-score': 0.9998762376218667, 'support': 8080.0}}
XGBoost_Optimized - val - Accuracy: 0.6814516129032258, ROC_AUC: 0.7139500561929579, PRC_AUC: 0.4970012655781586
{'0': {'precision': 0.7728852838933952, 'recall': 0.7702078521939953, 'f1-score': 0.7715442452284558, 'support': 866.0}, '1': {'precision': 0.47214854111405835, 'recall': 0.47593582887700536

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.5, ROC_AUC: 0.5, PRC_AUC: 0.75
{'0': {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666666666666666, 'support': 4040.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4040.0}, 'accuracy': 0.5, 'macro avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 8080.0}, 'weighted avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 8080.0}}
Majority_Class_Classifier - val - Accuracy: 0.6983870967741935, ROC_AUC: 0.5, PRC_AUC: 0.6508064516129033
{'0': {'precision': 0.6983870967741935, 'recall': 1.0, 'f1-score': 0.8224121557454891, 'support': 866.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6983870967741935, 'macro avg': {'precision': 0.34919354838709676, 'recall': 0.5, 'f1-score': 0.41120607787274455, 'support': 1240.0}, 'weighted avg': {'precision': 0.48774453694068676, 'recall': 0.6983870967741935, 'f1-score': 0.574

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5854 - loss: 1.2449 - val_accuracy: 0.6371 - val_loss: 0.7267
Epoch 2/10
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 581us/step - accuracy: 0.7166 - loss: 0.5621 - val_accuracy: 0.6121 - val_loss: 0.7326
Epoch 3/10
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 646us/step - accuracy: 0.7693 - loss: 0.4817 - val_accuracy: 0.6460 - val_loss: 0.6936
Epoch 4/10
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 704us/step - accuracy: 0.8033 - loss: 0.4225 - val_accuracy: 0.6532 - val_loss: 0.6944
Epoch 5/10
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 527us/step - accuracy: 0.8278 - loss: 0.3923 - val_accuracy: 0.6621 - val_loss: 0.6908
Epoch 6/10
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 841us/step - accuracy: 0.8508 - loss: 0.3466 - val_accuracy: 0.6460 - val_loss: 0.7126
Epoch 7/10
[1m127/127[0m [

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.6951155918651138, ROC_AUC: 0.5982155486441307, PRC_AUC: 0.3704669242666624
{'0': {'precision': 0.6972396925227113, 'recall': 0.994765702891326, 'f1-score': 0.8198438783894824, 'support': 4012.0}, '1': {'precision': 0.27586206896551724, 'recall': 0.004595060310166571, 'f1-score': 0.00903954802259887, 'support': 1741.0}, 'accuracy': 0.6951155918651138, 'macro avg': {'precision': 0.4865508807441143, 'recall': 0.4996803816007463, 'f1-score': 0.4144417132060406, 'support': 5753.0}, 'weighted avg': {'precision': 0.5697204082165972, 'recall': 0.6951155918651138, 'f1-score': 0.5744744469330694, 'support': 5753.0}}
Linear_SVC_Optimized - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5618470961323219, PRC_AUC: 0.3529201337993256
{'0': {'precision': 0.6972402597402597, 'recall': 0.9988372093023256, 'f1-score': 0.8212237093690249, 'support': 860.0}, '1': {'precision': 0.5, 'recall': 0.00267379679144385, 'f1-score': 0.005319148936170213, 'support': 374.0}

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.6973752824613245, ROC_AUC: 0.5, PRC_AUC: 0.6513123587693377
{'0': {'precision': 0.6973752824613245, 'recall': 1.0, 'f1-score': 0.8217101894521249, 'support': 4012.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1741.0}, 'accuracy': 0.6973752824613245, 'macro avg': {'precision': 0.34868764123066226, 'recall': 0.5, 'f1-score': 0.41085509472606246, 'support': 5753.0}, 'weighted avg': {'precision': 0.4863322845880122, 'recall': 0.6973752824613245, 'f1-score': 0.5730403754705241, 'support': 5753.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 123

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6934 - loss: 0.6561 - val_accuracy: 0.6912 - val_loss: 0.6185
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 593us/step - accuracy: 0.7029 - loss: 0.5958 - val_accuracy: 0.6961 - val_loss: 0.6109
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 746us/step - accuracy: 0.6905 - loss: 0.6000 - val_accuracy: 0.6985 - val_loss: 0.6098
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 654us/step - accuracy: 0.6945 - loss: 0.5983 - val_accuracy: 0.6969 - val_loss: 0.6105
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 537us/step - accuracy: 0.6992 - loss: 0.5957 - val_accuracy: 0.6961 - val_loss: 0.6120
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 545us/step - accuracy: 0.6955 - loss: 0.5955 - val_accuracy: 0.6953 - val_loss: 0.6120
Epoch 7/10
[1m90/90[0m [32m━━━━━━━━━━━

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.7349209108291326, ROC_AUC: 0.7482910974142478, PRC_AUC: 0.571491547737356
{'0': {'precision': 0.7560222359481161, 'recall': 0.9152542372881356, 'f1-score': 0.8280527680685534, 'support': 4012.0}, '1': {'precision': 0.6205357142857143, 'recall': 0.3193566915565767, 'f1-score': 0.4216913158892681, 'support': 1741.0}, 'accuracy': 0.7349209108291326, 'macro avg': {'precision': 0.6882789751169152, 'recall': 0.6173054644223561, 'f1-score': 0.6248720419789108, 'support': 5753.0}, 'weighted avg': {'precision': 0.7150206655997341, 'recall': 0.7349209108291326, 'f1-score': 0.7050777483841912, 'support': 5753.0}}
Linear_SVC_Optimized - val - Accuracy: 0.7285251215559158, ROC_AUC: 0.7292640840691456, PRC_AUC: 0.5417634161480224
{'0': {'precision': 0.7497621313035204, 'recall': 0.9162790697674419, 'f1-score': 0.8246991104133962, 'support': 860.0}, '1': {'precision': 0.6065573770491803, 'recall': 0.2967914438502674, 'f1-score': 0.3985637342908438, 'support'

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.6973752824613245, ROC_AUC: 0.5, PRC_AUC: 0.6513123587693377
{'0': {'precision': 0.6973752824613245, 'recall': 1.0, 'f1-score': 0.8217101894521249, 'support': 4012.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1741.0}, 'accuracy': 0.6973752824613245, 'macro avg': {'precision': 0.34868764123066226, 'recall': 0.5, 'f1-score': 0.41085509472606246, 'support': 5753.0}, 'weighted avg': {'precision': 0.4863322845880122, 'recall': 0.6973752824613245, 'f1-score': 0.5730403754705241, 'support': 5753.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 123

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6548 - loss: 0.6298 - val_accuracy: 0.7204 - val_loss: 0.5647
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 552us/step - accuracy: 0.7284 - loss: 0.5441 - val_accuracy: 0.7253 - val_loss: 0.5385
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 546us/step - accuracy: 0.7387 - loss: 0.5224 - val_accuracy: 0.7293 - val_loss: 0.5277
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 543us/step - accuracy: 0.7432 - loss: 0.5023 - val_accuracy: 0.7310 - val_loss: 0.5260
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 541us/step - accuracy: 0.7359 - loss: 0.5237 - val_accuracy: 0.7318 - val_loss: 0.5234
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7325 - loss: 0.5170 - val_accuracy: 0.7301 - val_loss: 0.5217
Epoch 7/10
[1m90/90[0m [32m━━━━━━━━━━━━━

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.6145314057826521, ROC_AUC: 0.6605635486362447, PRC_AUC: 0.6316763317910177
{'0': {'precision': 0.626688723462917, 'recall': 0.5665503489531406, 'f1-score': 0.5951040712135096, 'support': 4012.0}, '1': {'precision': 0.6045030702751877, 'recall': 0.6625124626121635, 'f1-score': 0.6321798073492686, 'support': 4012.0}, 'accuracy': 0.6145314057826521, 'macro avg': {'precision': 0.6155958968690524, 'recall': 0.6145314057826521, 'f1-score': 0.6136419392813891, 'support': 8024.0}, 'weighted avg': {'precision': 0.6155958968690524, 'recall': 0.6145314057826521, 'f1-score': 0.6136419392813891, 'support': 8024.0}}
Linear_SVC_Optimized - val - Accuracy: 0.5623987034035657, ROC_AUC: 0.5996735480661609, PRC_AUC: 0.3810507371323808
{'0': {'precision': 0.7572347266881029, 'recall': 0.5476744186046512, 'f1-score': 0.6356275303643725, 'support': 860.0}, '1': {'precision': 0.36437908496732024, 'recall': 0.5962566844919787, 'f1-score': 0.45233265720081134, 'suppor

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.5, ROC_AUC: 0.5, PRC_AUC: 0.75
{'0': {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666666666666666, 'support': 4012.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4012.0}, 'accuracy': 0.5, 'macro avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 8024.0}, 'weighted avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 8024.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 1234.0}, 'weighted avg': {'precision': 0.4856982996619288, 'recall': 0.6969205834683955, 'f1-score': 0.5724

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 953us/step - accuracy: 0.5557 - loss: 0.7878 - val_accuracy: 0.5600 - val_loss: 0.6908
Epoch 2/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 486us/step - accuracy: 0.6336 - loss: 0.6323 - val_accuracy: 0.5640 - val_loss: 0.6905
Epoch 3/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 483us/step - accuracy: 0.6536 - loss: 0.6119 - val_accuracy: 0.5429 - val_loss: 0.7152
Epoch 4/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 484us/step - accuracy: 0.6669 - loss: 0.5920 - val_accuracy: 0.5859 - val_loss: 0.7003
Epoch 5/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 478us/step - accuracy: 0.6908 - loss: 0.5768 - val_accuracy: 0.5608 - val_loss: 0.7163
Epoch 6/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6935 - loss: 0.5732 - val_accuracy: 0.5729 - val_loss: 0.6906
Epoch 7/10
[1m126/126[0m [

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.5897308075772681, ROC_AUC: 0.6270484098551802, PRC_AUC: 0.6034623821167999
{'0': {'precision': 0.6474201474201474, 'recall': 0.3940677966101695, 'f1-score': 0.48992872637124263, 'support': 4012.0}, '1': {'precision': 0.5644930132568972, 'recall': 0.7853938185443669, 'f1-score': 0.6568688763810715, 'support': 4012.0}, 'accuracy': 0.5897308075772681, 'macro avg': {'precision': 0.6059565803385223, 'recall': 0.5897308075772681, 'f1-score': 0.5733988013761571, 'support': 8024.0}, 'weighted avg': {'precision': 0.6059565803385224, 'recall': 0.5897308075772681, 'f1-score': 0.5733988013761571, 'support': 8024.0}}
Linear_SVC_Optimized - val - Accuracy: 0.47244732576985415, ROC_AUC: 0.5621160303444844, PRC_AUC: 0.3638525941997946
{'0': {'precision': 0.7482185273159145, 'recall': 0.36627906976744184, 'f1-score': 0.4918032786885246, 'support': 860.0}, '1': {'precision': 0.3296432964329643, 'recall': 0.7165775401069518, 'f1-score': 0.45155855096882896, 'sup

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.5, ROC_AUC: 0.5, PRC_AUC: 0.75
{'0': {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666666666666666, 'support': 4012.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4012.0}, 'accuracy': 0.5, 'macro avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 8024.0}, 'weighted avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 8024.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 1234.0}, 'weighted avg': {'precision': 0.4856982996619288, 'recall': 0.6969205834683955, 'f1-score': 0.5724

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 993us/step - accuracy: 0.5399 - loss: 0.6896 - val_accuracy: 0.5016 - val_loss: 0.6808
Epoch 2/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 489us/step - accuracy: 0.5895 - loss: 0.6702 - val_accuracy: 0.4789 - val_loss: 0.6997
Epoch 3/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 488us/step - accuracy: 0.5937 - loss: 0.6692 - val_accuracy: 0.5251 - val_loss: 0.6773
Epoch 4/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5974 - loss: 0.6621 - val_accuracy: 0.5049 - val_loss: 0.6784
Epoch 5/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 944us/step - accuracy: 0.5977 - loss: 0.6603 - val_accuracy: 0.5300 - val_loss: 0.6724
Epoch 6/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 672us/step - accuracy: 0.5893 - loss: 0.6603 - val_accuracy: 0.5089 - val_loss: 0.6820
Epoch 7/10
[1m126/126[0m [

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.7062402224926125, ROC_AUC: 0.6926821488435325, PRC_AUC: 0.46544628375318253
{'0': {'precision': 0.7147613762486127, 'recall': 0.963110667996012, 'f1-score': 0.8205563813973242, 'support': 4012.0}, '1': {'precision': 0.5734870317002881, 'recall': 0.11430212521539346, 'f1-score': 0.19061302681992337, 'support': 1741.0}, 'accuracy': 0.7062402224926125, 'macro avg': {'precision': 0.6441242039744504, 'recall': 0.5387063966057027, 'f1-score': 0.5055847041086238, 'support': 5753.0}, 'weighted avg': {'precision': 0.6720082676342145, 'recall': 0.7062402224926125, 'f1-score': 0.6299199516529727, 'support': 5753.0}}
Linear_SVC_Optimized - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.6261503544335281, PRC_AUC: 0.4088521399400092
{'0': {'precision': 0.7124125874125874, 'recall': 0.9476744186046512, 'f1-score': 0.8133732534930139, 'support': 860.0}, '1': {'precision': 0.5, 'recall': 0.12032085561497326, 'f1-score': 0.1939655172413793, 'support': 374.0}, '

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.6973752824613245, ROC_AUC: 0.5, PRC_AUC: 0.6513123587693377
{'0': {'precision': 0.6973752824613245, 'recall': 1.0, 'f1-score': 0.8217101894521249, 'support': 4012.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1741.0}, 'accuracy': 0.6973752824613245, 'macro avg': {'precision': 0.34868764123066226, 'recall': 0.5, 'f1-score': 0.41085509472606246, 'support': 5753.0}, 'weighted avg': {'precision': 0.4863322845880122, 'recall': 0.6973752824613245, 'f1-score': 0.5730403754705241, 'support': 5753.0}}
Majority_Class_Classifier - val - Accuracy: 0.6969205834683955, ROC_AUC: 0.5, PRC_AUC: 0.6515397082658023
{'0': {'precision': 0.6969205834683955, 'recall': 1.0, 'f1-score': 0.8213944603629417, 'support': 860.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6969205834683955, 'macro avg': {'precision': 0.34846029173419774, 'recall': 0.5, 'f1-score': 0.41069723018147086, 'support': 123

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.6129 - loss: 1.2181 - val_accuracy: 0.6564 - val_loss: 0.7192
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 554us/step - accuracy: 0.6844 - loss: 0.6118 - val_accuracy: 0.6621 - val_loss: 0.6508
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 573us/step - accuracy: 0.7253 - loss: 0.5363 - val_accuracy: 0.6896 - val_loss: 0.6332
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 566us/step - accuracy: 0.7497 - loss: 0.4880 - val_accuracy: 0.6831 - val_loss: 0.6227
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 563us/step - accuracy: 0.7767 - loss: 0.4582 - val_accuracy: 0.6953 - val_loss: 0.6192
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 677us/step - accuracy: 0.7962 - loss: 0.4203 - val_accuracy: 0.7002 - val_loss: 0.6324
Epoch 7/10
[1m90/90[0m [32m━━━━━━━━━━━

150 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/emmaolsen/Desktop/datsci_backup_23/MB-LM-24/venv/lib/python3.10/site-packages/sklearn/svm/_classes.py", line 325, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_libli

Linear_SVC_Optimized - train - Accuracy: 0.6352670878805284, ROC_AUC: 0.6892748824594261, PRC_AUC: 0.6696328303643824
{'0': {'precision': 0.6395969176052163, 'recall': 0.6197587593337163, 'f1-score': 0.6295215869311552, 'support': 1741.0}, '1': {'precision': 0.6311977715877437, 'recall': 0.6507754164273406, 'f1-score': 0.6408371040723982, 'support': 1741.0}, 'accuracy': 0.6352670878805284, 'macro avg': {'precision': 0.63539734459648, 'recall': 0.6352670878805284, 'f1-score': 0.6351793455017767, 'support': 3482.0}, 'weighted avg': {'precision': 0.6353973445964799, 'recall': 0.6352670878805284, 'f1-score': 0.6351793455017767, 'support': 3482.0}}
Linear_SVC_Optimized - val - Accuracy: 0.6129032258064516, ROC_AUC: 0.6742228699163899, PRC_AUC: 0.42899656120442087
{'0': {'precision': 0.8112903225806452, 'recall': 0.5808314087759815, 'f1-score': 0.676985195154778, 'support': 866.0}, '1': {'precision': 0.41451612903225804, 'recall': 0.6871657754010695, 'f1-score': 0.5171026156941649, 'support'

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Majority_Class_Classifier - train - Accuracy: 0.5, ROC_AUC: 0.5, PRC_AUC: 0.75
{'0': {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666666666666666, 'support': 1741.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1741.0}, 'accuracy': 0.5, 'macro avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 3482.0}, 'weighted avg': {'precision': 0.25, 'recall': 0.5, 'f1-score': 0.3333333333333333, 'support': 3482.0}}
Majority_Class_Classifier - val - Accuracy: 0.6983870967741935, ROC_AUC: 0.5, PRC_AUC: 0.6508064516129033
{'0': {'precision': 0.6983870967741935, 'recall': 1.0, 'f1-score': 0.8224121557454891, 'support': 866.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 374.0}, 'accuracy': 0.6983870967741935, 'macro avg': {'precision': 0.34919354838709676, 'recall': 0.5, 'f1-score': 0.41120607787274455, 'support': 1240.0}, 'weighted avg': {'precision': 0.48774453694068676, 'recall': 0.6983870967741935, 'f1-score': 0.574

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5456 - loss: 1.1095 - val_accuracy: 0.5734 - val_loss: 0.7945
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6253 - loss: 0.7093 - val_accuracy: 0.5895 - val_loss: 0.7325
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7009 - loss: 0.5934 - val_accuracy: 0.6194 - val_loss: 0.6743
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 933us/step - accuracy: 0.7175 - loss: 0.5520 - val_accuracy: 0.6218 - val_loss: 0.6795
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 924us/step - accuracy: 0.7548 - loss: 0.5095 - val_accuracy: 0.6113 - val_loss: 0.6948
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1000us/step - accuracy: 0.7743 - loss: 0.4859 - val_accuracy: 0.6137 - val_loss: 0.7111
Epoch 7/10
[1m55/55[0m [32m━━━