### Global Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, FixedThresholdClassifier

### Data Loading

In [2]:
cleanedData = pd.read_csv('../Fully-Cleaned-Data.csv')

In [3]:
categoricalColumns = cleanedData.select_dtypes(include=['object']).columns.tolist()
numericalData = cleanedData.drop(columns=categoricalColumns)
oneHotData = pd.get_dummies(cleanedData[categoricalColumns])

In [4]:
cleanedDataOneHotEncoded = pd.concat([numericalData, oneHotData], axis=1)

In [5]:
X = cleanedDataOneHotEncoded.drop(['Future Relapse Binary'], axis=1)
y = cleanedDataOneHotEncoded['Future Relapse Binary']
XTrain, XTest, yTrain, yTest = train_test_split(X, y, stratify=y, random_state=42)

### Helper Funcs

In [36]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score

def print_scoring_metrics(fittedModel, return_scores = False):
    train_preds = fittedModel.predict(XTrain)
    train_proba_preds = fittedModel.predict_proba(XTrain)[:, 1]
    test_preds = fittedModel.predict(XTest)
    test_proba_preds = fittedModel.predict_proba(XTest)[:, 1]
    print('Training Scores')
    print(f'Accuracy: {accuracy_score(yTrain, train_preds)}')
    print(f'Recall: {recall_score(yTrain, train_preds)}')
    print(f'Specificity: {recall_score(yTrain, train_preds, pos_label=0)}')
    print(f'Precision: {precision_score(yTrain, train_preds)}')
    print(f'F1: {f1_score(yTrain, train_preds)}')
    print(f'ROC AUC: {roc_auc_score(yTrain, train_proba_preds)}')
    print('--------------------------------')
    print('Test Scores')
    print(f'Accuracy: {accuracy_score(yTest, test_preds)}')
    print(f'Recall: {recall_score(yTest, test_preds)}')
    print(f'Specificity: {recall_score(yTest, test_preds, pos_label=0)}')
    print(f'Precision: {precision_score(yTest, test_preds)}')
    print(f'F1: {f1_score(yTest, test_preds)}')
    print(f'ROC AUC: {roc_auc_score(yTest, test_proba_preds)}')
    if return_scores:
        return f'{accuracy_score(yTest, test_preds):.4f}\t{recall_score(yTest, test_preds):.4f}\t{recall_score(yTest, test_preds, pos_label=0):.4f}\t{precision_score(yTest, test_preds):.4f}\t{f1_score(yTest, test_preds):.4f}\t{roc_auc_score(yTest, test_proba_preds):.4f}'
    else:
        print(f'{accuracy_score(yTest, test_preds):.4f}\t{recall_score(yTest, test_preds):.4f}\t{recall_score(yTest, test_preds, pos_label=0):.4f}\t{precision_score(yTest, test_preds):.4f}\t{f1_score(yTest, test_preds):.4f}\t{roc_auc_score(yTest, test_proba_preds):.4f}')

In [7]:
from sklearn.metrics import make_scorer

cross_validation_scoring = {
    'acc': 'accuracy', 
    'rec': 'recall', 
    'spec': make_scorer(recall_score, pos_label=0), 
    'prec': 'precision',
    'f1_score': 'f1', 
    'auc': 'roc_auc'
}

In [35]:
from sklearn.model_selection import cross_validate
from sklearn.frozen import FrozenEstimator

def cross_validated_threshold(fittedModel, new_threshold):
    tuned_threshold_model = FixedThresholdClassifier(
        estimator = FrozenEstimator(fittedModel),
        threshold = new_threshold
    )

    cv_results = cross_validate(tuned_threshold_model, X, y, cv=10, scoring=cross_validation_scoring)
    
    return tuned_threshold_model, cv_results

In [46]:
def find_recall_threshold(fittedModel):
    results_dict = {}
    #recall_dict = {}
    for threshold in np.linspace(0, 1, 99):
        # new_threshold_model = FixedThresholdClassifier(
        #     estimator = FrozenEstimator(fittedModel), 
        #     threshold = threshold
        # )
        tuned_threshold_model, cv_results = cross_validated_threshold(fittedModel, threshold)
        #print(np.average(cv_results['test_rec']))
        if np.abs(np.average(cv_results['test_rec']) - 0.7) < 0.02:
            print(np.average(cv_results['test_rec']))
            print(threshold)
            results_dict[threshold] = print_scoring_metrics(tuned_threshold_model, True)

In [10]:
def using_recall_threshold(fittedModel, new_threshold):
    tuned_threshold_model = FixedThresholdClassifier(
        estimator = FrozenEstimator(fittedModel),
        threshold = new_threshold
    ).fit(XTrain, yTrain)
    print_scoring_metrics(tuned_threshold_model)

In [11]:
from sklearn.metrics import make_scorer

cross_validation_scoring = {
    'acc': 'accuracy', 
    'rec': 'recall', 
    'spec': make_scorer(recall_score, pos_label=0), 
    'prec': 'precision',
    'f1_score': 'f1', 
    'auc': 'roc_auc'
}

In [12]:
from sklearn.model_selection import cross_validate

def cross_validated_threshold(fittedModel, new_threshold):
    tuned_threshold_model = FixedThresholdClassifier(
        estimator = FrozenEstimator(fittedModel),
        threshold = new_threshold
    )

    cv_results = cross_validate(tuned_threshold_model, XTrain, yTrain, cv=10, scoring=cross_validation_scoring)

    return cv_results

### Decision Tree

criterion: entropy, max_depth: 8, max_features: sqrt, min_samples_split: 9

In [26]:
from sklearn.tree import DecisionTreeClassifier

decision_tree = DecisionTreeClassifier(
    class_weight='balanced',
    max_depth=8,
    max_features='sqrt',
    min_samples_split=9,
    criterion='entropy'
).fit(XTrain, yTrain)

In [27]:
print_scoring_metrics(decision_tree)

Training Scores
Accuracy: 0.6794788273615635
Recall: 0.7431693989071039
Specificity: 0.6440162271805274
Precision: 0.5375494071146245
F1: 0.6238532110091743
ROC AUC: 0.762370823588527
--------------------------------
Test Scores
Accuracy: 0.5703125
Recall: 0.5792349726775956
Specificity: 0.5653495440729484
Precision: 0.42570281124497994
F1: 0.49074074074074076
ROC AUC: 0.5785041606457721
0.5703	0.5792	0.5653	0.4257	0.4907	0.5785


In [59]:
print(roc_auc_score(yTest, decision_tree.predict_proba(XTest)[:, 1]))

0.5785041606457721


In [60]:
find_recall_threshold(decision_tree)

0.7015549796371714
0.49999999999999994
Training Scores
Accuracy: 0.6794788273615635
Recall: 0.7431693989071039
Specificity: 0.6440162271805274
Precision: 0.5375494071146245
F1: 0.6238532110091743
ROC AUC: 0.762370823588527
--------------------------------
Test Scores
Accuracy: 0.5703125
Recall: 0.5792349726775956
Specificity: 0.5653495440729484
Precision: 0.42570281124497994
F1: 0.49074074074074076
ROC AUC: 0.5785041606457721
0.7015549796371714
0.5102040816326531
Training Scores
Accuracy: 0.6794788273615635
Recall: 0.7431693989071039
Specificity: 0.6440162271805274
Precision: 0.5375494071146245
F1: 0.6238532110091743
ROC AUC: 0.762370823588527
--------------------------------
Test Scores
Accuracy: 0.5703125
Recall: 0.5792349726775956
Specificity: 0.5653495440729484
Precision: 0.42570281124497994
F1: 0.49074074074074076
ROC AUC: 0.5785041606457721
0.7015549796371714
0.520408163265306
Training Scores
Accuracy: 0.6794788273615635
Recall: 0.7431693989071039
Specificity: 0.6440162271805274


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [70]:
using_recall_threshold(decision_tree, .42)

Training Scores
Accuracy: 0.641042345276873
Recall: 0.8397085610200364
Specificity: 0.5304259634888439
Precision: 0.4989177489177489
F1: 0.6259334691106585
ROC AUC: 0.762370823588527
--------------------------------
Test Scores
Accuracy: 0.552734375
Recall: 0.6994535519125683
Specificity: 0.47112462006079026
Precision: 0.423841059602649
F1: 0.5278350515463918
ROC AUC: 0.5785041606457721
0.5527	0.6995	0.4711	0.4238	0.5278	0.5785


In [22]:
cross_validated_threshold(decision_tree, 0.39795918367346933)

fit_time
0.0009387016296386718
score_time
0.007428741455078125
test_acc
0.5179271708683473
test_rec
0.9726599326599328
test_spec
0.2647186147186147
test_prec
0.4250003580723882
test_f1_score
0.5911953502356774
test_auc
0.7036059974443812


### Random Forest

class_weight: balanced, criterion: gini, max_depth: 4, n_estimators: 250

In [71]:
from sklearn.ensemble import RandomForestClassifier

random_forest = RandomForestClassifier(
    class_weight='balanced',
    criterion='gini',
    max_depth=4,
    n_estimators=250
).fit(XTrain, yTrain)

In [72]:
print_scoring_metrics(random_forest)

Training Scores
Accuracy: 0.6625407166123779
Recall: 0.7231329690346083
Specificity: 0.6288032454361054
Precision: 0.5203145478374837
F1: 0.6051829268292683
ROC AUC: 0.7415723591113477
--------------------------------
Test Scores
Accuracy: 0.6015625
Recall: 0.6174863387978142
Specificity: 0.5927051671732523
Precision: 0.4574898785425101
F1: 0.5255813953488372
ROC AUC: 0.6384307472553025
0.6016	0.6175	0.5927	0.4575	0.5256	0.6384


In [73]:
find_recall_threshold(random_forest)

0.6959644576082933
0.49999999999999994
Training Scores
Accuracy: 0.6625407166123779
Recall: 0.7231329690346083
Specificity: 0.6288032454361054
Precision: 0.5203145478374837
F1: 0.6051829268292683
ROC AUC: 0.7415723591113477
--------------------------------
Test Scores
Accuracy: 0.6015625
Recall: 0.6174863387978142
Specificity: 0.5927051671732523
Precision: 0.4574898785425101
F1: 0.5255813953488372
ROC AUC: 0.6384307472553025


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

In [80]:
using_recall_threshold(random_forest, 0.4591)

Training Scores
Accuracy: 0.6149837133550489
Recall: 0.8342440801457195
Specificity: 0.49290060851926976
Precision: 0.4780793319415449
F1: 0.6078301260783012
ROC AUC: 0.7415723591113477
--------------------------------
Test Scores
Accuracy: 0.533203125
Recall: 0.7049180327868853
Specificity: 0.4376899696048632
Precision: 0.410828025477707
F1: 0.5191146881287726
ROC AUC: 0.6384307472553025
0.5332	0.7049	0.4377	0.4108	0.5191	0.6384


### KNN

n_neighbors: 3, weights: uniform, p: 1

In [117]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(
    n_neighbors=10
).fit(XTrain, yTrain)

In [118]:
print_scoring_metrics(knn)

Training Scores
Accuracy: 0.6944625407166124
Recall: 0.2896174863387978
Specificity: 0.9198782961460447
Precision: 0.6680672268907563
F1: 0.4040660736975858
ROC AUC: 0.7414033259808541
--------------------------------
Test Scores
Accuracy: 0.623046875
Recall: 0.18032786885245902
Specificity: 0.8693009118541033
Precision: 0.4342105263157895
F1: 0.2548262548262548
ROC AUC: 0.5997890610726327
0.6230	0.1803	0.8693	0.4342	0.2548	0.5998


In [119]:
find_recall_threshold(knn)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

In [101]:
using_recall_threshold(knn, 0.334)

Training Scores
Accuracy: 0.7993485342019544
Recall: 0.6211293260473588
Specificity: 0.8985801217038539
Precision: 0.7732426303854876
F1: 0.6888888888888889
ROC AUC: 0.8599592842601521
--------------------------------
Test Scores
Accuracy: 0.58984375
Recall: 0.31693989071038253
Specificity: 0.7416413373860182
Precision: 0.40559440559440557
F1: 0.3558282208588957
ROC AUC: 0.5610228046572657
0.5898	0.3169	0.7416	0.4056	0.3558	0.5610


### XGBoost

grow_policy: depthwise, max_depth: 2, n_estimators: 150

In [102]:
from xgboost import XGBClassifier

xgb = XGBClassifier(
    grow_policy = 'depthwise',
    max_depth = 2,
    n_estimators = 150
).fit(XTrain, yTrain)

In [103]:
print_scoring_metrics(xgb)

Training Scores
Accuracy: 0.7713355048859935
Recall: 0.5373406193078324
Specificity: 0.9016227180527383
Precision: 0.7525510204081632
F1: 0.6269925611052072
ROC AUC: 0.8427234470196595
--------------------------------
Test Scores
Accuracy: 0.626953125
Recall: 0.3005464480874317
Specificity: 0.8085106382978723
Precision: 0.4661016949152542
F1: 0.3654485049833887
ROC AUC: 0.6213729300579667
0.6270	0.3005	0.8085	0.4661	0.3654	0.6214


In [107]:
y_train_pred_proba = xgb.predict_proba(XTrain)
y_test_pred_proba = xgb.predict_proba(XTest)
for threshold in np.linspace(0, 1, 999):
    y_train_pred = (y_train_pred_proba[:, 1] >= threshold).astype(int)
    y_test_pred = (y_test_pred_proba[:,1]>=threshold).astype(int)
    if recall_score(yTest, y_test_pred) > 0.68 and recall_score(yTest, y_test_pred) < 0.72:
        print(threshold)
        print('Training Scores')
        print(f'Accuracy: {accuracy_score(yTrain, y_train_pred)}')
        print(f'Recall: {recall_score(yTrain, y_train_pred)}')
        print(f'Specificity: {recall_score(yTrain, y_train_pred, pos_label=0)}')
        print(f'Precision: {precision_score(yTrain, y_train_pred)}')
        print(f'F1: {f1_score(yTrain, y_train_pred)}')
        print(f'ROC AUC: {roc_auc_score(yTrain, y_train_pred)}')
        print('--------------------------------')
        print('Test Scores')
        print(f'Accuracy: {accuracy_score(yTest, y_test_pred)}')
        print(f'Recall: {recall_score(yTest, y_test_pred)}')
        print(f'Specificity: {recall_score(yTest, y_test_pred, pos_label=0)}')
        print(f'Precision: {precision_score(yTest, y_test_pred)}')
        print(f'F1: {f1_score(yTest, y_test_pred)}')
        print(f'ROC AUC: {roc_auc_score(yTest, y_test_pred)}')
        print(f'{accuracy_score(yTest, y_test_pred):.4f}\t{recall_score(yTest, y_test_pred):.4f}\t{recall_score(yTest, y_test_pred, pos_label=0):.4f}\t{precision_score(yTest, y_test_pred):.4f}\t{f1_score(yTest, y_test_pred):.4f}\t{roc_auc_score(yTest, y_test_pred):.4f}')


0.282565130260521
Training Scores
Accuracy: 0.6781758957654723
Recall: 0.9034608378870674
Specificity: 0.552738336713996
Precision: 0.5293489861259338
F1: 0.6675639300134589
ROC AUC: 0.7280995873005317
--------------------------------
Test Scores
Accuracy: 0.564453125
Recall: 0.7158469945355191
Specificity: 0.48024316109422494
Precision: 0.4337748344370861
F1: 0.5402061855670103
ROC AUC: 0.598045077814872
0.5645	0.7158	0.4802	0.4338	0.5402	0.5980
0.283567134268537
Training Scores
Accuracy: 0.6801302931596092
Recall: 0.9034608378870674
Specificity: 0.5557809330628803
Precision: 0.5310492505353319
F1: 0.6689143627781524
ROC AUC: 0.729620885474974
--------------------------------
Test Scores
Accuracy: 0.564453125
Recall: 0.7158469945355191
Specificity: 0.48024316109422494
Precision: 0.4337748344370861
F1: 0.5402061855670103
ROC AUC: 0.598045077814872
0.5645	0.7158	0.4802	0.4338	0.5402	0.5980
0.28456913827655306
Training Scores
Accuracy: 0.6807817589576547
Recall: 0.9034608378870674
Specif

In [105]:
train_preds = xgb.predict(XTrain)
test_preds = xgb.predict_proba(XTest)
print('Training Scores')
print(f'Accuracy: {accuracy_score(yTrain, train_preds)}')
print(f'Recall: {recall_score(yTrain, train_preds)}')
print(f'Specificity: {recall_score(yTrain, train_preds, pos_label=0)}')
print(f'Precision: {precision_score(yTrain, train_preds)}')
print(f'F1: {f1_score(yTrain, train_preds)}')
print(f'ROC AUC: {roc_auc_score(yTrain, train_preds)}')
print('--------------------------------')
print('Test Scores')
print(f'Accuracy: {accuracy_score(yTest, test_preds)}')
print(f'Recall: {recall_score(yTest, test_preds)}')
print(f'Specificity: {recall_score(yTest, test_preds, pos_label=0)}')
print(f'Precision: {precision_score(yTest, test_preds)}')
print(f'F1: {f1_score(yTest, test_preds)}')
print(f'ROC AUC: {roc_auc_score(yTest, test_preds)}')

Training Scores
Accuracy: 0.7713355048859935
Recall: 0.5373406193078324
Specificity: 0.9016227180527383
Precision: 0.7525510204081632
F1: 0.6269925611052072
ROC AUC: 0.7194816686802854
--------------------------------
Test Scores


ValueError: Classification metrics can't handle a mix of binary and continuous-multioutput targets

### Naive Bayes

In [120]:
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline

nb = GaussianNB().fit(XTrain, yTrain)

In [121]:
print_scoring_metrics(nb)

Training Scores
Accuracy: 0.43778501628664496
Recall: 0.9617486338797814
Specificity: 0.1460446247464503
Precision: 0.3854014598540146
F1: 0.5502866076081292
ROC AUC: 0.6476351987940456
--------------------------------
Test Scores
Accuracy: 0.439453125
Recall: 0.9672131147540983
Specificity: 0.1458966565349544
Precision: 0.3864628820960699
F1: 0.5522620904836193
ROC AUC: 0.6206587273905029
0.4395	0.9672	0.1459	0.3865	0.5523	0.6207


In [110]:
find_recall_threshold(nb)

In [122]:
y_pred_proba = nb.predict_proba(XTest)
for threshold in np.linspace(0, 1, 99):
    y_pred = (y_pred_proba[:,1]>=threshold).astype(int)
    #if recall_score(yTest, y_pred) > 0.6 and recall_score(yTest, y_pred) < 0.79:
    print(f'{threshold}: threshold')
    print(recall_score(yTest, y_pred))

0.0: threshold
1.0
0.01020408163265306: threshold
0.9672131147540983
0.02040816326530612: threshold
0.9672131147540983
0.030612244897959183: threshold
0.9672131147540983
0.04081632653061224: threshold
0.9672131147540983
0.0510204081632653: threshold
0.9672131147540983
0.061224489795918366: threshold
0.9672131147540983
0.07142857142857142: threshold
0.9672131147540983
0.08163265306122448: threshold
0.9672131147540983
0.09183673469387754: threshold
0.9672131147540983
0.1020408163265306: threshold
0.9672131147540983
0.11224489795918366: threshold
0.9672131147540983
0.12244897959183673: threshold
0.9672131147540983
0.13265306122448978: threshold
0.9672131147540983
0.14285714285714285: threshold
0.9672131147540983
0.1530612244897959: threshold
0.9672131147540983
0.16326530612244897: threshold
0.9672131147540983
0.17346938775510204: threshold
0.9672131147540983
0.18367346938775508: threshold
0.9672131147540983
0.19387755102040816: threshold
0.9672131147540983
0.2040816326530612: threshold
0.

In [49]:
using_recall_threshold(nb, 0.9897959183673468)

Training Scores
Accuracy: 0.4762214983713355
Recall: 0.9143897996357013
Specificity: 0.23225152129817445
Precision: 0.3987291501191422
F1: 0.5553097345132744
ROC AUC: 0.6476351987940456
--------------------------------
Test Scores
Accuracy: 0.4609375
Recall: 0.8797814207650273
Specificity: 0.22796352583586627
Precision: 0.38795180722891565
F1: 0.5384615384615384
ROC AUC: 0.6206587273905029
0.4609	0.8798	0.2280	0.3880	0.5385	0.6207


### Logistic Regression

In [28]:
from sklearn.linear_model import LogisticRegression
#from sklearn.model_selection import GridSearchCV

log_reg = LogisticRegression(
    max_iter=500,
    class_weight='balanced'
).fit(XTrain, yTrain)

print_scoring_metrics(log_reg)

Training Scores
Accuracy: 0.6371335504885993
Recall: 0.6939890710382514
Specificity: 0.6054766734279919
Precision: 0.4948051948051948
F1: 0.577710386656558
ROC AUC: 0.7107205799221894
--------------------------------
Test Scores
Accuracy: 0.6171875
Recall: 0.6338797814207651
Specificity: 0.60790273556231
Precision: 0.47346938775510206
F1: 0.5420560747663551
ROC AUC: 0.6643579650206788
0.6172	0.6339	0.6079	0.4735	0.5421	0.6644


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [47]:
find_recall_threshold(log_reg)

0.6972602739726027
0.4897959183673469
Training Scores
Accuracy: 0.6260586319218241
Recall: 0.7085610200364298
Specificity: 0.5801217038539553
Precision: 0.48443337484433374
F1: 0.5754437869822485
ROC AUC: 0.7107205799221894
--------------------------------
Test Scores
Accuracy: 0.619140625
Recall: 0.6666666666666666
Specificity: 0.5927051671732523
Precision: 0.4765625
F1: 0.5558086560364465
ROC AUC: 0.6643579650206788


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

In [57]:
using_recall_threshold(log_reg, 0.4701)

Training Scores
Accuracy: 0.6221498371335505
Recall: 0.7540983606557377
Specificity: 0.5486815415821501
Precision: 0.4819557625145518
F1: 0.5880681818181818
ROC AUC: 0.7107205799221894
--------------------------------
Test Scores
Accuracy: 0.60546875
Recall: 0.6939890710382514
Specificity: 0.5562310030395137
Precision: 0.4652014652014652
F1: 0.5570175438596491
ROC AUC: 0.6643579650206788
0.6055	0.6940	0.5562	0.4652	0.5570	0.6644
