# Computing AUC

We previously completed all evaluations in Notebook 10. In this notebook, we will compute the AUC for each model, save the best models, and plot the ROC curves.

In [1]:
import os

import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

import egoviz.models.evaluation as ev
import egoviz.models.processing as pr
import egoviz.visualize as viz

# Single seed shared by every estimator below so reruns are reproducible.
SEED = 42

# Allow up to 300 rows to be rendered when a DataFrame is displayed.
pd.options.display.max_rows = 300

# Working directory of the kernel; the data paths below are built relative to it.
cwd = os.getcwd()

In [2]:
# Binary-presence features: load the first pickle, derive the presence frame,
# then apply row-wise min-max scaling.
binary_preds_path = os.path.join(cwd, '../data/home_data_all_preds_df.pkl')
data = pr.load_pickle(binary_preds_path)
df_active = pr.generate_binary_presence_df(data)
df_binary_active_scaled = pr.row_wise_min_max_scaling(df_active)

# Count features: `data` is rebound here to the contents of the second pickle,
# which is first converted to a frame and then to counts before scaling.
counts_preds_path = os.path.join(cwd, '../data/home_data_all_preds.pkl')
data = pr.load_pickle(counts_preds_path)
preds_df = pr.generate_df_from_preds(data)
df_counts = pr.generate_counts_df(preds_df)
df_counts_active_scaled = pr.row_wise_min_max_scaling(df_counts)

In [3]:
def build_models(seed=SEED):
    """Return a fresh list of ``(display name, unfitted estimator)`` pairs.

    The binary-presence and count experiments use exactly the same five
    classifier configurations. Building them in one place keeps the two
    lists from drifting apart, while each call still constructs new
    estimator instances so the two experiments never share an object.

    Parameters
    ----------
    seed : int, optional
        Value passed as ``random_state`` to every estimator. Defaults to the
        notebook-level ``SEED``.

    Returns
    -------
    list of tuple
        ``(name, estimator)`` pairs in a fixed order: Logistic Regression,
        Random Forest, Gradient Boosting, XGBoost, MLP.
    """
    return [
        # Only these two estimators are configured with class_weight='balanced'.
        ('Logistic Regression', LogisticRegression(max_iter=1000, random_state=seed, class_weight='balanced')),
        ('Random Forest', RandomForestClassifier(random_state=seed, class_weight='balanced')),
        ('Gradient Boosting', GradientBoostingClassifier(random_state=seed)),
        ('XGBoost', XGBClassifier(random_state=seed)),
        ('MLP', MLPClassifier(random_state=seed, learning_rate='adaptive', max_iter=1000, early_stopping=True)),
    ]


# Two independent sets of estimators, one per feature representation.
models_binary = build_models()
models_counts = build_models()

In [4]:
# Evaluate every model on the binary-presence features. Each call is given
# its own LabelEncoder instance; the log shows one "LOGOCV complete" line
# per model.
binary_results, binary_results_df = ev.evaluate_models(
    models_binary,
    df_binary_active_scaled,
    LabelEncoder(),
)

# Same evaluation on the count features.
counts_results, counts_results_df = ev.evaluate_models(
    models_counts,
    df_counts_active_scaled,
    LabelEncoder(),
)

2024-01-25 14:12:09,627 - root - INFO - LOGOCV complete for LogisticRegression
2024-01-25 14:12:15,999 - root - INFO - LOGOCV complete for RandomForestClassifier
2024-01-25 14:13:26,894 - root - INFO - LOGOCV complete for GradientBoostingClassifier
2024-01-25 14:13:32,465 - root - INFO - LOGOCV complete for XGBClassifier
2024-01-25 14:13:36,612 - root - INFO - LOGOCV complete for MLPClassifier
2024-01-25 14:13:37,353 - root - INFO - LOGOCV complete for LogisticRegression
2024-01-25 14:13:50,382 - root - INFO - LOGOCV complete for RandomForestClassifier
2024-01-25 14:17:11,715 - root - INFO - LOGOCV complete for GradientBoostingClassifier
2024-01-25 14:17:19,919 - root - INFO - LOGOCV complete for XGBClassifier
2024-01-25 14:17:25,703 - root - INFO - LOGOCV complete for MLPClassifier


In [12]:
# Per-model summary for the binary-presence features: the rendered table has
# a `median_f1` column and a `pct_above_0.5` column (named after `threshold`).
# NOTE(review): presumably the share of LOGOCV folds whose F1 exceeds the
# threshold — confirm against ev.display_pct_table.
ev.display_pct_table(binary_results_df, threshold=0.5)

Unnamed: 0,model,median_f1,pct_above_0.5
0,GradientBoostingClassifier,0.767482,0.88
1,LogisticRegression,0.811589,1.0
2,MLPClassifier,0.802238,0.81
3,RandomForestClassifier,0.797958,0.81
4,XGBClassifier,0.799526,0.88


In [13]:
# Per-model summary for the count features: the rendered table has a
# `median_f1` column and a `pct_above_0.5` column (named after `threshold`).
# NOTE(review): presumably the share of LOGOCV folds whose F1 exceeds the
# threshold — confirm against ev.display_pct_table.
ev.display_pct_table(counts_results_df, threshold=0.5)

Unnamed: 0,model,median_f1,pct_above_0.5
0,GradientBoostingClassifier,0.760268,0.88
1,LogisticRegression,0.765218,0.94
2,MLPClassifier,0.745355,0.88
3,RandomForestClassifier,0.772283,0.81
4,XGBClassifier,0.738499,0.88


In [7]:
# Pair each binary-feature result's classifier label with its AUC.
[(res.clf, res.auc) for res in binary_results]

[('Logistic Regression', 0.9447056365829677),
 ('Random Forest', 0.9290431890816366),
 ('Gradient Boosting', 0.9250543389842623),
 ('XGBoost', 0.9304546543761788),
 ('MLP', 0.939277343425017)]

In [8]:
 [(result.clf, result.auc) for result in counts_results]

[('Logistic Regression', 0.9116002051267486),
 ('Random Forest', 0.9241585056522278),
 ('Gradient Boosting', 0.9217500361825736),
 ('XGBoost', 0.9218572615087869),
 ('MLP', 0.9102456161272194)]