In [1]:
import os
import joblib
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score

In [2]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import PolynomialFeatures


def load_data(file_name, validation=False):
    """Read a CSV file from the ``data`` directory that sits next to ``models``.

    The data directory is derived from the current working directory by
    swapping the innermost path component named exactly ``models`` for
    ``data``. Only that single component is changed: a plain substring
    replacement would also rewrite unrelated parts of the path (for example
    ``ml-models-repo/models`` would become ``ml-data-repo/data``).
    If no component is named ``models`` the working directory is used as is.

    Args:
        file_name: Name of the CSV file to read.
        validation: When True, look inside the ``validation`` sub-directory
            of the data directory instead of the data directory itself.

    Returns:
        pandas.DataFrame parsed from the comma-separated file.
    """
    path_parts = os.getcwd().split(os.sep)
    # Walk from the innermost component outwards and swap the first exact match.
    for position in range(len(path_parts) - 1, -1, -1):
        if path_parts[position] == 'models':
            path_parts[position] = 'data'
            break
    data_dir = os.sep.join(path_parts)

    if validation:
        curr_file = os.path.join(data_dir, 'validation', file_name)
    else:
        curr_file = os.path.join(data_dir, file_name)
    return pd.read_csv(curr_file, delimiter=",")

# Training set, read from the data directory
data = load_data('data.csv')

# Separate the 'Outcome' label column from the predictor columns
y_train = data['Outcome']
X_train = data.drop(columns='Outcome')

# Learn the per-feature min/max from the training predictors; the fitted
# scaler is reused later to rescale the validation features
scaler = MinMaxScaler().fit(X_train)

# Validation set, read from the 'validation' sub-directory of the data directory
validation_data = load_data('validation.csv', validation=True)


def generate_metrics(model_path, model_name):
    """Score one persisted model against the validation set.

    The module-level ``validation_data`` frame is split into predictors and
    the ``Outcome`` label, and the predictors are rescaled with the
    module-level ``scaler``. When ``model_name`` contains ``'poly'`` the
    scaled predictors are additionally expanded with degree-5 polynomial
    terms and reduced to the 10 best columns ranked by ``f_classif``; both
    transformers are refit on the training CSV on every call.

    Args:
        model_path: Path of the file handed to ``joblib.load``.
        model_name: Name of the model; only checked for the substring 'poly'.

    Returns:
        Tuple ``(auc, recall, precision, f1)``. AUC is computed from column 1
        of ``predict_proba``; the other three use ``average='weighted'``.
    """
    labels = validation_data['Outcome']
    features = scaler.transform(validation_data.drop(columns='Outcome'))

    estimator = joblib.load(model_path)

    if 'poly' in model_name:
        # Re-read the training set so the transformers can be refit on it
        train_frame = load_data('data.csv')
        train_labels = train_frame['Outcome']
        train_features = train_frame.drop(columns='Outcome')

        # NOTE(review): the expander and selector are fit on the *unscaled*
        # training predictors but applied to the *scaled* validation
        # predictors -- confirm this matches how the 'poly' models were trained.
        expander = PolynomialFeatures(degree=5).fit(train_features)
        train_expanded = expander.transform(train_features)
        features = expander.transform(features)

        # Keep the 10 columns with the highest ANOVA F-score on the training set
        column_picker = SelectKBest(f_classif, k=10).fit(train_expanded, train_labels)
        features = column_picker.transform(features)

    predicted = estimator.predict(features)
    class1_prob = estimator.predict_proba(features)[:, 1]

    return (
        roc_auc_score(labels, class1_prob),
        recall_score(labels, predicted, average='weighted'),
        precision_score(labels, predicted, average='weighted'),
        f1_score(labels, predicted, average='weighted'),
    )

def run_models():
    """Evaluate every serialized model under ``./store`` and tabulate the metrics.

    Each regular file in the ``store`` directory of the current working
    directory is treated as a model: the text before the first ``.`` in the
    file name is the model name, and the file is scored with
    ``generate_metrics``. Sub-directories and dot-files (whose derived name
    would be empty, e.g. ``.gitkeep``) are skipped instead of being handed
    to the model loader. Entries are visited in sorted order so the row
    order is reproducible.

    Returns:
        pandas.DataFrame with one row per model, columns ``Model``, ``AUC``,
        ``Recall``, ``Precision`` and ``F1``, and a unique 0..n-1 index; or
        None when no model file was found.
    """
    store_dir = os.path.join(os.getcwd(), 'store')
    column_order = ['Model', 'AUC', 'Recall', 'Precision', 'F1']
    records = []

    # os.listdir returns entries in arbitrary order; sort for stable output
    for entry in sorted(os.listdir(store_dir)):
        model_path = os.path.join(store_dir, entry)
        model_name = entry.split('.')[0]

        # Not a model: nested directory or hidden file with no name stem
        if not model_name or not os.path.isfile(model_path):
            continue

        auc, recall, precision, f1 = generate_metrics(model_path, model_name)
        records.append({
            'Model': model_name,
            'AUC': auc,
            'Recall': recall,
            'Precision': precision,
            'F1': f1,
        })

    if not records:
        return None

    # Build the table once so every row gets its own index label
    return pd.DataFrame(records, columns=column_order)

In [3]:
# Score every model found under ./store; df is None when nothing was evaluated
df = run_models()

In [4]:
# Show the metrics table ordered from highest to lowest F1
df.sort_values(by='F1', ascending=False)

Unnamed: 0,Model,AUC,Recall,Precision,F1
0,random_forrest_smote,0.749945,0.698492,0.692286,0.694228
0,random_forrest,0.756726,0.708543,0.698422,0.69388
0,naive_bayes_smote,0.740376,0.688442,0.707415,0.693548
0,neural_network,0.74311,0.713568,0.709183,0.687457
0,naive_bayes_data,0.739392,0.703518,0.693161,0.683552
0,svm_data,0.751312,0.703518,0.693161,0.683552
0,svm_smote,0.749016,0.668342,0.703797,0.674793
0,logistic_regression_data,0.742454,0.668342,0.693712,0.674391
0,logistic_regression_smote,0.743438,0.668342,0.693712,0.674391
0,neural_network_smote,0.743001,0.668342,0.693712,0.674391
