In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from utils.df_loader import (  load_electricity_mixed_df,
    load_eye_movements_mixed_df,
    load_covertype_mixed_df,
    load_albert_df,
    load_road_safety_df,
    load_california_df,
    load_credit_df,
    load_heloc_df,
    load_jannis_df,
    load_Diabetes130US_df,
    load_eye_movements_df,
    load_Higgs_df,
    load_default_of_credit_card_clients_df,
    load_MiniBooNE_df,
    load_bank_marketing_df,
    load_Bioresponse_df,
    load_bank_marketing_df,
    load_MagicTelescope_df,
    load_house_16H_df,
    load_pol_df,
    load_electricity_df,
    load_covertype_df,
    load_adult_df,
    load_compas_df,
    load_german_df,
    load_diabetes_df,
    load_breast_cancer_df,)
from utils.preprocessing import preprocess_df,get_columns_type
from sklearn.model_selection import train_test_split
from utils.dice import generate_dice_result, process_results
from utils.models import train_three_models, evaluation_test, save_three_models, load_three_models
from utils.save import save_result_as_csv

# Turn off pandas' chained-assignment (SettingWithCopy) warning for this session.
pd.options.mode.chained_assignment = None 

print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly()) # True in the recorded run (TF 2.15.0)

# Seed the TensorFlow and NumPy global RNGs. `seed` is also reused later as the
# `random_state` of train_test_split.
# NOTE(review): Python's built-in `random` module is not seeded here — confirm
# that nothing downstream relies on it.
seed = 123
tf.random.set_seed(seed)
np.random.seed(seed)


TF version:  2.15.0
Eager execution enabled:  True


In [2]:
# Names of every dataset handled by the notebook. Each name is listed exactly
# once ("bank-marketing" used to appear twice), so iterating over the list never
# processes the same dataset twice.
all_dataset_names = [
    "electricity_mixed",
    "eye_movements_mixed",
    "covertype_mixed",
    "albert",
    "road-safety",
    "california",
    "credit",
    "heloc",
    "jannis",
    "Diabetes130US",
    "eye_movements",
    "Higgs",
    "default-of-credit-card-clients",
    "MiniBooNE",
    "bank-marketing",
    "Bioresponse",
    "MagicTelescope",
    "house_16H",
    "pol",
    "covertype",
    "electricity",
    "adult",
    "german",
    "compas",
    "diabetes",
    "breast_cancer",
]

# Short identifiers of the algorithms under comparison.
all_algorithm_names = ["dice", "GS", "proto", "watcher"]

# Short identifiers of the model families.
# NOTE(review): presumably decision tree / random forest / neural network — confirm.
all_models = ["dt", "rfc", "nn"]


In [9]:
#### Select dataset ####

# Rebinds `all_dataset_names` to the subset of datasets processed by the loop
# that follows. Uncomment an entry to include that dataset in the run.
all_dataset_names = [
    # "electricity_mixed", 
    # "eye_movements_mixed", 
    # "covertype_mixed", 
    # "albert", 
    # "road-safety",
    # "california",
    # "credit",
    # "heloc",
    # "jannis",
    # "Diabetes130US",
    # "eye_movements",
    # "Higgs",
    # "default-of-credit-card-clients",
    # "MiniBooNE",
    # "bank-marketing",
    # "Bioresponse",
    # "MagicTelescope",
    # "house_16H",
    # "pol",
    # "covertype",
    # "electricity",
    "adult",
    "german",
    "compas",
    "diabetes",
    "breast_cancer",
]

# Lookup table from dataset name to the loader imported from utils.df_loader.
# It replaces a long if/elif chain that contained a duplicated (unreachable)
# "pol" branch and an inconsistently indented first branch.
DATASET_LOADERS = {
    "electricity_mixed": load_electricity_mixed_df,
    "eye_movements_mixed": load_eye_movements_mixed_df,
    "covertype_mixed": load_covertype_mixed_df,
    "albert": load_albert_df,
    "road-safety": load_road_safety_df,
    "california": load_california_df,
    "credit": load_credit_df,
    "heloc": load_heloc_df,
    "jannis": load_jannis_df,
    "Diabetes130US": load_Diabetes130US_df,
    "eye_movements": load_eye_movements_df,
    "Higgs": load_Higgs_df,
    "default-of-credit-card-clients": load_default_of_credit_card_clients_df,
    "MiniBooNE": load_MiniBooNE_df,
    "bank-marketing": load_bank_marketing_df,
    "Bioresponse": load_Bioresponse_df,
    "MagicTelescope": load_MagicTelescope_df,
    "house_16H": load_house_16H_df,
    "pol": load_pol_df,
    "covertype": load_covertype_df,
    "electricity": load_electricity_df,
    "adult": load_adult_df,
    "german": load_german_df,
    "compas": load_compas_df,
    "diabetes": load_diabetes_df,
    "breast_cancer": load_breast_cancer_df,
}

# For every selected dataset: preprocess, split 80/20, train and save the
# models, reload them from disk and print their test-set metrics.
for dataset_name in all_dataset_names:

    # Reject names without a registered loader. ValueError is a subclass of
    # Exception, so existing `except Exception` handlers keep working.
    if dataset_name not in DATASET_LOADERS:
        raise ValueError(f"Unsupported dataset: {dataset_name!r}")
    dataset_loading_fn = DATASET_LOADERS[dataset_name]

    print("Dataset: ", dataset_name)

    #### Load dataframe info.
    df_info = preprocess_df(dataset_loading_fn)

    ### Separate into train and test sets (80% train, shuffled, seeded).
    train_df, test_df = train_test_split(df_info.dummy_df, train_size=.8, random_state=seed, shuffle=True)

    ### Get training and testing arrays.
    X_train = np.array(train_df[df_info.ohe_feature_names])
    y_train = np.array(train_df[df_info.target_name])
    X_test = np.array(test_df[df_info.ohe_feature_names])
    y_test = np.array(test_df[df_info.target_name])

    ### Train the models and persist them.
    models = train_three_models(X_train, y_train)
    save_three_models(models, dataset_name, path='./saved_models')

    ### Load models back from disk (the second argument-free dimension is the feature count).
    # NOTE(review): load_three_models is called without `path`; presumably its
    # default matches './saved_models' — confirm against utils.models.
    models = load_three_models(X_train.shape[-1], dataset_name)

    ### Print out the metrics on the test set.
    evaluation_test(models, X_test, y_test)

    # Blank lines separating the output of consecutive datasets.
    print("\n")
    print("\n")


Dataset:  adult
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  saving_api.save_model(


Model: name Decision Tree: & 0.8197	& 0.6361	& 0.6072	& 0.6213
Model: name Random Forest: & 0.8469	& 0.7117	& 0.6242	& 0.6651
Model: name Neural Network: & 0.8506	& 0.7600	& 0.5649	& 0.6481
Model: name SVM Linear: & 0.8346	& 0.7173	& 0.5296	& 0.6094




Dataset:  german
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  saving_api.save_model(


Model: name Decision Tree: & 0.6500	& 0.4286	& 0.4426	& 0.4355
Model: name Random Forest: & 0.7700	& 0.6829	& 0.4590	& 0.5490
Model: name Neural Network: & 0.7650	& 0.6522	& 0.4918	& 0.5607
Model: name SVM Linear: & 0.7700	& 0.7143	& 0.4098	& 0.5208




Dataset:  compas
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  saving_api.save_model(


Model: name Decision Tree: & 0.7387	& 0.8480	& 0.7974	& 0.8219
Model: name Random Forest: & 0.7893	& 0.8529	& 0.8717	& 0.8622
Model: name Neural Network: & 0.8170	& 0.8783	& 0.8799	& 0.8791
Model: name SVM Linear: & 0.7990	& 0.8335	& 0.9175	& 0.8735




Dataset:  diabetes
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  saving_api.save_model(


Model: name Decision Tree: & 0.7662	& 0.6719	& 0.7414	& 0.7049
Model: name Random Forest: & 0.7727	& 0.7170	& 0.6552	& 0.6847
Model: name Neural Network: & 0.7662	& 0.8235	& 0.4828	& 0.6087
Model: name SVM Linear: & 0.7922	& 0.7826	& 0.6207	& 0.6923




Dataset:  breast_cancer
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: name Decision Tree: & 0.9737	& 0.9750	& 0.9512	& 0.9630
Model: name Random Forest: & 0.9912	& 1.0000	& 0.9756	& 0.9877
Model: name Neural Network: & 0.9737	& 1.0000	& 0.9268	& 0.9620
Model: name SVM Linear: & 0.9825	& 1.0000	& 0.9512	& 0.9750






  saving_api.save_model(


