In [57]:
# Imports

# import utility modules
import pandas as pd
import numpy as np
import configparser
import os

# import optuna
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)

# import joblib
from joblib import dump, load

# helper functions and classes
from helpers.helper_functions import transform_data, add_actuals
from helpers.helper_classes import AddFeatureNames, Gene_SPCA

# sklearn
from sklearn.decomposition import PCA, SparsePCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay, roc_auc_score, roc_curve, RocCurveDisplay, f1_score
from sklearn.metrics import recall_score, precision_score, accuracy_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# LightGBM
from lightgbm import LGBMClassifier

# feature_engine
from feature_engine.selection import DropFeatures, DropConstantFeatures, DropDuplicateFeatures

# Read config.ini file
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check it so that a wrong working directory fails
# with a clear message instead of a KeyError on the lookup below.
if not config.read('src/config.ini'):
    raise FileNotFoundError(
        f"Could not read 'src/config.ini' relative to {os.getcwd()!r}"
    )
os.chdir(config['PATH']['ROOT_DIR'])

# Read data
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load files that come from a trusted source.
data = load(config['PATH']['DATA_DIR'] + '/microarray-data-dict.lib')

# Read parameters
SEED = config.getint('PARAMS', 'SEED')
N_COMPONENTS = config.getint('PARAMS', 'N_COMPONENTS')


In [99]:
from sklearn.base import clone

def acc_modified(y_test, y_pred, average='macro'):
    """Return ``accuracy_score(y_test, y_pred)``.

    The ``average`` keyword is accepted but not used; only ``y_test`` and
    ``y_pred`` are forwarded to ``accuracy_score``.
    """
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# def roc_auc_modified(y_test, y_pred, average = 'macro', multi_class = 'ovo'):
#     return roc_auc_score(y_test, y_pred, multi_class=multi_class)

# Unfitted classifier prototypes; each one is cloned before fitting, so the
# objects stored here are never fitted themselves.
clf_dict = {
    'lr': LogisticRegression(random_state=SEED, max_iter=10000),
    'lgbm': LGBMClassifier(random_state=SEED),
    'svc': SVC(random_state=SEED),
    'knn': KNeighborsClassifier(),
}

# Nested results: dataset -> classifier -> transform -> metric name -> score
results_dict = {}
dataset_list = ['golub', 'christensen', 'chin', 'nakayama']

# Every metric is called as metric(y_true, y_pred, average='macro')
metrics = {
    'acc': acc_modified,
    'f1-macro': f1_score,
    'recall-macro': recall_score,
    'precision-macro': precision_score,
}
# 'roc_auc_score': roc_auc_modified

for key, dataset in data.items():
    # Skip if key not in dataset_list, only for testing!
    # if key not in dataset_list:
    #     continue

    results_dict[key] = {}
    print("-" * 80)
    print(f"Author: {key}")
    print("-" * 80)

    for clf_name, clf_obj in clf_dict.items():
        results_dict[key][clf_name] = {}
        print(f"classifier: {clf_name}")
        print('-' * 30)

        # The loop variable is deliberately NOT called `transform_data`: that
        # name is a helper imported from helpers.helper_functions and must not
        # be overwritten by this cell.
        for transform_name, split in dataset.items():
            # Fresh, unfitted copy for every (classifier, transform) pair
            clf = clone(clf_obj)

            # Fit model
            # presumably the labels live under column/key 0 -- TODO confirm
            clf.fit(split['X_train'], split['y_train'][0])

            # Predict once and reuse the predictions for every metric
            X_test = split['X_test']
            y_test = split['y_test'][0]
            y_pred = clf.predict(X_test)

            # Calculate metrics
            results_dict[key][clf_name][transform_name] = {
                metric_name: metric(y_test, y_pred, average='macro')
                for metric_name, metric in metrics.items()
            }
# Count number of times a single transform wins




--------------------------------------------------------------------------------
Author: yeoh
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: nakayama
--------------------------------------------------------------------------------
classifier: lr
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: lgbm
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: svc
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: knn
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


--------------------------------------------------------------------------------
Author: golub
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: khan
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: west
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
clas

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: alon
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: subramanian
--------------------------------------------------------------------------------
classifier: lr
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: lgbm
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: svc
------------------------------
classifier: knn
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


--------------------------------------------------------------------------------
Author: burczynski
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: chin
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: borovecki
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: shipp
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: tian
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: knn
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


--------------------------------------------------------------------------------
Author: gordon
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: chiaretti
--------------------------------------------------------------------------------
classifier: lr
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: lgbm
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: svc
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: knn
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


--------------------------------------------------------------------------------
Author: sorlie
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: chowdary
--------------------------------------------------------------------------------
classifier: lr
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: sun
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: pomeroy
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: gravier
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: su
--------------------------------------------------------------------------------
classifier: lr
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: christensen
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------
--------------------------------------------------------------------------------
Author: singh
--------------------------------------------------------------------------------
classifier: lr
------------------------------
classifier: lgbm
------------------------------
classifier: svc
------------------------------
classifier: knn
------------------------------


In [100]:
# Count, for every metric found in the results, how often each transform wins
def list_transforms_metrics(results_dict):
    """Collect the transform names and metric names present in a results dictionary.

    Parameters
    ----------
    results_dict : dict
        Nested mapping ``dataset -> classifier -> transform -> metric -> value``.

    Returns
    -------
    transform_win_counter : dict
        Every transform name that occurs, mapped to 0, in first-seen order.
    metrics : list
        Every metric name that occurs, in first-seen order, without duplicates.
    """
    transform_win_counter = {}
    metrics = []
    for dobj in results_dict.values():
        for clf_obj in dobj.values():
            for tname, tobj in clf_obj.items():
                # Register the transform with a zero count the first time it is seen
                transform_win_counter.setdefault(tname, 0)
                for metric_name in tobj:
                    if metric_name not in metrics:
                        metrics.append(metric_name)
    return transform_win_counter, metrics

# NOTE: this rebinds `metrics` from the metric-function dict built in the
# evaluation cell to a plain list of metric names.
counter, metrics = list_transforms_metrics(results_dict)

# metric name -> {transform name: number of wins, ..., 'ties': number of ties}
count_results = {}
# Loop over metrics found in results dictionary
for metric in metrics:
    # Start every metric from a fresh zeroed counter plus a bucket for ties
    cur_counter = counter.copy()
    cur_counter['ties'] = 0
    count_results[metric] = cur_counter

    for dname, dobj in results_dict.items():
        for clf_name, clf_obj in dobj.items():
            # Nothing to compare for a classifier without any transform results
            if not clf_obj:
                continue

            # Decide the winner from this classifier's own scores only, so no
            # state can leak in from the previous (dataset, classifier) pair.
            scores = {tname: tobj[metric] for tname, tobj in clf_obj.items()}
            best_score = max(scores.values())
            winners = [tname for tname, score in scores.items() if score == best_score]

            if len(winners) == 1:
                cur_counter[winners[0]] += 1
            else:
                # Two or more transforms share the best score
                cur_counter['ties'] += 1




In [101]:
# Print count results to pandas dataframe
# Rename columns by metric key rather than by position, so a change in the
# number or order of metrics cannot silently mislabel a column or raise a
# length-mismatch error.
column_labels = {
    'acc': 'Accuracy',
    'f1-macro': 'F1',
    'recall-macro': 'Recall',
    'precision-macro': 'Precision',
}
df = pd.DataFrame(count_results).rename(columns=column_labels)

print(df.to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  Accuracy &  F1 &  Recall &  Precision \\
\midrule
none &        23 &  30 &      28 &         29 \\
pca  &        13 &  13 &      13 &         15 \\
spca &        11 &  14 &      13 &         13 \\
ties &        41 &  31 &      34 &         31 \\
\bottomrule
\end{tabular}



  print(df.to_latex())
