In [1]:
from consensusModel import clsf
import pandas as pd, numpy as np
from tqdm import tqdm

In [2]:
# Run configuration.
# nCV   -- presumably the number of cross-validation repetitions (TODO confirm)
# folds -- presumably the number of folds per repetition (TODO confirm)
nCV, folds = 100, 5

In [3]:
# Load the training matrix from CSV (presumably the feature table -- TODO confirm).
X = pd.read_csv("data/trainMatrix.csv")

In [4]:
# Read only the "label" column and turn the resulting one-column frame into a
# Series. Squeezing along the column axis only keeps Y a Series even if the
# file holds a single row (a bare `.squeeze()` would return a scalar there).
Y = pd.read_csv("data/curedData.csv", usecols=["label"]).squeeze("columns")

## CV folds preparation

In [5]:
from sklearn.model_selection import StratifiedKFold
import random

In [6]:
from sklearn import metrics as _metrics

## Stratified cross-validation

In [7]:
# Load the stored CV fold data. The first CSV column becomes the row index and
# the first two rows become a two-level column MultiIndex.
CVfoldsData = pd.read_csv("data/CVfoldsData.csv", index_col=0, header=[0,1])

## Results

In [8]:
from utilsMetric import metrics, getTableFromStats

In [9]:
# Load the stored scores. The first CSV column becomes the row index and the
# first two rows become a two-level column MultiIndex; later cells select the
# second level by classifier name (presumably the first level is the CV
# repetition -- TODO confirm).
allScores  = pd.read_csv("results/CVscores.csv", index_col=0, header = [0,1])

In [10]:
# Consensus score per first-level column group (presumably one group per CV
# repetition): the row-wise mean over all columns found under that group.
#
# The earlier version also passed `names=range(nCV)` to `pd.concat`. pandas
# only uses `names` together with `keys`, so the argument had no effect and is
# dropped; the resulting columns remain the default positions 0..n-1.
consensusScores = pd.concat(
    [allScores[cv].mean(axis=1) for cv in allScores.columns.levels[0]],
    axis=1,
)

In [11]:
# Threshold the consensus scores at 0.5 to obtain hard 0/1 predictions.
# A vectorised comparison replaces the element-wise `applymap` + lambda
# (`DataFrame.applymap` is deprecated since pandas 2.1). NaN scores compare as
# False and therefore become 0, exactly as the lambda treated them.
consensusPreds = (consensusScores >= .5).astype("int64")

In [12]:
# Start from an empty table (one row per metric name, one column per
# prediction column) and fill it one metric row at a time.
metricsConsensus = pd.DataFrame(index=list(metrics), columns=consensusPreds.columns)

for metrName, metrFun in metrics.items():
    # `.values.T` iterates column-wise, so every zip step pairs one column of
    # thresholded predictions with the score column it was derived from.
    metricsConsensus.loc[metrName] = [
        metrFun(Y, colPreds, colScores)
        for colPreds, colScores in zip(consensusPreds.values.T, consensusScores.values.T)
    ]

In [14]:
# Cast the metric table to float, put the metrics on the columns, and let
# `describe()` summarise each metric; `getTableFromStats` formats the summary
# for display.
getTableFromStats(metricsConsensus.astype(float).T.describe())

Unnamed: 0,median,"(1st quartile, 3rd quartile)"
spec,0.2741935483870967,"(0.2661290322580645, 0.28225806451612906)"
sens,0.9835796387520526,"(0.9819376026272578, 0.986863711001642)"
balacc,0.6288865935695747,"(0.6237916468033264, 0.635381905821283)"
mcc,0.4101303213149657,"(0.3929275391741598, 0.4279124032778319)"
ppv,0.8693759071117562,"(0.867705893424795, 0.8712011577424024)"
npv,0.7763888888888889,"(0.75, 0.813953488372093)"
PRcurve,0.9099807890559276,"(0.9044372890909018, 0.9142253457325145)"
ROCcurve,0.7367941893108745,"(0.7273144101912178, 0.7455588881826368)"


## Independent classifiers results

In [16]:
# Threshold every individual score column at 0.5 to obtain hard 0/1
# predictions. A vectorised comparison replaces the element-wise `applymap` +
# lambda (`DataFrame.applymap` is deprecated since pandas 2.1). NaN scores
# compare as False and therefore become 0, exactly as the lambda treated them.
indipendentPreds = (allScores >= .5).astype("int64")

In [17]:
# Start from an empty table (one row per metric name, one column per
# prediction column) and fill it one metric row at a time.
indipendentMetrics = pd.DataFrame(index=list(metrics), columns=indipendentPreds.columns)

for metrName, metrFun in metrics.items():
    # `.values.T` iterates column-wise, so every zip step pairs one column of
    # thresholded predictions with the score column it was derived from.
    indipendentMetrics.loc[metrName] = [
        metrFun(Y, colPreds, colScores)
        for colPreds, colScores in zip(indipendentPreds.values.T, allScores.values.T)
    ]

In [18]:
# Summary statistics per classifier: keep every column whose second-level
# label equals the classifier name, cast to float, put the metrics on the
# columns, and let `describe()` summarise each metric over the kept columns.
indipendentMetricsDict = {
    name: indipendentMetrics.loc[:, (slice(None), name)].astype(float).T.describe()
    for name in clsf
}

In [19]:
# Format one summary table per classifier and place them side by side,
# following the order of `clsf`.
results = pd.concat(
    [getTableFromStats(indipendentMetricsDict[name]) for name in clsf],
    axis=1,
)

In [20]:
# Relabel the columns as (classifier, statistic) pairs. The statistic labels
# are taken from the first two columns, i.e. this assumes every per-classifier
# table contributes exactly two columns in the same order.
results.columns = pd.MultiIndex.from_product([clsf, results.columns[:2]])

In [21]:
# Display the combined per-classifier summary table.
results

Unnamed: 0_level_0,RF,RF,SVM,SVM,XGB,XGB,KNN,KNN,ADA,ADA
Unnamed: 0_level_1,median,"(1st quartile, 3rd quartile)",median,"(1st quartile, 3rd quartile)",median,"(1st quartile, 3rd quartile)",median,"(1st quartile, 3rd quartile)",median,"(1st quartile, 3rd quartile)"
spec,0.3064516129032258,"(0.29838709677419356, 0.33064516129032256)",0.1532258064516129,"(0.1431451612903226, 0.1693548387096774)",0.3548387096774194,"(0.3387096774193548, 0.3709677419354839)",0.2580645161290322,"(0.24798387096774194, 0.26814516129032256)",0.3225806451612903,"(0.31451612903225806, 0.3467741935483871)"
sens,0.9704433497536946,"(0.9683908045977012, 0.9753694581280788)",0.9901477832512317,"(0.9880952380952381, 0.9917898193760263)",0.9507389162561576,"(0.9458128078817734, 0.9556650246305419)",0.973727422003284,"(0.9704433497536946, 0.9770114942528736)",0.9507389162561576,"(0.9458128078817734, 0.9556650246305419)"
balacc,0.638857990359659,"(0.6327731871391493, 0.6497232374596112)",0.5716867948514223,"(0.5656201996927803, 0.5812840987340432)",0.6518949626569204,"(0.6430822607129615, 0.6608533290958207)",0.616233645849886,"(0.6100892526087187, 0.6214974310079984)",0.6391228348959161,"(0.6307984400656814, 0.6487019307166694)"
mcc,0.3939154177180763,"(0.374229975481678, 0.42018248039064154)",0.2961433703512155,"(0.2754783994523376, 0.32382002743607924)",0.3768330528556742,"(0.36085025012204003, 0.39687944242117523)",0.3517419675117438,"(0.33351974564392595, 0.3654260242968346)",0.3538666673204373,"(0.33205841686919163, 0.3724707715702157)"
ppv,0.8731453352705498,"(0.8712322792190144, 0.8764705882352941)",0.8516949152542372,"(0.8498243392255145, 0.8545710716932419)",0.878419452887538,"(0.8755690440060698, 0.8812785388127854)",0.865979381443299,"(0.8637351778656126, 0.8676470588235294)",0.8738738738738738,"(0.8713365764642231, 0.8772727272727273)"
npv,0.6851851851851852,"(0.6602830188679245, 0.7149595687331536)",0.7593103448275862,"(0.7142857142857143, 0.8)",0.5918272794662713,"(0.5704633204633205, 0.6178405572755419)",0.673469387755102,"(0.6415094339622641, 0.6956521739130435)",0.5771939328277356,"(0.5483354673495519, 0.5972222222222222)"
PRcurve,0.9213486620433166,"(0.9181802438455485, 0.9248661247799793)",0.884345233622349,"(0.8790598331605783, 0.8901862763989615)",0.8936535455892546,"(0.8896024503656084, 0.8972964855102302)",0.92359015900943,"(0.9216776148087398, 0.9268784565924392)",0.8974088035619099,"(0.8938559784823525, 0.9052630079305527)"
ROCcurve,0.7400451559934318,"(0.729663250172149, 0.747747166163462)",0.696299459717146,"(0.6830125403887917, 0.7072309179511627)",0.708667699560358,"(0.7007058106891255, 0.7203523094443561)",0.6901815509296043,"(0.6805478309232481, 0.6978620424810638)",0.6988088616981832,"(0.685892724720589, 0.7113608374384237)"
