# manual retry
WARNING:
https://github.com/IRkernel/IRkernel needs to be installed

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import pprint
from sklearn.pipeline import Pipeline

import utils
import skutils
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import StratifiedShuffleSplit, cross_val_score
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

gSeed = 47

# Use ggplot style
plt.style.use('ggplot')

%matplotlib inline



In [2]:
big = pd.read_csv('train.csv')
#test = pd.read_csv('test.csv')

big.species = big.species.astype('category')
big.species = big.species.cat.codes

In [3]:
def transform(data):
    ID = data.id
    X = data.drop(['species', 'id'], axis=1)
    y = data['species']
    return ID, X, y

ID, X, y = transform(big)

def addZeroColumn(df, colName):
    df.loc[df[colName] < 0.01, colName + '_is_small'] = 1
    df[colName + '_is_small'].fillna(0, inplace=True)

def addZeroColumns(df, colBaseName):
    for n in range(1,65):
        addZeroColumn(df, colBaseName + str(n))
        
addZeroColumns(X, 'margin')
addZeroColumns(X, 'texture')

In [4]:
%load_ext autoreload
%autoreload 2
%load_ext rpy2.ipython

In [5]:
%%R 
# use the R function to create all the same evaluation metrics
library(MLmetrics)

evaluateModel <- function(data,results) {
  # data: real NEVERPAYER column (actual values)
  # results: predicted NEVERPAYER column (predicted values)
  
  confMatrix <- table(data,results)
  print(confMatrix)
  
  err <- (confMatrix["J","N"]+confMatrix["N","J"])/sum(confMatrix)  
  acc <- (confMatrix["J","J"]+confMatrix["N","N"])/sum(confMatrix)  
  
  tpr <- confMatrix["J","J"]/(confMatrix["J","J"]+confMatrix["J","N"]) 
  tnr <- confMatrix["N","N"]/(confMatrix["N","N"]+confMatrix["N","J"]) 
  
  ppv <- confMatrix["J","J"]/(confMatrix["J","J"]+confMatrix["N","J"]) 
  npv <- confMatrix["N","N"]/(confMatrix["N","N"]+confMatrix["J","N"]) 
  
  fpr <- confMatrix["N","J"]/(confMatrix["N","N"]+confMatrix["N","J"]) 
  fnr <- confMatrix["J","N"]/(confMatrix["J","J"]+confMatrix["J","N"]) 
  
  rpp <- (confMatrix["J","J"]+confMatrix["N","J"])/sum(confMatrix) 
  rnp <- (confMatrix["J","J"]+confMatrix["J","N"])/sum(confMatrix) 
    
  kappa <- vcd::Kappa(confMatrix)
  kappa <- kappa$Unweighted[1]
  names(kappa) <- c("kappa") 
  
  lift <- tpr/rpp
  
  names(err) <- c("Error rate")
  names(acc) <- c("Accuracy")
  names(tpr) <- c("Sensitivity (true positives rate)")
  names(tnr) <- c("Specificity (true negatives rate)")
  names(ppv) <- c("Precision J")
  names(npv) <- c("Precision N")
  names(fpr) <- c("False positive rate")
  names(fnr) <- c("False negative rate")
  names(rpp) <- c("Rate of positive predictions")
  names(rnp) <- c("Rate of negative predictions")
  names(lift) <- c("Lift value")

  results <- list(err,acc,tpr,tnr,ppv,npv,fpr,fnr,rpp,rnp,lift, kappa)
  results
}

evaluateAllTheThings <- function(groundTruth, prediction){
    f1 <- MLmetrics::F1_Score(y_pred = prediction, y_true = groundTruth)
    auc <- MLmetrics::AUC(y_pred = prediction, y_true = groundTruth)
    names(f1) <- c("f1_R") 
    names(auc) <- c("AUC_R")

    predictionJN <- ifelse(prediction == 0,"N","J")
    groundTruthJN <- ifelse(groundTruth == 0,"N","J")

    evalA <- evaluateModel(groundTruthJN,predictionJN)
    
    index <- length(evalA)+1

    evalA[[index]] <- f1
    evalA[[index+1]] <- auc
    
    evalA
}

Attache Paket: ‘MLmetrics’



    Recall




In [6]:
def to_str(val):
    return str(val).split('"')[1]


def flatten_dict(d, prefix='__'):
    def items():
        for key, value in d.items():
            if isinstance(value, dict):
                for sub_key, sub_value in flatten_dict(value).items():
                    yield sub_key, sub_value
            else:
                yield key, value

    return dict(items())


class Observation():
    def __init__(self):
        self.statValues = {}
        self.modelName = ""

    def setModelName(self, nameOfModel):
        self.modelName = nameOfModel

    def addStatMetric(self, metricName, metricValue):
        self.statValues[metricName] = metricValue

def evalSingleModel(X, y_test, clf, modelName, variant, _verbose):
    y_predicted = clf.predict(X)

    if(_verbose):
        print(classification_report(y_test, y_predicted))
    # send the data to R
    groundTruth = y_test.values

    %Rpush groundTruth
    %Rpush y_predicted
    %R res <- evaluateAllTheThings(groundTruth, y_predicted)
    %Rpull res
    statsResults = dict([[to_str(j.names),j[0]] for i,j in enumerate(res)])
    obs = Observation()
    obs.setModelName(modelName + '-' + variant)
    
    for _kpi, value in statsResults.items():
        obs.addStatMetric(_kpi, value)
        
    obs.addStatMetric('typeOfRun', variant)
    if(_verbose):
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(statsResults)
    return obs

def splitOffValidation(X, y, _seed):
    split = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=_seed)
    for train_index, test_index in split.split(X, y):
        X_work = X.iloc[train_index]
        X_validation = X.iloc[test_index]
        y_work = y.iloc[train_index]
        y_validation = y.iloc[test_index]
    return X_work, X_validation, y_work, y_validation

def evaluateCV(X, y, pipeline, labelData,allResultsOfModels,_seed, _verbose=True):
    X_work, X_validation, y_work, y_validation = splitOffValidation(X, y, _seed)
    ##############################################################
    ### Train /Test
    split = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=_seed)
    foldCounter = 0
    for train_index, test_index in split.split(X_work, y_work):
        foldCounter += 1
        if _verbose:
            print("###################### Training Fold: ", foldCounter, " #################")
        if _verbose:
            print("TRAIN:", train_index)
            print("TEST:", test_index)
        X_train = X_work.iloc[train_index]
        X_test = X_work.iloc[test_index]
        
        y_train = y_work.iloc[train_index]
        y_test = y_work.iloc[test_index]
        X_validationCopy = X_validation.copy()
    
        pipeline.fit(X_train, y_train)

        allResultsOfModels.append(evalSingleModel(X_test, y_test, pipeline, labelData + '_' + str(foldCounter), 'training', _verbose))
    #############################################################
    ### Evaluation on validation set
    
    if _verbose:
        print("###################### Validation #################")
    
    pipeline.fit(X_work, y_work)
    allResultsOfModels.append(evalSingleModel(X_validation, y_validation, pipeline, labelData, 'validation', _verbose))
    
allResultsOfModels = []

In [7]:
from sklearn.svm import SVC

In [8]:
clf = RandomForestClassifier(n_estimators=10, n_jobs=-1)
evaluateCV(X, y, clf, '01_rf10', allResultsOfModels, gSeed)

clf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
evaluateCV(X, y, clf, '01_rf100', allResultsOfModels, gSeed)

clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
evaluateCV(X, y, clf, '01_rf1000', allResultsOfModels, gSeed)

clf = SVC()
evaluateCV(X, y, clf, '01_rf', allResultsOfModels, gSeed)

###################### Training Fold:  1  #################
TRAIN: [259  47  82 636  26 214 403  90 233 238 548 545 110 107 602 480 489 577
  76 404 285 437 321  19  49 376  94 179 339 184 518 278 477 302 616 325
 269 505 597 328 303 108 315 239 603 375  23 447 443 247 438 526 287 389
 473 309 654  53 326 401 406 614 360 103 204 691 161 196 413   2 252 296
 139 546 352 164 222 255 588 445 240 387  61 324 400 642 689 384 685 386
 385 142 610 573 323 116 237 688  13 529  12 193  38  21 618 169 576 319
 419 200   1 536 390 498 405 591 253 106 297 595 146 645 230 551 686 510
 417 305 635 147 673 219 207 580 589 647 598 145 575 683 436 607 687 463
 335  98 590 348 371 260 366 102 329 659 538 478  68 135 332 535 649 216
 118 148 249 264 513 220 350 630 322  22 481 410 679 664 369  67 359 173
 676 677 364 560 458 640 428 354 308  25  99 265 425 583 675 578 631  40
 609 572 275  42 327 137 494 474 632 236 104 316 355  74 562 493  96 351
 579 454 221 225 215 550 430 307 243 509 465 623  29 434 

  'precision', 'predicted', average, warn_for)


    results
data   J   N
   J 137   1
   N   0   1


{   'AUC_R': 2.0,
    'Accuracy': 0.9928057553956835,
    'Error rate': 0.007194244604316547,
    'False negative rate': 0.007246376811594203,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 0.5,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9856115107913669,
    'Sensitivity (true positives rate)': 0.9927536231884058,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 0.6666666666666666,
    'kappa': 0.6634382566585989}
###################### Training Fold:  2  #################
TRAIN: [ 60 545 625 533  59 355 423 159 223 577 576 688  95 510 397 572  31 135
 569 131 132 670 518  24 101 403 421 580 202 118   8 324 493 316  28 386
 331 283 342 557 371 548 608 246  26 477 140 327 547 542 116 393 516 687
 419 384 514 619 685 320 626 263 230 220 537 663   3 359 609  61 178 473
 275 165 413 366 242 334 241 388 245 554 189 385 147 161 254 375 104 219
 322 158 156 229 314 18

  'precision', 'predicted', average, warn_for)


    results
data   J   N
   J 136   2
   N   0   1


{   'AUC_R': 3.0,
    'Accuracy': 0.9856115107913669,
    'Error rate': 0.014388489208633094,
    'False negative rate': 0.014492753623188406,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 0.3333333333333333,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9784172661870504,
    'Sensitivity (true positives rate)': 0.9855072463768116,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 0.5,
    'kappa': 0.4945454545454513}
###################### Training Fold:  3  #################
TRAIN: [303 265 624 674 245 237 318  95 326 185 684 194 594 430 389 156 466 181
 532 440 545 484 372 396 319 304 288  93 508 554 480  53 541 160 147 201
   7 282 671 112 353 313 685 277 455 118 115 366  14 370 168 576 240 321
 100 457 272 470 563 512 391 469 641 433 505  83 511   5 598 579 578 485
 179 510 187 445 335 311 121 424 681 535 668  85 141 442 346 395 174 154
 622  21 359 356 306 12

  'precision', 'predicted', average, warn_for)


    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  4  #################
TRAIN: [675 565 152 353 421 197 636 603  87 461  30 618   8 648 574 610 480 456
 336  69 174 397 474 593 154  65 366  75 364 370 427 566 130 285 165 157
 128 307 546 575 327 586 595  22   0 263 293 365 626 633 689 249  91 252
 584 513 451 512 499   4 429 594  18 189 117 377 188 649 475  20 406 283
 166  80 303 289 612 494 578 290 426 308  83 349 522 271  31 401 106 616
 342 236 304 537 344 331 350 501 180  11 473 118 445 235 452 641 297  29
 149 228 600 301  25 464 465 382 394 169 121

  'precision', 'predicted', average, warn_for)


    results
data   J   N
   J 137   0
   N   0   2


{   'AUC_R': 1.5,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0145985401459854,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9856115107913669,
    'Rate of positive predictions': 0.9856115107913669,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  5  #################
TRAIN: [556 165  54 370 309 193 269 229 491  59 492 565 362 418 612 128 606 149
 422  98 241 381 304 635 604 336 654 261 355 231 187 200 475 199 463 522
  49 639 472 649 120 508 480 420 670 296  73  94 108 212 435 392 317  63
 308 663  83 145 218 223 593 653  62 268 678 243 146 470   2 102 173 235
 679 281 582   7   5 575 270 148 385 485 659  65  95 421 150 303 484 330
  96 374 397  22 517 578 618 251 534 464 162  80  21   6 380 400 561  52
 544 538  61 401 278 550 154 334   0 673 191

  'precision', 'predicted', average, warn_for)


    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 25.5,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Validation #################
             precision    recall  f1-score   support

          0       0.50      0.67      0.57         3
          1       0.60      1.00      0.75         3
          2       0.60      1.00      0.75         3
          3       0.75      1.00      0.86         3
          4       0.60      1.00      0.75         3
          5       1.00      1.00      1.00         3
          6       0.75      1.00      0.86         3
          7       0.60      1.00      0.75         3
          8       

  'precision', 'predicted', average, warn_for)


    results
data   J   N
   J 292   2
   N   1   2


{   'AUC_R': 1.6666666666666667,
    'Accuracy': 0.98989898989899,
    'Error rate': 0.010101010101010102,
    'False negative rate': 0.006802721088435374,
    'False positive rate': 0.3333333333333333,
    'Lift value': 1.0067562861322004,
    'Precision J': 0.9965870307167235,
    'Precision N': 0.5,
    'Rate of negative predictions': 0.98989898989899,
    'Rate of positive predictions': 0.9865319865319865,
    'Sensitivity (true positives rate)': 0.9931972789115646,
    'Specificity (true negatives rate)': 0.6666666666666666,
    'f1_R': 0.5714285714285715,
    'kappa': 0.5664233576642341}
###################### Training Fold:  1  #################
TRAIN: [259  47  82 636  26 214 403  90 233 238 548 545 110 107 602 480 489 577
  76 404 285 437 321  19  49 376  94 179 339 184 518 278 477 302 616 325
 269 505 597 328 303 108 315 239 603 375  23 447 443 247 438 526 287 389
 473 309 654  53 326 401 406 614 360 103 204 691 161 196 413   2 252 296
 139 546 352 164 222 255 588 445 240 387

  'precision', 'predicted', average, warn_for)


    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  2  #################
TRAIN: [ 60 545 625 533  59 355 423 159 223 577 576 688  95 510 397 572  31 135
 569 131 132 670 518  24 101 403 421 580 202 118   8 324 493 316  28 386
 331 283 342 557 371 548 608 246  26 477 140 327 547 542 116 393 516 687
 419 384 514 619 685 320 626 263 230 220 537 663   3 359 609  61 178 473
 275 165 413 366 242 334 241 388 245 554 189 385 147 161 254 375 104 219
 322 158 156 229 314 185 534 471 170 374 236 679 148 425 635 402 370 566
 134 206 458 109 672 201  14 112   6 631 356

    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  3  #################
TRAIN: [303 265 624 674 245 237 318  95 326 185 684 194 594 430 389 156 466 181
 532 440 545 484 372 396 319 304 288  93 508 554 480  53 541 160 147 201
   7 282 671 112 353 313 685 277 455 118 115 366  14 370 168 576 240 321
 100 457 272 470 563 512 391 469 641 433 505  83 511   5 598 579 578 485
 179 510 187 445 335 311 121 424 681 535 668  85 141 442 346 395 174 154
 622  21 359 356 306 124 271 300 384  28 127 233  27  56 244 342 294 247
 489 191 157 661 580 189 268 279 357 524 553

    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  4  #################
TRAIN: [675 565 152 353 421 197 636 603  87 461  30 618   8 648 574 610 480 456
 336  69 174 397 474 593 154  65 366  75 364 370 427 566 130 285 165 157
 128 307 546 575 327 586 595  22   0 263 293 365 626 633 689 249  91 252
 584 513 451 512 499   4 429 594  18 189 117 377 188 649 475  20 406 283
 166  80 303 289 612 494 578 290 426 308  83 349 522 271  31 401 106 616
 342 236 304 537 344 331 350 501 180  11 473 118 445 235 452 641 297  29
 149 228 600 301  25 464 465 382 394 169 121

    results
data   J   N
   J 137   0
   N   0   2


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0145985401459854,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9856115107913669,
    'Rate of positive predictions': 0.9856115107913669,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  5  #################
TRAIN: [556 165  54 370 309 193 269 229 491  59 492 565 362 418 612 128 606 149
 422  98 241 381 304 635 604 336 654 261 355 231 187 200 475 199 463 522
  49 639 472 649 120 508 480 420 670 296  73  94 108 212 435 392 317  63
 308 663  83 145 218 223 593 653  62 268 678 243 146 470   2 102 173 235
 679 281 582   7   5 575 270 148 385 485 659  65  95 421 150 303 484 330
  96 374 397  22 517 578 618 251 534 464 162  80  21   6 380 400 561  52
 544 538  61 401 278 550 154 334   0 673 191

  'precision', 'predicted', average, warn_for)


    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Validation #################
             precision    recall  f1-score   support

          0       1.00      1.00      1.00         3
          1       1.00      1.00      1.00         3
          2       1.00      1.00      1.00         3
          3       1.00      1.00      1.00         3
          4       1.00      1.00      1.00         3
          5       1.00      1.00      1.00         3
          6       1.00      1.00      1.00         3
          7       1.00      1.00      1.00         3
          8       1

    results
data   J   N
   J 294   0
   N   0   3


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.010204081632653,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.98989898989899,
    'Rate of positive predictions': 0.98989898989899,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  1  #################
TRAIN: [259  47  82 636  26 214 403  90 233 238 548 545 110 107 602 480 489 577
  76 404 285 437 321  19  49 376  94 179 339 184 518 278 477 302 616 325
 269 505 597 328 303 108 315 239 603 375  23 447 443 247 438 526 287 389
 473 309 654  53 326 401 406 614 360 103 204 691 161 196 413   2 252 296
 139 546 352 164 222 255 588 445 240 387  61 324 400 642 689 384 685 386
 385 142 610 573 323 116 237 688  13 529  12 193  38  21 618 169 576 319
 419 200   1 536 390 498 405 591 253 106 297 595 

    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  2  #################
TRAIN: [ 60 545 625 533  59 355 423 159 223 577 576 688  95 510 397 572  31 135
 569 131 132 670 518  24 101 403 421 580 202 118   8 324 493 316  28 386
 331 283 342 557 371 548 608 246  26 477 140 327 547 542 116 393 516 687
 419 384 514 619 685 320 626 263 230 220 537 663   3 359 609  61 178 473
 275 165 413 366 242 334 241 388 245 554 189 385 147 161 254 375 104 219
 322 158 156 229 314 185 534 471 170 374 236 679 148 425 635 402 370 566
 134 206 458 109 672 201  14 112   6 631 356

    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  3  #################
TRAIN: [303 265 624 674 245 237 318  95 326 185 684 194 594 430 389 156 466 181
 532 440 545 484 372 396 319 304 288  93 508 554 480  53 541 160 147 201
   7 282 671 112 353 313 685 277 455 118 115 366  14 370 168 576 240 321
 100 457 272 470 563 512 391 469 641 433 505  83 511   5 598 579 578 485
 179 510 187 445 335 311 121 424 681 535 668  85 141 442 346 395 174 154
 622  21 359 356 306 124 271 300 384  28 127 233  27  56 244 342 294 247
 489 191 157 661 580 189 268 279 357 524 553

    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  4  #################
TRAIN: [675 565 152 353 421 197 636 603  87 461  30 618   8 648 574 610 480 456
 336  69 174 397 474 593 154  65 366  75 364 370 427 566 130 285 165 157
 128 307 546 575 327 586 595  22   0 263 293 365 626 633 689 249  91 252
 584 513 451 512 499   4 429 594  18 189 117 377 188 649 475  20 406 283
 166  80 303 289 612 494 578 290 426 308  83 349 522 271  31 401 106 616
 342 236 304 537 344 331 350 501 180  11 473 118 445 235 452 641 297  29
 149 228 600 301  25 464 465 382 394 169 121

    results
data   J   N
   J 137   0
   N   0   2


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0145985401459854,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9856115107913669,
    'Rate of positive predictions': 0.9856115107913669,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  5  #################
TRAIN: [556 165  54 370 309 193 269 229 491  59 492 565 362 418 612 128 606 149
 422  98 241 381 304 635 604 336 654 261 355 231 187 200 475 199 463 522
  49 639 472 649 120 508 480 420 670 296  73  94 108 212 435 392 317  63
 308 663  83 145 218 223 593 653  62 268 678 243 146 470   2 102 173 235
 679 281 582   7   5 575 270 148 385 485 659  65  95 421 150 303 484 330
  96 374 397  22 517 578 618 251 534 464 162  80  21   6 380 400 561  52
 544 538  61 401 278 550 154 334   0 673 191

    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Validation #################
             precision    recall  f1-score   support

          0       1.00      1.00      1.00         3
          1       1.00      1.00      1.00         3
          2       1.00      1.00      1.00         3
          3       1.00      1.00      1.00         3
          4       1.00      1.00      1.00         3
          5       1.00      1.00      1.00         3
          6       1.00      1.00      1.00         3
          7       1.00      1.00      1.00         3
          8       1

    results
data   J   N
   J 294   0
   N   0   3


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.010204081632653,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.98989898989899,
    'Rate of positive predictions': 0.98989898989899,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Training Fold:  1  #################
TRAIN: [259  47  82 636  26 214 403  90 233 238 548 545 110 107 602 480 489 577
  76 404 285 437 321  19  49 376  94 179 339 184 518 278 477 302 616 325
 269 505 597 328 303 108 315 239 603 375  23 447 443 247 438 526 287 389
 473 309 654  53 326 401 406 614 360 103 204 691 161 196 413   2 252 296
 139 546 352 164 222 255 588 445 240 387  61 324 400 642 689 384 685 386
 385 142 610 573 323 116 237 688  13 529  12 193  38  21 618 169 576 319
 419 200   1 536 390 498 405 591 253 106 297 595 

  'precision', 'predicted', average, warn_for)


    results
data   J   N
   J 136   2
   N   0   1


{   'AUC_R': 4.0,
    'Accuracy': 0.9856115107913669,
    'Error rate': 0.014388489208633094,
    'False negative rate': 0.014492753623188406,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 0.3333333333333333,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9784172661870504,
    'Sensitivity (true positives rate)': 0.9855072463768116,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 0.5,
    'kappa': 0.4945454545454513}
###################### Training Fold:  2  #################
TRAIN: [ 60 545 625 533  59 355 423 159 223 577 576 688  95 510 397 572  31 135
 569 131 132 670 518  24 101 403 421 580 202 118   8 324 493 316  28 386
 331 283 342 557 371 548 608 246  26 477 140 327 547 542 116 393 516 687
 419 384 514 619 685 320 626 263 230 220 537 663   3 359 609  61 178 473
 275 165 413 366 242 334 241 388 245 554 189 385 147 161 254 375 104 219
 322 158 156 229 314 18

    results
data   J   N
   J 136   2
   N   0   1


{   'AUC_R': 6.0,
    'Accuracy': 0.9856115107913669,
    'Error rate': 0.014388489208633094,
    'False negative rate': 0.014492753623188406,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 0.3333333333333333,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9784172661870504,
    'Sensitivity (true positives rate)': 0.9855072463768116,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 0.5,
    'kappa': 0.4945454545454513}
###################### Training Fold:  3  #################
TRAIN: [303 265 624 674 245 237 318  95 326 185 684 194 594 430 389 156 466 181
 532 440 545 484 372 396 319 304 288  93 508 554 480  53 541 160 147 201
   7 282 671 112 353 313 685 277 455 118 115 366  14 370 168 576 240 321
 100 457 272 470 563 512 391 469 641 433 505  83 511   5 598 579 578 485
 179 510 187 445 335 311 121 424 681 535 668  85 141 442 346 395 174 154
 622  21 359 356 306 12

    results
data   J   N
   J 135   3
   N   0   1


{   'AUC_R': 16.0,
    'Accuracy': 0.9784172661870504,
    'Error rate': 0.02158273381294964,
    'False negative rate': 0.021739130434782608,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 0.25,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9712230215827338,
    'Sensitivity (true positives rate)': 0.9782608695652174,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 0.4,
    'kappa': 0.3930131004366815}
###################### Training Fold:  4  #################
TRAIN: [675 565 152 353 421 197 636 603  87 461  30 618   8 648 574 610 480 456
 336  69 174 397 474 593 154  65 366  75 364 370 427 566 130 285 165 157
 128 307 546 575 327 586 595  22   0 263 293 365 626 633 689 249  91 252
 584 513 451 512 499   4 429 594  18 189 117 377 188 649 475  20 406 283
 166  80 303 289 612 494 578 290 426 308  83 349 522 271  31 401 106 616
 342 236 304 537 344 331 350 501 180 

  nur für einen data frame definiert, mit nur nummerischen Variablen



             precision    recall  f1-score   support

          0       1.00      1.00      1.00         1
          1       0.33      1.00      0.50         1
          2       0.50      1.00      0.67         1
          3       0.50      1.00      0.67         1
          4       0.00      0.00      0.00         2
          5       0.00      0.00      0.00         2
          6       0.00      0.00      0.00         2
          7       0.00      0.00      0.00         2
          8       1.00      1.00      1.00         1
          9       1.00      1.00      1.00         1
         10       0.00      0.00      0.00         2
         11       0.00      0.00      0.00         2
         12       0.25      1.00      0.40         1
         13       0.00      0.00      0.00         1
         14       0.00      0.00      0.00         2
         15       0.33      1.00      0.50         1
         16       0.50      1.00      0.67         1
         17       0.14      1.00      0.25   

    results
data   J   N
   J 138   0
   N   0   1


{   'AUC_R': 2.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.0072463768115942,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.9928057553956835,
    'Rate of positive predictions': 0.9928057553956835,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}
###################### Validation #################
             precision    recall  f1-score   support

          0       1.00      1.00      1.00         3
          1       1.00      1.00      1.00         3
          2       1.00      1.00      1.00         3
          3       1.00      1.00      1.00         3
          4       1.00      1.00      1.00         3
          5       1.00      1.00      1.00         3
          6       1.00      1.00      1.00         3
          7       1.00      1.00      1.00         3
          8       0

    results
data   J   N
   J 294   0
   N   0   3


{   'AUC_R': 1.0,
    'Accuracy': 1.0,
    'Error rate': 0.0,
    'False negative rate': 0.0,
    'False positive rate': 0.0,
    'Lift value': 1.010204081632653,
    'Precision J': 1.0,
    'Precision N': 1.0,
    'Rate of negative predictions': 0.98989898989899,
    'Rate of positive predictions': 0.98989898989899,
    'Sensitivity (true positives rate)': 1.0,
    'Specificity (true negatives rate)': 1.0,
    'f1_R': 1.0,
    'kappa': 1.0}


In [9]:
# komisch - das crasht bei mir
#clf = LogisticRegression(solver='lbfgs', multi_class='multinomial', n_jobs=-1)
#evaluateCV(X, y, clf, '02_lr', allResultsOfModels, gSeed)

In [10]:
results = []
for res in allResultsOfModels:
    results.append(res.__dict__)

l = list(map(flatten_dict, results))
results = pd.DataFrame.from_dict(l)

In [12]:
train_res = results[results.typeOfRun != 'validation']
overview = train_res.groupby([train_res.modelName.str.split('_').str[1]]).describe().unstack(
    fill_value=0).loc[:,
           pd.IndexSlice[:, ['mean', 'std']]]#[['kappa', 'Lift value', 'False positive rate', 'False negative rate']]
overview.columns = ['{0[0]}_{0[1]}'.format(tup) for tup in overview.columns]
overview.sort_values('kappa_mean', ascending=False)

Unnamed: 0_level_0,AUC_R_mean,AUC_R_std,Accuracy_mean,Accuracy_std,Error rate_mean,Error rate_std,False negative rate_mean,False negative rate_std,False positive rate_mean,False positive rate_std,...,Rate of positive predictions_mean,Rate of positive predictions_std,Sensitivity (true positives rate)_mean,Sensitivity (true positives rate)_std,Specificity (true negatives rate)_mean,Specificity (true negatives rate)_std,f1_R_mean,f1_R_std,kappa_mean,kappa_std
modelName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
rf100,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.991367,0.003217,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
rf1000,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.991367,0.003217,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
rf10,6.6,10.59127,0.995683,0.006435,0.004317,0.006435,0.004348,0.006481,0.0,0.0,...,0.98705,0.006019,0.995652,0.006481,1.0,0.0,0.833333,0.235702,0.831597,0.238202
rf,8.8,6.723095,0.985612,0.008811,0.014388,0.008811,0.014493,0.008875,0.0,0.0,...,0.978417,0.008811,0.985507,0.008875,1.0,0.0,0.56,0.250998,0.555023,0.253877
