In [1]:
# Model Selection
# ===============
# This notebook compares the scores of different model algorithms, 
# preprocessing methods, and classification methods.
#
# Copyright 2020, 2021 Jerrad M. Genson
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

library(tidyverse)

# Ask git for the top-level directory of the working tree so the data path
# does not depend on the directory the kernel was started in.
GIT_ROOT <- system2('git', args=c('rev-parse', '--show-toplevel'), stdout=TRUE)

# With stdout=TRUE, system2() signals a non-zero exit code only through a
# warning and a 'status' attribute on the result, and the captured output may
# be empty. Stop here with a clear message instead of letting an unusable
# path reach read_csv().
if (!is.null(attr(GIT_ROOT, 'status')) || length(GIT_ROOT) != 1) {
    stop('Could not determine the git repository root; ',
         'run this notebook from inside the repository.')
}

DATA <- file.path(GIT_ROOT, 'data')
MODEL_DATA <- file.path(DATA, 'model_selection.csv')

# Load the model scores table and display it.
scores <- read_csv(MODEL_DATA)
scores

── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.4     [32m✔[39m [34mdplyr  [39m 1.0.2
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


[36m──[39m [1m[1mColumn specificati

model,preprocessing,accuracy,precision,sensitivity,specificity,informedness,dor,ami,outlier informedness,cv informedness,mad informedness,commit hash
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
qda,robust scaling,0.814,0.8095,0.8763,0.7333,0.6096,19.48,0.2993,0.4833,0.6036,0.08135,5236d5b
lda,robust scaling,0.814,0.8155,0.866,0.7467,0.6126,19.04,0.2981,0.55,0.5782,0.07821,9b44485
dtc,robust scaling,0.7791,0.7565,0.8969,0.6267,0.5236,14.6,0.2389,0.65,0.5143,0.07692,ad170e6
sgb,robust scaling,0.8081,0.7963,0.8866,0.7067,0.5933,18.83,0.2895,0.5667,0.5879,0.07756,87c9f52
rfc,robust scaling,0.8314,0.8269,0.8866,0.76,0.6466,24.76,0.3384,0.55,0.5627,0.0864,604f618
rfc,isomap,0.8081,0.82,0.8454,0.76,0.6054,17.31,0.2851,0.4667,0.6357,0.1183,614ff30
rfc,pca,0.843,0.8365,0.8969,0.7733,0.6702,29.68,0.3668,0.65,0.5782,0.09998,e16ba46
rfc,lle,0.7965,0.8444,0.7835,0.8133,0.5968,15.77,0.2693,0.4667,0.5334,0.1464,4bdf43f
rfc,lle hessian,0.8198,0.8235,0.866,0.76,0.626,20.46,0.3105,0.4833,0.5205,0.0957,3be6109
rfc,lle modified,0.7849,0.8061,0.8144,0.7467,0.5611,12.94,0.2394,0.4667,0.5314,0.1056,72f0897


In [2]:
# Rows whose informedness equals the column maximum (ties are all kept).
# na.rm = TRUE keeps a missing score from turning the maximum into NA, and
# which() drops NA comparisons so they cannot be selected as rows.
best_informedness <- scores[
    which(scores$informedness == max(scores$informedness, na.rm = TRUE)),
]
best_informedness

model,preprocessing,accuracy,precision,sensitivity,specificity,informedness,dor,ami,outlier informedness,cv informedness,mad informedness,commit hash
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
knn,factor analysis,0.8721,0.8788,0.8969,0.84,0.7369,45.67,0.4408,0.6333,0.6164,0.1071,05e50d5


In [3]:
# Rows whose dor equals the column maximum (ties are all kept).
# na.rm = TRUE keeps a missing dor from turning the maximum into NA, and
# which() drops NA comparisons so they cannot be selected as rows.
best_dor <- scores[
    which(scores$dor == max(scores$dor, na.rm = TRUE)),
]
best_dor

model,preprocessing,accuracy,precision,sensitivity,specificity,informedness,dor,ami,outlier informedness,cv informedness,mad informedness,commit hash
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
knn,factor analysis,0.8721,0.8788,0.8969,0.84,0.7369,45.67,0.4408,0.6333,0.6164,0.1071,05e50d5


In [4]:
# Rows whose sensitivity equals the column maximum; several rows can share
# the top value and all of them are returned in their original order.
# na.rm = TRUE keeps a missing score from turning the maximum into NA, and
# which() drops NA comparisons so they cannot be selected as rows.
best_sensitivity <- scores[
    which(scores$sensitivity == max(scores$sensitivity, na.rm = TRUE)),
]
best_sensitivity

model,preprocessing,accuracy,precision,sensitivity,specificity,informedness,dor,ami,outlier informedness,cv informedness,mad informedness,commit hash
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
dtc,robust scaling,0.7791,0.7565,0.8969,0.6267,0.5236,14.6,0.2389,0.65,0.5143,0.07692,ad170e6
rfc,pca,0.843,0.8365,0.8969,0.7733,0.6702,29.68,0.3668,0.65,0.5782,0.09998,e16ba46
knn,pca,0.8372,0.8286,0.8969,0.76,0.6569,27.55,0.3535,0.75,0.5506,0.1209,7ace118
knn,factor analysis,0.8721,0.8788,0.8969,0.84,0.7369,45.67,0.4408,0.6333,0.6164,0.1071,05e50d5


In [5]:
# Rows whose `cv informedness` equals the column maximum (ties are all kept).
# The column name contains a space, so it is accessed with [[ ]].
# na.rm = TRUE keeps a missing score from turning the maximum into NA, and
# which() drops NA comparisons so they cannot be selected as rows.
best_cv_informedness <- scores[
    which(scores[['cv informedness']] == max(scores[['cv informedness']], na.rm = TRUE)),
]
best_cv_informedness

model,preprocessing,accuracy,precision,sensitivity,specificity,informedness,dor,ami,outlier informedness,cv informedness,mad informedness,commit hash
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
rfc,isomap,0.8081,0.82,0.8454,0.76,0.6054,17.31,0.2851,0.4667,0.6357,0.1183,614ff30


In [6]:
# Distinct informedness values from highest to lowest; sort() drops NA.
informedness_levels <- unique(sort(scores$informedness, decreasing = TRUE))

# Rows holding the second-highest distinct informedness value. If there are
# fewer than two distinct values the lookup is NA; which() then selects no
# rows instead of letting NA comparisons through as row subscripts.
second_best_informedness <- scores[
    which(scores$informedness == informedness_levels[2]),
]
second_best_informedness

model,preprocessing,accuracy,precision,sensitivity,specificity,informedness,dor,ami,outlier informedness,cv informedness,mad informedness,commit hash
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
lrc,lle,0.8547,0.875,0.866,0.84,0.706,33.92,0.3948,0.7333,0.541,0.1554,0330f84


In [7]:
# Distinct informedness values from highest to lowest; sort() drops NA.
informedness_levels <- unique(sort(scores$informedness, decreasing = TRUE))

# Rows holding the third-highest distinct informedness value. If there are
# fewer than three distinct values the lookup is NA; which() then selects no
# rows instead of letting NA comparisons through as row subscripts.
third_best_informedness <- scores[
    which(scores$informedness == informedness_levels[3]),
]
third_best_informedness

model,preprocessing,accuracy,precision,sensitivity,specificity,informedness,dor,ami,outlier informedness,cv informedness,mad informedness,commit hash
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
knn,robust scaling,0.8372,0.8791,0.8247,0.8533,0.6781,27.38,0.3575,0.55,0.6219,0.1052,3ee0a51
knn,feature agglomeration,0.8372,0.8791,0.8247,0.8533,0.6781,27.38,0.3575,0.55,0.6219,0.1052,10fa6ee
