In [2]:
# Model Selection
# ===============
# This notebook compares the scores of different model algorithms and 
# preprocessing methods. 
#
# As can be seen from the data below, support vector machine with standard
# scaling (tied with the quantile transformer) scored highest in accuracy,
# sensitivity, and informedness, while
# quadratic discriminant analysis with robust scaling scored highest in 
# precision and specificity. All models performed better with the fasting 
# blood sugar feature than without it, even though there were somewhat
# fewer samples to train the model when including this feature.
# 
# There are a number of considerations to take into account when deciding
# between support vector machine and quadratic discriminant analysis, as
# they performed well in different areas and have different strengths and
# weaknesses as models in general. One significant advantage of quadratic
# discriminant analysis is that it can be easier to analyze the choices made
# by this algorithm, which both increases our confidence in the model and
# helps us understand why a subject was classified one way or the other.
# Advantages of support vector machine include its robustness to outliers
# and to non-normal features; quadratic discriminant analysis assumes
# normally distributed, outlier-free data, and this dataset repeatedly
# violates both assumptions.
# Additionally, support vector machine can be used to predict the probability
# that a subject falls into one class or another, instead of merely making a
# classification, which helps increase our confidence in the model's decisions.
# 
# Although quadratic discriminant analysis is easier to analyze in a conventional 
# way, the data violate the assumptions made by this algorithm in several ways, 
# which negates the usual increase in confidence that this would otherwise inspire 
# (see Feature Selection for more details on this). Access to classification 
# probabilities would also be extremely useful, which is an advantage afforded only
# by support vector machine. Finally, a higher sensitivity is likely preferable to 
# a higher specificity, as we would rather falsely classify a subject as positive 
# for heart disease and perform additional unnecessary medical testing than 
# falsely classify a subject as negative and miss an impending myocardial infarction.
#
# Thus, we choose support vector machine as the basis for our model.
# Column names for the score table, in the order the fields appear in each
# entry of `scores_list` below. 'includes fbs' records whether the fasting
# blood sugar feature was part of the training data.
score_columns <- c('model', 'preprocessing', 'includes fbs', 'accuracy',
                   'precision', 'sensitivity', 'specificity', 'informedness')

# One entry per evaluated (model, preprocessing, includes fbs) combination.
scores_list <- list(
    list('ridge regression', 'standard scaling', TRUE, 0.8387, 0.8913, 0.8039, 0.881, 0.6849),
    list('ridge regression', 'robust scaling', TRUE, 0.8602, 0.8958, 0.8431, 0.881, 0.7241),
    list('ridge regression', 'quantile transformer', TRUE, 0.8602, 0.8958, 0.8431, 0.881, 0.7241),
    list('ridge regression', 'power transformer', TRUE, 0.8495, 0.8936, 0.8235, 0.881, 0.7045),
    list('ridge regression', 'normalization', TRUE, 0.8495, 0.8936, 0.8235, 0.881, 0.7045),
    list('ridge regression', 'pca', TRUE, 0.7419, 0.7547, 0.7843, 0.6905, 0.4748),
    list('ridge regression', 'robust scaling', FALSE, 0.7545, 0.7571, 0.8413, 0.6383, 0.4796),
    list('logistic regression', 'robust scaling', TRUE, 0.8602, 0.8958, 0.8431, 0.881, 0.7241),
    list('linear discriminant analysis', 'robust scaling', TRUE, 0.8602, 0.8958, 0.8431, 0.881, 0.7241),
    list('quadratic discriminant analysis', 'robust scaling', TRUE, 0.8817, 0.9167, 0.8627, 0.9048, 0.7675),
    list('multilayer perceptron', 'robust scaling', TRUE, 0.8925, 0.8868, 0.9216, 0.8571, 0.7787),
    list('random forest', 'robust scaling', TRUE, 0.8602, 0.8654, 0.8824, 0.8333, 0.7157),
    list('stochastic gradient descent', 'robust scaling', TRUE, 0.8602, 0.8654, 0.8824, 0.8333, 0.7157),
    list('k-nearest neighbors', 'robust scaling', TRUE, 0.8817, 0.9, 0.8824, 0.881, 0.7633),
    list('support vector machine', 'robust scaling', TRUE, 0.8925, 0.8868, 0.9216, 0.8571, 0.7787),
    list('support vector machine', 'standard scaling', TRUE, 0.9032, 0.8889, 0.9412, 0.8571, 0.7983),
    list('support vector machine', 'standard scaling', FALSE, 0.7727, 0.7436, 0.9206, 0.5745, 0.4951),
    list('support vector machine', 'none', TRUE, 0.7312, 0.7321, 0.8039, 0.6429, 0.4468),
    list('support vector machine', 'quantile transformer', TRUE, 0.9032, 0.8889, 0.9412, 0.8571, 0.7983),
    list('support vector machine', 'power transformer', TRUE, 0.8925, 0.8727, 0.9412, 0.8333, 0.7745),
    list('support vector machine', 'normalization', TRUE, 0.6989, 0.7347, 0.7059, 0.6905, 0.3964),
    list('support vector machine', 'pca', TRUE, 0.6989, 0.717, 0.7451, 0.6429, 0.388))

# Turn every entry into a one-row data frame with named, correctly typed
# columns before binding. Binding the raw unnamed lists directly yields
# arbitrary row names and auto-generated column names.
score_rows <- lapply(scores_list, function(entry) {
    # Fail loudly on a row with a missing or extra field.
    stopifnot(length(entry) == length(score_columns))
    as.data.frame(setNames(entry, score_columns),
                  check.names = FALSE,       # keep the space in 'includes fbs'
                  stringsAsFactors = FALSE)  # keep labels as character
})

scores <- do.call(rbind, score_rows)
rownames(scores) <- NULL  # sequential row names 1..n
scores

Unnamed: 0_level_0,model,preprocessing,includes fbs,accuracy,precision,sensitivity,specificity,informedness
Unnamed: 0_level_1,<chr>,<chr>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2,ridge regression,standard scaling,True,0.8387,0.8913,0.8039,0.881,0.6849
23,ridge regression,robust scaling,True,0.8602,0.8958,0.8431,0.881,0.7241
3,ridge regression,quantile transformer,True,0.8602,0.8958,0.8431,0.881,0.7241
4,ridge regression,power transformer,True,0.8495,0.8936,0.8235,0.881,0.7045
5,ridge regression,normalization,True,0.8495,0.8936,0.8235,0.881,0.7045
6,ridge regression,pca,True,0.7419,0.7547,0.7843,0.6905,0.4748
7,ridge regression,robust scaling,False,0.7545,0.7571,0.8413,0.6383,0.4796
8,logistic regression,robust scaling,True,0.8602,0.8958,0.8431,0.881,0.7241
9,linear discriminant analysis,robust scaling,True,0.8602,0.8958,0.8431,0.881,0.7241
10,quadratic discriminant analysis,robust scaling,True,0.8817,0.9167,0.8627,0.9048,0.7675
