In [304]:
import joblib
import matplotlib.pyplot as plt
import pandas
from functools import reduce
from sklearn.metrics import (
    plot_confusion_matrix,
    precision_recall_fscore_support,
    plot_roc_curve,
    accuracy_score,
    plot_precision_recall_curve,
    confusion_matrix,
)
from IPython.display import display, Markdown, Latex

from constants import *
from train import get_model_file_name, get_test_file_name, get_vectorizer_file_name

report_calculation_file = open("./report-extras/report-calculation.txt", "w")


def get_test_result_file_name(fold):
    return "./test_result_data/hasil_uji_{}.csv".format(fold)


ALGORITHMS = [
    NAIVE_BAYES_ID,
    SVM_ID,
    MULTILAYER_PERCEPTRON_ID,
    DECISION_TREE_ID,
    RANDOM_FOREST_ID,
]

test_results_per_algorithm = {
    NAIVE_BAYES_ID: [],
    SVM_ID: [],
    MULTILAYER_PERCEPTRON_ID: [],
    DECISION_TREE_ID: [],
    RANDOM_FOREST_ID: [],
}

plot_list = {
    NAIVE_BAYES_ID: {},
    SVM_ID: {},
    MULTILAYER_PERCEPTRON_ID: {},
    DECISION_TREE_ID: {},
    RANDOM_FOREST_ID: {},
}

for key in plot_list:
    roc_fig, roc_ax = plt.subplots()
    plot_list[key]["roc_ax"] = roc_ax
    plot_list[key]["roc_fig"] = roc_fig

    prc_fig, prc_ax = plt.subplots()
    plot_list[key]["prc_ax"] = prc_ax
    plot_list[key]["prc_fig"] = prc_fig

for fold in range(0, N_FOLDS):
    test_results_per_fold = []

    for algorithm_id in ALGORITHMS:
        model = joblib.load(get_model_file_name(algorithm_id, fold))

        test_file = pandas.read_csv(get_test_file_name(fold))
        data_test = test_file[DATA_KEY]
        target_test = test_file[TARGET_KEY]

        tfidf_vectorizer = joblib.load(get_vectorizer_file_name(fold))

        processed_data_test = tfidf_vectorizer.transform(data_test).toarray()

        predicted_data_test = model.predict(processed_data_test)

        # Plot and save confusion matrix
        plot_confusion_matrix(
            model,
            processed_data_test,
            target_test,
            labels=["f", "h"],
            display_labels=["Fakta", "Hoax"],
            cmap="Greys",
        )

        plt.ylabel("Kelas Prediksi")
        plt.xlabel("Hasil Prediksi")

        plt.savefig(
            "./images/{}_{}_CONFUSION_MATRIX.png".format(
                fold,
                algorithm_id,
            )
        )
        plt.clf()
        plt.close()

        # Plot and save ROC Curve
        name = "{}{}".format(ALGORITHM_SHORT_LABELS[algorithm_id], fold + 1)

        plot_roc_curve(
            model,
            processed_data_test,
            target_test,
            name=name,
            ax=plot_list[algorithm_id]["roc_ax"],
        )

        plot_precision_recall_curve(
            model,
            processed_data_test,
            target_test,
            name=name,
            ax=plot_list[algorithm_id]["prc_ax"],
        )

        tn, fp, fn, tp = confusion_matrix(
            target_test,
            predicted_data_test,
        ).ravel()


        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1_score = 2 * precision * recall / (precision + recall)
        accuracy = (tp + tn) / (tp + fp + tn + fn)

        display(
            Markdown(
                f"""
Pada diagram di bawah, dapat dilihat bahwa untuk fold ke-{fold + 1} pada algoritma *{ALGORITHM_LABELS[algorithm_id]}*, jumlah *true negative* (tn) = {tn}, *false positive* (fp) = {fp}, *false negative* (fn) = {fn}; Dan *true positive* (tp) = {tp}. Maka nilai *precision* = tp / (tp + fp) =  {tp} / ({tp} + {tp}) = {precision:0.4f}; Nilai *recall* = tp / (tp + fn) = {tp} / ({tp} + {fn}) = {recall:0.4f}; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ {precision:0.4f} ✕ {recall:0.4f} / ({precision:0.4f} + {recall:0.4f}) = {f1_score:0.4f}; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = {tp} + {tn} / ({tp} + {fp} + {tn} + {fn}) = {accuracy:0.4f}.
"""         )
        )


        print(
            """Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-{} pada algoritma {}, jumlah true negative (tn) = {:.2f}, false positive (fp) = {:.2f}, false negative (fn) = {:.2f}; Dan true positive (tp) = {:.2f}. Maka nilai precision = tp / (tp + fp) =  {:.2f} / ({:.2f} + {:.2f})= {:.2f}; Nilai recall = tp / (tp + fn) = {:.2f} / ({:.2f} + {:.2f}) = {:.2f}; F1-score = 2 x precision x recall / (precision + recall) = 2 x {:.2f} x {:.2f} / ({:.2f} + {:.2f}) = {:.2f}; Nilai accuracy = tp + tn / (tp + fp + tn + fn) = {:.2f} + {:.2f} / ({:.2f} + {:.2f} + {:.2f} + {:.2f}) = {:.2f}.\n
            """.format(
                fold + 1, 
                ALGORITHM_LABELS[algorithm_id],
                tn, fp, fn, tp,
                tp, tp, fp, precision,
                tp, tp, fn, recall,
                precision, recall, precision, recall, f1_score,
                tp, tn, tp, fp, tn, fn, accuracy,
            ).strip(
            ),
            file=report_calculation_file
        )
        
        precision, recall, f_score, support = precision_recall_fscore_support(
            target_test,
            predicted_data_test,
            pos_label="h",
            zero_division=0,
            average="binary",
        )

        accuracy = accuracy_score(target_test, predicted_data_test)

        test_results_per_algorithm[algorithm_id].append(
            {
                "Precision": precision,
                "Recall": recall,
                "F1-Score": f_score,
                "Accuracy": accuracy,
            }
        )

        test_results_per_fold.append(
            {
                "Algoritma": ALGORITHM_LABELS[algorithm_id],
                "Precision": precision,
                "Recall": recall,
                "F1-Score": f_score,
                "Accuracy": accuracy,
            }
        )

    print(
        "Berikut merupakan tabel hasil pengujian untuk fold {}.\n".format(
            fold + 1
        ),
        file=report_calculation_file
    )

    pandas.DataFrame(
        test_results_per_fold,
    ).to_csv(
        get_test_result_file_name(fold + 1),
        float_format="%0.4f",
    )

for algorithm_id in plot_list:
    roc_fig = plot_list[algorithm_id]["roc_fig"]
    roc_fig.savefig("./images/ROC_CURVE_{}.png".format(algorithm_id))
    roc_fig.clf()

    prc_fig = plot_list[algorithm_id]["prc_fig"]
    prc_fig.savefig("./images/PPC_CURVE_{}.png".format(algorithm_id))
    prc_fig.clf()

averages_list = []

for algorithm_id, test_result in test_results_per_algorithm.items():
    averages_list.append({
        "Algoritma": ALGORITHM_LABELS[algorithm_id],
        **pandas.DataFrame(test_result).mean().to_dict()
    })

report_average_df = pandas.DataFrame(
    averages_list
)
report_average_df.set_index(
    "Algoritma",
    inplace=True
)

report_average_df.agg({
    'Precision': ['min', 'max'],
    'Recall': ['min', 'max'],
    'F1-Score': ['min', 'max'],
    'Accuracy': ['min', 'max'],
})

report_average_df.to_csv(
    "./test_result_data/Rata-Rata Hasil Penelitian.csv",
    float_format="%0.4f"
)


Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-1 pada algoritma *Naive Bayes*, jumlah *true negative* (tn) = 3, *false positive* (fp) = 9, *false negative* (fn) = 0; Dan *true positive* (tp) = 48. Maka nilai *precision* = tp / (tp + fp) =  48 / (48 + 48) = 0.8421; Nilai *recall* = tp / (tp + fn) = 48 / (48 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.8421 ✕ 1.0000 / (0.8421 + 1.0000) = 0.9143$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 48 + 3 / (48 + 9 + 3 + 0) = 0.8500.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-1 pada algoritma *Support Vector Machine*, jumlah *true negative* (tn) = 2, *false positive* (fp) = 10, *false negative* (fn) = 0; Dan *true positive* (tp) = 48. Maka nilai *precision* = tp / (tp + fp) =  48 / (48 + 48) = 0.8276; Nilai *recall* = tp / (tp + fn) = 48 / (48 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.8276 ✕ 1.0000 / (0.8276 + 1.0000) = 0.9057$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 48 + 2 / (48 + 10 + 2 + 0) = 0.8333.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-1 pada algoritma *Multilayer Perceptron*, jumlah *true negative* (tn) = 9, *false positive* (fp) = 3, *false negative* (fn) = 8; Dan *true positive* (tp) = 40. Maka nilai *precision* = tp / (tp + fp) =  40 / (40 + 40) = 0.9302; Nilai *recall* = tp / (tp + fn) = 40 / (40 + 8) = 0.8333; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.9302 ✕ 0.8333 / (0.9302 + 0.8333) = 0.8791$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 40 + 9 / (40 + 3 + 9 + 8) = 0.8167.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-1 pada algoritma *Decision Tree*, jumlah *true negative* (tn) = 8, *false positive* (fp) = 4, *false negative* (fn) = 14; Dan *true positive* (tp) = 34. Maka nilai *precision* = tp / (tp + fp) =  34 / (34 + 34) = 0.8947; Nilai *recall* = tp / (tp + fn) = 34 / (34 + 14) = 0.7083; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.8947 ✕ 0.7083 / (0.8947 + 0.7083) = 0.7907$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 34 + 8 / (34 + 4 + 8 + 14) = 0.7000.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-1 pada algoritma *Random Forest*, jumlah *true negative* (tn) = 5, *false positive* (fp) = 7, *false negative* (fn) = 4; Dan *true positive* (tp) = 44. Maka nilai *precision* = tp / (tp + fp) =  44 / (44 + 44) = 0.8627; Nilai *recall* = tp / (tp + fn) = 44 / (44 + 4) = 0.9167; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.8627 ✕ 0.9167 / (0.8627 + 0.9167) = 0.8889$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 44 + 5 / (44 + 7 + 5 + 4) = 0.8167.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-2 pada algoritma *Naive Bayes*, jumlah *true negative* (tn) = 0, *false positive* (fp) = 1, *false negative* (fn) = 0; Dan *true positive* (tp) = 59. Maka nilai *precision* = tp / (tp + fp) =  59 / (59 + 59) = 0.9833; Nilai *recall* = tp / (tp + fn) = 59 / (59 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.9833 ✕ 1.0000 / (0.9833 + 1.0000) = 0.9916$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 59 + 0 / (59 + 1 + 0 + 0) = 0.9833.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-2 pada algoritma *Support Vector Machine*, jumlah *true negative* (tn) = 0, *false positive* (fp) = 1, *false negative* (fn) = 0; Dan *true positive* (tp) = 59. Maka nilai *precision* = tp / (tp + fp) =  59 / (59 + 59) = 0.9833; Nilai *recall* = tp / (tp + fn) = 59 / (59 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.9833 ✕ 1.0000 / (0.9833 + 1.0000) = 0.9916$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 59 + 0 / (59 + 1 + 0 + 0) = 0.9833.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-2 pada algoritma *Multilayer Perceptron*, jumlah *true negative* (tn) = 1, *false positive* (fp) = 0, *false negative* (fn) = 21; Dan *true positive* (tp) = 38. Maka nilai *precision* = tp / (tp + fp) =  38 / (38 + 38) = 1.0000; Nilai *recall* = tp / (tp + fn) = 38 / (38 + 21) = 0.6441; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 1.0000 ✕ 0.6441 / (1.0000 + 0.6441) = 0.7835$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 38 + 1 / (38 + 0 + 1 + 21) = 0.6500.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-2 pada algoritma *Decision Tree*, jumlah *true negative* (tn) = 0, *false positive* (fp) = 1, *false negative* (fn) = 8; Dan *true positive* (tp) = 51. Maka nilai *precision* = tp / (tp + fp) =  51 / (51 + 51) = 0.9808; Nilai *recall* = tp / (tp + fn) = 51 / (51 + 8) = 0.8644; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.9808 ✕ 0.8644 / (0.9808 + 0.8644) = 0.9189$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 51 + 0 / (51 + 1 + 0 + 8) = 0.8500.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-2 pada algoritma *Random Forest*, jumlah *true negative* (tn) = 0, *false positive* (fp) = 1, *false negative* (fn) = 6; Dan *true positive* (tp) = 53. Maka nilai *precision* = tp / (tp + fp) =  53 / (53 + 53) = 0.9815; Nilai *recall* = tp / (tp + fn) = 53 / (53 + 6) = 0.8983; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.9815 ✕ 0.8983 / (0.9815 + 0.8983) = 0.9381$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 53 + 0 / (53 + 1 + 0 + 6) = 0.8833.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-3 pada algoritma *Naive Bayes*, jumlah *true negative* (tn) = 0, *false positive* (fp) = 9, *false negative* (fn) = 0; Dan *true positive* (tp) = 51. Maka nilai *precision* = tp / (tp + fp) =  51 / (51 + 51) = 0.8500; Nilai *recall* = tp / (tp + fn) = 51 / (51 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.8500 ✕ 1.0000 / (0.8500 + 1.0000) = 0.9189$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 51 + 0 / (51 + 9 + 0 + 0) = 0.8500.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-3 pada algoritma *Support Vector Machine*, jumlah *true negative* (tn) = 0, *false positive* (fp) = 9, *false negative* (fn) = 0; Dan *true positive* (tp) = 51. Maka nilai *precision* = tp / (tp + fp) =  51 / (51 + 51) = 0.8500; Nilai *recall* = tp / (tp + fn) = 51 / (51 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.8500 ✕ 1.0000 / (0.8500 + 1.0000) = 0.9189$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 51 + 0 / (51 + 9 + 0 + 0) = 0.8500.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-3 pada algoritma *Multilayer Perceptron*, jumlah *true negative* (tn) = 0, *false positive* (fp) = 9, *false negative* (fn) = 6; Dan *true positive* (tp) = 45. Maka nilai *precision* = tp / (tp + fp) =  45 / (45 + 45) = 0.8333; Nilai *recall* = tp / (tp + fn) = 45 / (45 + 6) = 0.8824; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.8333 ✕ 0.8824 / (0.8333 + 0.8824) = 0.8571$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 45 + 0 / (45 + 9 + 0 + 6) = 0.7500.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-3 pada algoritma *Decision Tree*, jumlah *true negative* (tn) = 3, *false positive* (fp) = 6, *false negative* (fn) = 11; Dan *true positive* (tp) = 40. Maka nilai *precision* = tp / (tp + fp) =  40 / (40 + 40) = 0.8696; Nilai *recall* = tp / (tp + fn) = 40 / (40 + 11) = 0.7843; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.8696 ✕ 0.7843 / (0.8696 + 0.7843) = 0.8247$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 40 + 3 / (40 + 6 + 3 + 11) = 0.7167.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-3 pada algoritma *Random Forest*, jumlah *true negative* (tn) = 4, *false positive* (fp) = 5, *false negative* (fn) = 5; Dan *true positive* (tp) = 46. Maka nilai *precision* = tp / (tp + fp) =  46 / (46 + 46) = 0.9020; Nilai *recall* = tp / (tp + fn) = 46 / (46 + 5) = 0.9020; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.9020 ✕ 0.9020 / (0.9020 + 0.9020) = 0.9020$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 46 + 4 / (46 + 5 + 4 + 5) = 0.8333.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-4 pada algoritma *Naive Bayes*, jumlah *true negative* (tn) = 2, *false positive* (fp) = 23, *false negative* (fn) = 0; Dan *true positive* (tp) = 35. Maka nilai *precision* = tp / (tp + fp) =  35 / (35 + 35) = 0.6034; Nilai *recall* = tp / (tp + fn) = 35 / (35 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.6034 ✕ 1.0000 / (0.6034 + 1.0000) = 0.7527$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 35 + 2 / (35 + 23 + 2 + 0) = 0.6167.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-4 pada algoritma *Support Vector Machine*, jumlah *true negative* (tn) = 2, *false positive* (fp) = 23, *false negative* (fn) = 0; Dan *true positive* (tp) = 35. Maka nilai *precision* = tp / (tp + fp) =  35 / (35 + 35) = 0.6034; Nilai *recall* = tp / (tp + fn) = 35 / (35 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.6034 ✕ 1.0000 / (0.6034 + 1.0000) = 0.7527$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 35 + 2 / (35 + 23 + 2 + 0) = 0.6167.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-4 pada algoritma *Multilayer Perceptron*, jumlah *true negative* (tn) = 6, *false positive* (fp) = 19, *false negative* (fn) = 1; Dan *true positive* (tp) = 34. Maka nilai *precision* = tp / (tp + fp) =  34 / (34 + 34) = 0.6415; Nilai *recall* = tp / (tp + fn) = 34 / (34 + 1) = 0.9714; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.6415 ✕ 0.9714 / (0.6415 + 0.9714) = 0.7727$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 34 + 6 / (34 + 19 + 6 + 1) = 0.6667.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-4 pada algoritma *Decision Tree*, jumlah *true negative* (tn) = 8, *false positive* (fp) = 17, *false negative* (fn) = 3; Dan *true positive* (tp) = 32. Maka nilai *precision* = tp / (tp + fp) =  32 / (32 + 32) = 0.6531; Nilai *recall* = tp / (tp + fn) = 32 / (32 + 3) = 0.9143; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.6531 ✕ 0.9143 / (0.6531 + 0.9143) = 0.7619$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 32 + 8 / (32 + 17 + 8 + 3) = 0.6667.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-4 pada algoritma *Random Forest*, jumlah *true negative* (tn) = 4, *false positive* (fp) = 21, *false negative* (fn) = 3; Dan *true positive* (tp) = 32. Maka nilai *precision* = tp / (tp + fp) =  32 / (32 + 32) = 0.6038; Nilai *recall* = tp / (tp + fn) = 32 / (32 + 3) = 0.9143; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.6038 ✕ 0.9143 / (0.6038 + 0.9143) = 0.7273$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 32 + 4 / (32 + 21 + 4 + 3) = 0.6000.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-5 pada algoritma *Naive Bayes*, jumlah *true negative* (tn) = 1, *false positive* (fp) = 19, *false negative* (fn) = 0; Dan *true positive* (tp) = 40. Maka nilai *precision* = tp / (tp + fp) =  40 / (40 + 40) = 0.6780; Nilai *recall* = tp / (tp + fn) = 40 / (40 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.6780 ✕ 1.0000 / (0.6780 + 1.0000) = 0.8081$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 40 + 1 / (40 + 19 + 1 + 0) = 0.6833.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-5 pada algoritma *Support Vector Machine*, jumlah *true negative* (tn) = 1, *false positive* (fp) = 19, *false negative* (fn) = 0; Dan *true positive* (tp) = 40. Maka nilai *precision* = tp / (tp + fp) =  40 / (40 + 40) = 0.6780; Nilai *recall* = tp / (tp + fn) = 40 / (40 + 0) = 1.0000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.6780 ✕ 1.0000 / (0.6780 + 1.0000) = 0.8081$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 40 + 1 / (40 + 19 + 1 + 0) = 0.6833.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-5 pada algoritma *Multilayer Perceptron*, jumlah *true negative* (tn) = 2, *false positive* (fp) = 18, *false negative* (fn) = 3; Dan *true positive* (tp) = 37. Maka nilai *precision* = tp / (tp + fp) =  37 / (37 + 37) = 0.6727; Nilai *recall* = tp / (tp + fn) = 37 / (37 + 3) = 0.9250; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.6727 ✕ 0.9250 / (0.6727 + 0.9250) = 0.7789$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 37 + 2 / (37 + 18 + 2 + 3) = 0.6500.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-5 pada algoritma *Decision Tree*, jumlah *true negative* (tn) = 6, *false positive* (fp) = 14, *false negative* (fn) = 6; Dan *true positive* (tp) = 34. Maka nilai *precision* = tp / (tp + fp) =  34 / (34 + 34) = 0.7083; Nilai *recall* = tp / (tp + fn) = 34 / (34 + 6) = 0.8500; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.7083 ✕ 0.8500 / (0.7083 + 0.8500) = 0.7727$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 34 + 6 / (34 + 14 + 6 + 6) = 0.6667.



Pada diagram dibawah, dapat dilihat bahwa untuk fold ke-5 pada algoritma *Random Forest*, jumlah *true negative* (tn) = 2, *false positive* (fp) = 18, *false negative* (fn) = 4; Dan *true positive* (tp) = 36. Maka nilai *precision* = tp / (tp + fp) =  36 / (36 + 36) = 0.6667; Nilai *recall* = tp / (tp + fn) = 36 / (36 + 4) = 0.9000; *F1-Score* = 2 ✕ *precision* ✕ *recall* / (*precision* + *recall*) = 2 ✕ 0.6667 ✕ 0.9000 / (0.6667 + 0.9000) = 0.7660$; Nilai *accuracy* = tp + tn / (tp + fp + tn + fn) = 36 + 2 / (36 + 18 + 2 + 4) = 0.6333.


<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

In [251]:
def highlight(s):
    is_max = s == s.max()
    is_min = s == s.min()
    
    return ['background-color: green; color: white;' if v == s.max() else 'background-color: red' if v == s.min() else '' for v in s]    

report_average_df.style.apply(
    highlight
).format(
    "{:0.4f}"
)


Unnamed: 0_level_0,Precision,Recall,F1-Score,Accuracy
Algoritma,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Naive Bayes,0.7914,1.0,0.8771,0.7967
Support Vector Machine,0.7885,1.0,0.8754,0.7933
Multilayer Perceptron,0.8156,0.8512,0.8143,0.7067
Decision Tree,0.8213,0.8243,0.8138,0.72
Random Forest,0.8033,0.9062,0.8444,0.7533


In [277]:
def join_with_and(target_list, conjunction="dan"):
    target_list = [f"*{word}*" for word in target_list]

    if (len(target_list) == 0):
        return ""
    elif (len(target_list) == 1):
        return target_list[0]
    elif (len(target_list) == 2):
        return f"{target_list[0]} {conjunction} {target_list[1]}"
    else:
        return ", ".join(target_list[:-1]) + f", {conjunction} "  + target_list[-1]

def better_idxmin(data_frame):
    return data_frame.idxmin().reset_index().apply(
        lambda row: [
            row["index"],
            data_frame[
                data_frame[row["index"]] == data_frame.min()[row["index"]]
            ].index.to_list(),
        ],
        axis=1,
        result_type="expand",
    ).set_index(0).T.iloc[0]

def better_idxmax(data_frame):
    return data_frame.idxmax().reset_index().apply(
        lambda row: [
            row["index"],
            data_frame[
                data_frame[row["index"]] == data_frame.max()[row["index"]]
            ].index.to_list(),
        ],
        axis=1,
        result_type="expand",
    ).set_index(0).T.iloc[0]

min_idxs = better_idxmin(report_average_df).apply(join_with_and) 
max_idxs = better_idxmin(report_average_df).apply(join_with_and) 
max_nums = report_average_df.max().apply(lambda num: f"{num:0.4f}")
min_nums = report_average_df.min().apply(lambda num: f"{num:0.4f}")

In [286]:
from IPython.display import display, Markdown, Latex
display(Markdown(f"""

Dari hasil pengujian, dapat dilihat bahwa dari rata-rata nilai *precision*, algoritma {max_idxs["Precision"]} mendapat nilai tertinggi yaitu {max_nums["Precision"]}, sedangkan {min_idxs["Precision"]} terendah pada {min_nums["Precision"]}. Untuk rata-rata nilai *recall*, {max_idxs["Recall"]} mendapat nilai tertinggi dengan skor {max_nums["Recall"]} sedangkan {min_idxs["Recall"]} terendah dengan skor {min_nums["Recall"]}. Untuk *F1-Score*, rata-rata nilai tertinggi terdapat pada algoritma {max_idxs["F1-Score"]}, yaitu {max_nums["F1-Score"]}, sementara rata-rata nilai terendah terdapat pada algoritma {min_idxs["F1-Score"]} dengan nilai {min_nums["F1-Score"]}. Pada rata-rata nilai *accuracy*, nilai tertinggi berada pada algoritma {max_idxs["Accuracy"]} dengan nilai {max_nums["Accuracy"]} dan nilai terendah berada pada algoritma {min_idxs["Accuracy"]} dengan nilai {min_nums["Accuracy"]}.
"""
))




Dari hasil pengujian, dapat dilihat bahwa dari rata-rata nilai *precision*, algoritma *Support Vector Machine* mendapat nilai tertinggi yaitu 0.8213, sedangkan *Support Vector Machine* terendah pada 0.7885. Untuk rata-rata nilai *recall*, *Decision Tree* mendapat nilai tertinggi dengan skor 1.0000 sedangkan *Decision Tree* terendah dengan skor 0.8243. Untuk *F1-Score*, rata-rata nilai tertinggi terdapat pada algoritma *Decision Tree*, yaitu 0.8771, sementara rata-rata nilai terendah terdapat pada algoritma *Decision Tree* dengan nilai 0.8138. Pada rata-rata nilai *accuracy*, nilai tertinggi berada pada algoritma *Multilayer Perceptron* dengan nilai 0.7967 dan nilai terendah berada pada algoritma *Multilayer Perceptron* dengan nilai 0.7067.
