In [None]:
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

# Base

In [None]:
# Base / Logistic Regression counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
base_conf_matrix_lg = np.array([
    [147364, 50424],  # actual negatives: TN, FP
    [648, 1564],      # actual positives: FN, TP
])

In [None]:
# Base / Random Forest counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
base_conf_matrix_rf = np.array([
    [157017, 40771],  # actual negatives: TN, FP
    [755, 1457],      # actual positives: FN, TP
])

In [None]:
# Base / Naive Bayes counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
base_conf_matrix_nb = np.array([
    [132735, 65053],  # actual negatives: TN, FP
    [636, 1576],      # actual positives: FN, TP
])

In [None]:
# Base / Ensemble Learning counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
base_conf_matrix_el = np.array([
    [148111, 49677],  # actual negatives: TN, FP
    [634, 1578],      # actual positives: FN, TP
])

# Transformation

In [None]:
# Transformation / Logistic Regression counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
tf_conf_matrix_lg = np.array([
    [149006, 48782],  # actual negatives: TN, FP
    [653, 1559],      # actual positives: FN, TP
])

In [None]:
# Transformation / Random Forest counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
tf_conf_matrix_rf = np.array([
    [158279, 39509],  # actual negatives: TN, FP
    [763, 1449],      # actual positives: FN, TP
])

In [None]:
# Transformation / Naive Bayes counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
tf_conf_matrix_nb = np.array([
    [134845, 62943],  # actual negatives: TN, FP
    [656, 1556],      # actual positives: FN, TP
])

In [None]:
# Transformation / Ensemble Learning counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
tf_conf_matrix_el = np.array([
    [149963, 47825],  # actual negatives: TN, FP
    [650, 1562],      # actual positives: FN, TP
])

# Polynomial

In [None]:
# Polynomial / Logistic Regression counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
# The Base and Transformation cells name their logistic-regression matrices
# with the `_lg` suffix, so that name is defined here as well.
pm_conf_matrix_lg = np.array([
    [152073, 45715],  # actual negatives: TN, FP
    [615, 1597],      # actual positives: FN, TP
])
# Original name, kept as an alias so existing references keep working.
pm_conf_matrix_lr = pm_conf_matrix_lg

In [None]:
# Polynomial / Random Forest counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
pm_conf_matrix_rf = np.array([
    [153258, 44530],  # actual negatives: TN, FP
    [697, 1515],      # actual positives: FN, TP
])

In [None]:
# Polynomial / Naive Bayes counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
pm_conf_matrix_nb = np.array([
    [103591, 94197],  # actual negatives: TN, FP
    [470, 1742],      # actual positives: FN, TP
])

In [None]:
# Polynomial / Ensemble Learning counts (same values as that entry of
# `confusion_matrices`). Rows are the true class, columns the predicted class.
pm_conf_matrix_el = np.array([
    [147355, 50433],  # actual negatives: TN, FP
    [575, 1637],      # actual positives: FN, TP
])

# Plot

In [None]:
# Raw 2x2 count tables, keyed first by method and then by algorithm.
# Each table is laid out as [[TN, FP], [FN, TP]].
_CONFUSION_COUNTS = {
    'Base': {
        'Logistic Regression': [[147364, 50424], [648, 1564]],
        'Random Forest': [[157017, 40771], [755, 1457]],
        'Naive Bayes': [[132735, 65053], [636, 1576]],
        'Ensemble Learning': [[148111, 49677], [634, 1578]],
    },
    'Transformation': {
        'Logistic Regression': [[149006, 48782], [653, 1559]],
        'Random Forest': [[158279, 39509], [763, 1449]],
        'Naive Bayes': [[134845, 62943], [656, 1556]],
        'Ensemble Learning': [[149963, 47825], [650, 1562]],
    },
    'Polynomial': {
        'Logistic Regression': [[152073, 45715], [615, 1597]],
        'Random Forest': [[153258, 44530], [697, 1515]],
        'Naive Bayes': [[103591, 94197], [470, 1742]],
        'Ensemble Learning': [[147355, 50433], [575, 1637]],
    },
}

# The lookup used by the plotting code: the same nesting and key order as the
# table above, with every count table converted to a NumPy array.
confusion_matrices = {
    method_name: {
        algorithm_name: np.array(counts)
        for algorithm_name, counts in per_algorithm.items()
    }
    for method_name, per_algorithm in _CONFUSION_COUNTS.items()
}

# Function to calculate metrics from confusion matrix
def calculate_metrics(conf_matrix):
    """Derive accuracy, precision, recall and F1 from a binary confusion matrix.

    Parameters
    ----------
    conf_matrix : array-like holding exactly four entries
        Counts that flatten (row-major) to ``tn, fp, fn, tp``, i.e. the
        layout ``[[tn, fp], [fn, tp]]``. NumPy arrays and plain nested
        lists are both accepted.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1_score)``. A metric whose
        denominator is zero is reported as ``0.0``.

    Raises
    ------
    ValueError
        If ``conf_matrix`` does not hold exactly four entries.
    """
    cells = np.asarray(conf_matrix)
    if cells.size != 4:
        raise ValueError(
            f"expected a 2x2 confusion matrix (4 entries), got shape {cells.shape}"
        )

    # tolist() hands back native Python numbers, so a zero denominator is
    # caught by the guards below instead of silently becoming NaN with a
    # NumPy RuntimeWarning.
    tn, fp, fn, tp = cells.ravel().tolist()

    total = tp + tn + fp + fn
    predicted_positive = tp + fp
    actual_positive = tp + fn

    accuracy = (tp + tn) / total if total != 0 else 0.0
    precision = tp / predicted_positive if predicted_positive != 0 else 0.0
    recall = tp / actual_positive if actual_positive != 0 else 0.0

    pr_sum = precision + recall
    f1_score = 2 * (precision * recall) / pr_sum if pr_sum != 0 else 0.0
    return accuracy, precision, recall, f1_score

# Plotting function
def plot_confusion_matrix(method, algorithm):
    """Draw one stored confusion matrix as a heatmap and print its metrics.

    Parameters
    ----------
    method : str
        Outer key of ``confusion_matrices``.
    algorithm : str
        Inner key of ``confusion_matrices[method]``.

    Raises
    ------
    KeyError
        If ``method`` or ``algorithm`` is not a key of ``confusion_matrices``.
    """
    conf_matrix = confusion_matrices[method][algorithm]
    accuracy, precision, recall, f1_score = calculate_metrics(conf_matrix)

    # Use the explicit Axes API throughout rather than mixing it with the
    # pyplot state machine.
    fig, ax = plt.subplots()
    heatmap = ax.matshow(conf_matrix, cmap=plt.cm.Blues)
    ax.set_title(f'Confusion Matrix for {algorithm} ({method})\n', y=1.1)
    fig.colorbar(heatmap)

    # Add numbers to the matrix. Cells in the darker half of the colour range
    # get white text; black text is unreadable on the darkest blue cells.
    midpoint = (conf_matrix.min() + conf_matrix.max()) / 2
    for (i, j), val in np.ndenumerate(conf_matrix):
        text_color = 'white' if val > midpoint else 'black'
        ax.text(j, i, f'{val}', ha='center', va='center', color=text_color)

    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    plt.show()

    # Display metrics as fractions in [0, 1], rounded to two decimals.
    # NOTE(review): the dummy-classifier figures on the right are fixed
    # reference values, not computed from `conf_matrix`; confirm they still
    # apply if the stored matrices change.
    print("     Algorithm vs Dummy Classifier")
    print("  -------------------------------------")
    print(f"    Accuracy:  {accuracy:.2f} vs 0.99")
    print(f"    Precision: {precision:.2f} vs 0.00")
    print(f"    Recall:    {recall:.2f} vs 0.00")
    print(f"    F1 Score:  {f1_score:.2f} vs 0.00")

# Define a layout for the buttons
button_layout = widgets.Layout(display='flex', justify_content='center', width='100%')

# Selection buttons for method and algorithm.
# The options are read from `confusion_matrices` so the buttons cannot drift
# out of sync with the stored data (a mismatch would raise KeyError inside
# plot_confusion_matrix).
method_widget = widgets.ToggleButtons(
    options=list(confusion_matrices),
    description="Method:",
    layout=button_layout
)

# Every method currently stores the same set of algorithms, so the first
# method's keys serve as the algorithm options.
algorithm_widget = widgets.ToggleButtons(
    options=list(next(iter(confusion_matrices.values()))),
    description="Algorithm:",
    layout=button_layout
)

# Interactive widget: left as the cell's last expression so the notebook
# renders it; changing either selection re-runs plot_confusion_matrix.
widgets.interactive(plot_confusion_matrix, method=method_widget, algorithm=algorithm_widget)


interactive(children=(ToggleButtons(description='Method:', layout=Layout(display='flex', justify_content='cent…

In [None]:

**Packages used:**

* `numpy`: For numerical operations and array manipulation.
* `matplotlib.pyplot`: For plotting the confusion matrix.
* `ipywidgets`: For creating interactive widgets.
* `IPython.display`: Provides `display` for rendering rich output (such as widgets) in the notebook.

**Process for the confusion matrix:**

1. **Calculate the confusion matrix:** The confusion matrix is a table that shows the number of true positives, true negatives, false positives, and false negatives for a given classification model.
2. **Plot the confusion matrix:** The confusion matrix can be plotted as a heatmap, where the darker colors represent higher values.
3. **Calculate and display metrics:** From the confusion matrix, we can calculate metrics such as accuracy, precision, recall, and F1 score. These metrics provide insights into the performance of the classification model.

**Importance of plotting the confusion matrix:**

The confusion matrix is an important tool for evaluating the performance of a classification model because it provides a visual representation of the model's predictions. This can help us to identify any areas where the model is performing poorly and make adjustments accordingly.

For example, if we see that the model has a high number of false positives, it means that the model is often predicting the positive class when the true class is negative. This can happen when the decision threshold is too low (the model is too sensitive) or when the classes are heavily imbalanced.

By plotting the confusion matrix, we can quickly and easily see where the model is making mistakes and take steps to improve its performance.