In [None]:
# evaluation_summary.py

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def calculate_and_summarize_metrics(method_results, *, average="binary"):
    """
    Calculate and summarize classification evaluation metrics for multiple feature selection methods.

    Parameters:
        method_results (dict): A dictionary where each key is a method name and its value is a dictionary
            containing 'y_true' and 'y_pred' lists or arrays. Methods missing either entry are skipped.
            Example:
            {
                "Method1": {"y_true": [...], "y_pred": [...]},
                "Method2": {"y_true": [...], "y_pred": [...]},
                "Method3": {"y_true": [...], "y_pred": [...]}
            }
        average (str): Keyword-only. Averaging strategy forwarded to precision, recall and F1.
            The default "binary" keeps the previous behaviour; pass e.g. "macro", "micro" or
            "weighted" for multiclass labels, which "binary" rejects with a ValueError.

    Returns:
        tuple: A tuple (message, formatted_output, df_table) where:
            - message (str): A status message.
            - formatted_output (dict): A dictionary with the computed metrics (in percent) for each
              method, or None when no method had usable data.
            - df_table (pd.DataFrame): A DataFrame containing the evaluation metrics for display,
              or None when no method had usable data.
    """
    # Options shared by the three averaged metrics; accuracy takes neither.
    averaged_kwargs = {"average": average, "zero_division": 0}
    summary_data = {}

    for method, results in method_results.items():
        y_true = results.get("y_true")
        y_pred = results.get("y_pred")
        if y_true is None or y_pred is None:
            continue  # Skip if necessary data isn't provided.

        # Scores are multiplied by 100 to express them as percentages.
        summary_data[method] = {
            "Accuracy": accuracy_score(y_true, y_pred) * 100,
            "Precision": precision_score(y_true, y_pred, **averaged_kwargs) * 100,
            "F-score": f1_score(y_true, y_pred, **averaged_kwargs) * 100,
            "Recall": recall_score(y_true, y_pred, **averaged_kwargs) * 100,
        }

    if not summary_data:
        return "No valid evaluation metrics computed. Please check your input data.", None, None

    # One row per method; the method name moves from the index into a "Method" column.
    df_table = pd.DataFrame.from_dict(summary_data, orient="index").reset_index()
    df_table.rename(columns={"index": "Method"}, inplace=True)

    return "Done!", summary_data, df_table

def visualize_evaluation_metrics(evaluation_data, save_path=None):
    """
    Visualize classification evaluation metrics as a grouped bar chart.

    Parameters:
        evaluation_data (dict): A dictionary where keys are method names and values are dictionaries
            containing classification metrics. The metric names (and their order on the x-axis) are
            taken from the first method; a metric missing from another method is plotted as 0.
            Example:
            {
                "Method1": {"Accuracy": 93.3, "Precision": 95.4, "F-score": 88.9, "Recall": 61.2},
                "Method2": {"Accuracy": 93.0, "Precision": 99.9, "F-score": 24.7, "Recall": 14.1},
                "Method3": {"Accuracy": 93.0, "Precision": 99.9, "F-score": 19.1, "Recall": 10.6}
            }
        save_path (str): Optional path to save the generated plot image.

    Returns:
        fig: The matplotlib figure object.

    Raises:
        ValueError: If evaluation_data is empty or None.
    """
    # Fail with a clear message instead of leaking StopIteration / AttributeError below.
    if not evaluation_data:
        raise ValueError("evaluation_data must contain at least one method to visualize.")

    # Extract metric names from the first method.
    metrics_keys = list(next(iter(evaluation_data.values())).keys())
    methods = list(evaluation_data.keys())
    n_metrics = len(metrics_keys)
    n_methods = len(methods)

    # Prepare a 2D array of data (rows: metrics, columns: methods)
    data = np.zeros((n_metrics, n_methods))
    for i, metric in enumerate(metrics_keys):
        for j, method in enumerate(methods):
            data[i, j] = evaluation_data[method].get(metric, 0)

    # Create a grouped bar chart.
    x = np.arange(n_metrics)  # positions for each metric on the x-axis
    width = 0.8 / n_methods   # each metric group spans 0.8 of an x unit in total

    fig, ax = plt.subplots(figsize=(10, 6))

    # Shift each method's bars by one bar width so the bars of a group sit side by side.
    for j, method in enumerate(methods):
        ax.bar(x + j * width, data[:, j], width, label=method)

    ax.set_ylabel('Metric Value (%)')
    ax.set_title('Classification Evaluation Metrics by Method')
    # Centre each tick label under its group of bars.
    ax.set_xticks(x + width * (n_methods - 1) / 2)
    ax.set_xticklabels(metrics_keys)
    ax.legend()

    # Operate on this figure explicitly rather than on pyplot's implicit current figure.
    fig.tight_layout()

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')

    return fig