In [35]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import os
import numpy as np

def _read_class_indices(path, n_classes):
    """Read a whitespace-separated score file and return one class index per row.

    Each non-blank line must hold exactly ``n_classes`` numeric fields (a one-hot
    vector or per-class scores); the class index is the position of the row maximum
    (first position wins on ties).

    Raises:
        ValueError: If a line has the wrong number of fields, a field is not numeric,
            or the file contains no data rows.
    """
    rows = []
    with open(path, 'r') as handle:
        for line_no, line in enumerate(handle, start=1):
            fields = line.split()
            if not fields:
                continue  # tolerate blank / trailing lines
            if len(fields) != n_classes:
                raise ValueError(
                    f"{path}:{line_no}: expected {n_classes} values, found {len(fields)}"
                )
            rows.append([float(field) for field in fields])
    if not rows:
        raise ValueError(f"{path}: no data rows found")
    return np.asarray(rows).argmax(axis=1)


def process_multiple_inputs(input_file_output_folder_pairs, cate_num=3):
    """
    Processes multiple input file pairs, computes the classification report and confusion matrix,
    and saves the outputs (confusion matrix image and classification report CSV) to the respective output folders.

    Classes are identified by their integer column index (0 .. cate_num - 1) and the index
    order is passed explicitly to scikit-learn. Column *names* must not be used as class
    labels here: scikit-learn sorts string labels alphabetically before pairing them with
    ``target_names``, which silently permutes the classes whenever the names do not sort
    in column order.

    Args:
        input_file_output_folder_pairs (list of tuples): A list where each tuple contains:
            - First element: List of two file paths [predictions_file, labels_file].
            - Second element: Output folder path for saving results.
        cate_num (int): Number of classes per row in the input files; 3 or 4.

    Returns:
        None

    Raises:
        ValueError: If ``cate_num`` is not 3 or 4, or an input file is malformed.
    """
    # Display names, indexed by class column position
    class_names_by_count = {
        3: ['No Pain', 'Mild', 'Obvious'],
        4: ['No Pain', 'Weak', 'Mild', 'Strong'],
    }
    if cate_num not in class_names_by_count:
        raise ValueError(f"cate_num must be one of {sorted(class_names_by_count)}, got {cate_num!r}")
    class_names = class_names_by_count[cate_num]
    class_ids = list(range(cate_num))

    for input_files, output_folder in input_file_output_folder_pairs:
        predictions_file = input_files[0]
        labels_file = input_files[1]

        # Ensure the output folder exists
        os.makedirs(output_folder, exist_ok=True)

        # Convert one-hot rows to integer class indices
        labels = _read_class_indices(labels_file, cate_num)
        predictions = _read_class_indices(predictions_file, cate_num)

        # Generate and save classification report.
        # labels=class_ids pins the class order so target_names line up with the columns.
        report = classification_report(labels, predictions, labels=class_ids,
                                       target_names=class_names, output_dict=True)
        report_df = pd.DataFrame(report).transpose()
        report_csv_path = os.path.join(output_folder, 'classification_report.csv')
        report_df.to_csv(report_csv_path)
        print(f"Classification report saved at: {report_csv_path}")

        # Generate and save confusion matrix (always cate_num x cate_num, even if a class is absent)
        cm_original = confusion_matrix(labels, predictions, labels=class_ids)
        # Colour by log10(count + 1) so the dominant class does not wash out the rest
        cm = np.log10(cm_original + 1)

        plt.figure(figsize=(7, 6))
        ax = sns.heatmap(cm, annot=True, fmt='.2f', cmap=sns.light_palette("seagreen", as_cmap=True),
                         xticklabels=class_names, yticklabels=class_names, annot_kws={"size": 20},
                         square=True, vmin=0, vmax=4, cbar=False)

        # Add original amounts under the heatmap annotations (slightly lower and with white background)
        for i in range(cm_original.shape[0]):
            for j in range(cm_original.shape[1]):
                ax.text(j + 0.5, i + 0.7, f"({cm_original[i, j]})",
                        fontsize=14, color="black", ha='center', va='center', clip_on=True,
                        bbox=dict(boxstyle="round,pad=0.2", edgecolor='none', facecolor='white', alpha=0.8))

        # Customizing visualization
        plt.xlabel("Predicted", fontsize=20, fontweight='bold')
        plt.ylabel("Actual", fontsize=20, fontweight='bold')
        plt.xticks(fontsize=18)  # Set x-ticklabel font size
        plt.yticks(fontsize=18)  # Set y-ticklabel font size
        # No title
        confusion_matrix_path = os.path.join(output_folder, 'confusion_matrix.png')
        plt.savefig(confusion_matrix_path, bbox_inches='tight', pad_inches=0.1)
        plt.close()  # Close the plot to avoid memory issues
        print(f"Confusion matrix saved at: {confusion_matrix_path}")

# Every 3-category prediction file is scored against the same fold-1 label list;
# each entry has the shape ([predictions_file, labels_file], output_folder).
input_file_output_folder_pairs = [
    ([predictions_file, '../data/UNBC/list/UNBC_test_pspi_fold1.txt'], output_folder)
    for predictions_file, output_folder in (
        ('bb.txt', 'only backbone'),
        ('no gnn.txt', 'no gnn'),
        ('pain_predictions.txt', 'full'),
        ('full gr.txt', 'full + graph representation'),
        ('full gr no sft.txt', 'full no sft'),
    )
]

# Three-category evaluation (default cate_num=3)
process_multiple_inputs(input_file_output_folder_pairs)

# Four-category evaluation, which uses its own label list
process_multiple_inputs(
    [(['4cat.txt', '../data/UNBC/list/final_4/UNBC_test_pspi_fold1.txt'], '4 cat')],
    cate_num=4,
)

Classification report saved at: only backbone\classification_report.csv
Confusion matrix saved at: only backbone\confusion_matrix.png
Classification report saved at: no gnn\classification_report.csv
Confusion matrix saved at: no gnn\confusion_matrix.png
Classification report saved at: full\classification_report.csv
Confusion matrix saved at: full\confusion_matrix.png
Classification report saved at: full + graph representation\classification_report.csv
Confusion matrix saved at: full + graph representation\confusion_matrix.png
Classification report saved at: full no sft\classification_report.csv
Confusion matrix saved at: full no sft\confusion_matrix.png
Classification report saved at: 4 cat\classification_report.csv
Confusion matrix saved at: 4 cat\confusion_matrix.png


In [47]:
import pandas as pd
import os

# Folder names and abbreviated model names
folders = {'full + graph representation': 'Full', 'full': 'W/o graph rep.', 'no gnn': 'W/o GNN','only backbone': 'Only ResNet' }
categories = ['No Pain', 'Mild', 'Obvious']
# Short metric keys used in this cell -> column names in classification_report.csv
metric_columns = {'f1': 'f1-score', 'recall': 'recall', 'precision': 'precision'}

# Per-model metrics: {abbrev: {'f1': array, 'recall': array, 'precision': array}},
# each array ordered like `categories`
model_metrics = {}

# Loop through each folder to extract required metrics (F1-score, recall, and precision)
for folder, abbrev in folders.items():
    report_path = os.path.join(folder, 'classification_report.csv')
    if not os.path.exists(report_path):
        # Make the gap visible: code that indexes model_metrics for every folder
        # would otherwise fail later with an unrelated-looking KeyError.
        print(f"Warning: no classification report at {report_path!r}; skipping model {abbrev!r}")
        continue

    report = pd.read_csv(report_path, index_col=0)
    model_metrics[abbrev] = {
        key: report.loc[categories, column].values
        for key, column in metric_columns.items()
    }

# Calculate min and max for each category across all models, separately per metric.
# Layout: min_max_values[metric][category_index] == (min, max)
min_max_values = {
    metric: {i: (float('inf'), float('-inf')) for i in range(len(categories))}
    for metric in metric_columns
}

for model in model_metrics.values():
    for metric, per_category in min_max_values.items():
        for i, value in enumerate(model[metric]):
            current_min, current_max = per_category[i]
            per_category[i] = (min(current_min, value), max(current_max, value))

# Min and max of the per-row mean (mean over categories), pooled over all models
# and all three metrics
mean_min, mean_max = float('inf'), float('-inf')

for model in model_metrics.values():
    for metric in metric_columns:
        mean_value = sum(model[metric]) / len(model[metric])
        mean_min = min(mean_min, mean_value)
        mean_max = max(mean_max, mean_value)

# Helper function to apply color based on category-specific min-max value
def color_cell(value, min_val, max_val):
    """Render ``value`` as a LaTeX ``\\cellcolor`` cell shaded by its rank in a range.

    The value's position between ``min_val`` and ``max_val`` is mapped linearly onto
    the red channel (255 down to 155) and the green channel (155 up to 255); blue is
    fixed at 155. When both bounds are equal the midpoint shade is used.

    Args:
        value: Number to display (formatted with one decimal place).
        min_val: Lower bound of the colour scale.
        max_val: Upper bound of the colour scale.

    Returns:
        str: ``\\cellcolor[RGB]{r,g,155}`` immediately followed by the formatted value.
    """
    # A degenerate range gets the neutral midpoint instead of dividing by zero
    position = 0.5 if max_val == min_val else (value - min_val) / (max_val - min_val)
    red = int(255 - 100 * position)
    green = int(155 + 100 * position)
    return f"\\cellcolor[RGB]{{{red},{green},155}}{value:.1f}"

# Prepare the values to be inserted into the LaTeX table.
# Each row is [model name or "", metric name, one coloured cell per category, coloured mean cell].
table_data = []
metrics = ['F1', 'Recall', 'Precision']

# Loop through the models and metrics to create rows
for abbrev in folders.values():
    if abbrev not in model_metrics:
        # No report was loaded for this model; leave it out of the table
        # instead of failing with a KeyError.
        continue

    for metric_idx, metric in enumerate(metrics):
        metric_key = metric.lower()
        raw_values = [float(raw) for raw in model_metrics[abbrev][metric_key]]
        row = [abbrev if metric_idx == 0 else "", metric]  # Add model name for the first metric row only

        for i, raw in enumerate(raw_values):
            min_val, max_val = min_max_values[metric_key][i]
            row.append(color_cell(raw * 100, min_val * 100, max_val * 100))  # Apply cell coloring

        # Average the raw fractions first and scale afterwards, mirroring how
        # mean_min / mean_max were derived, so the extreme rows land exactly on
        # the ends of the colour scale.
        mean_value = sum(raw_values) / len(raw_values) * 100
        row.append(color_cell(mean_value, mean_min * 100, mean_max * 100))  # Apply cell coloring to the mean column
        table_data.append(row)

# Construct the LaTeX table with resizebox.
# The generated markup uses \multirow (multirow package) and \cellcolor (colortbl / xcolor[table]).
latex_table = "\\begin{table}[htbp]\n\\centering\n\\resizebox{\\columnwidth}{!}{%\n\\begin{tabular}{l|l|" + "|".join(["c"] * (len(categories) + 1)) + "}\n"
latex_table += "\\hline\n"
latex_table += "\\textbf{Model} & \\textbf{Metric} & " + " & ".join([f"\\textbf{{{category}}}" for category in categories]) + " & \\textbf{Mean} \\\\\n"
latex_table += "\\hline\n"

# Add rows to the LaTeX table
for row in table_data:
    # The model name spans all of that model's metric rows
    latex_table += f"\\multirow{{{len(metrics)}}}{{*}}{{{row[0]}}} & {row[1]} & " if row[0] != "" else f" & {row[1]} & "
    latex_table += " & ".join(row[2:]) + " \\\\\n"
    if row[1] == metrics[-1]:  # Add \hline after the last row of each model
        latex_table += "\\hline\n"

latex_table += "\\end{tabular}%\n}\n"
latex_table += "\\caption{Three-category classification results with separate columns for models and metrics. F1, recall, precision, and their mean values are shown for No Pain, Mild Pain, and Pain categories.}\n"
latex_table += "\\label{tab:all_models_results}\n"
latex_table += "\\end{table}"

print(latex_table)

KeyError: 'f1-score'

In [50]:
import pandas as pd

def classification_results_to_dataframe(csv_path):
    """
    Reads a classification report CSV file and outputs a DataFrame with the specified format.

    The CSV is expected to have the class / summary names in its first column and
    'precision', 'recall' and 'f1-score' columns. All values are scaled to percentages.

    Args:
        csv_path (str): Path to the classification report CSV file.

    Returns:
        pd.DataFrame: DataFrame with columns ["Metric", each category, "Overall"]
                      and rows ["F1-score", "Recall", "Precision"], plus an "Accuracy"
                      row when the report has one. "Overall" holds the macro average
                      for the metric rows (missing if the report has no 'macro avg'
                      row) and the accuracy for the "Accuracy" row, whose category
                      cells are left empty.
    """
    # Read classification report from the CSV
    report = pd.read_csv(csv_path, index_col=0)

    # Everything that is not a summary row is a category. 'micro avg' and 'samples avg'
    # are included because scikit-learn emits them for some inputs; they must not be
    # mistaken for pain levels.
    summary_rows = {'accuracy', 'micro avg', 'macro avg', 'weighted avg', 'samples avg'}
    categories = [label for label in report.index if label not in summary_rows]
    metrics = ['f1-score', 'recall', 'precision']
    has_macro_avg = 'macro avg' in report.index

    data = []
    for metric in metrics:
        per_category = [report.at[category, metric] * 100 for category in categories]
        overall_metric = report.loc['macro avg', metric] * 100 if has_macro_avg else None
        data.append([metric.capitalize()] + per_category + [overall_metric])

    # Include accuracy row. The accuracy row repeats one value in every column,
    # so reading it from 'precision' is as good as any other column.
    if 'accuracy' in report.index:
        accuracy = report.loc['accuracy', 'precision'] * 100
        data.append(['Accuracy'] + [None] * len(categories) + [accuracy])

    columns = ['Metric'] + categories + ['Overall']
    return pd.DataFrame(data, columns=columns)

# Summary frames read from the saved reports in the '4 cat' and
# 'full + graph representation' result folders; both are reused by later cells.
cat4 = classification_results_to_dataframe('../submission_results/4 cat/classification_report.csv')
cat3 = classification_results_to_dataframe('../submission_results/full + graph representation/classification_report.csv')
# Bare last expression so the notebook renders the frame
cat3

Unnamed: 0,Metric,No Pain,Mild,Obvious,Overall
0,F1-score,93.101749,51.18525,54.347826,66.211609
1,Recall,92.850123,52.475248,51.020408,65.448593
2,Precision,93.354743,49.957155,58.139535,67.150478
3,Accuracy,,,,87.613649


In [51]:
def create_latex_table_from_dataframe(df):
    """
    Convert a classification results DataFrame into a LaTeX table.

    Args:
        df (pd.DataFrame): DataFrame containing classification metrics with columns
                           ["Metric", categories, "Overall"]. The first column holds the
                           row label; the last column holds the overall value.

    Returns:
        str: LaTeX table string. Values are printed with two decimals and missing values
             as empty cells. In the "Accuracy" row the category cells are merged into a
             single empty ``\\multicolumn`` cell, so only the overall value is shown.
    """
    category_columns = list(df.columns[1:-1])
    cat_num = len(category_columns)

    header_cells = (
        ["\\textbf{Metric}"]
        + [f"\\textbf{{{col}}}" for col in category_columns]
        + ["\\textbf{Overall}"]
    )

    latex_table = "\\begin{table}[htbp]\n\\centering\n\\resizebox{\\columnwidth}{!}{%\n\\begin{tabular}{l|" + "c|" * cat_num + "c}\n"
    latex_table += "\\hline\n"
    latex_table += " & ".join(header_cells) + " \\\\\n"
    latex_table += "\\hline\n"

    for _, row in df.iterrows():
        if row["Metric"] == "Accuracy":
            cells = ["Accuracy"]
            if cat_num:
                # One cell spanning every category column: accuracy has no per-class value
                cells.append(f"\\multicolumn{{{cat_num}}}{{c|}}{{}}")
            cells.append(f"{row['Overall']:.2f}")
        else:
            cells = [str(row["Metric"])]
            cells += [f"{row[col]:.2f}" if not pd.isna(row[col]) else "" for col in df.columns[1:]]
        latex_table += " & ".join(cells) + " \\\\\n"

    latex_table += "\\hline\n"
    latex_table += "\\end{tabular}%\n}\n"
    latex_table += "\\caption{Classification metrics in LaTeX table format with merged accuracy cells.}\n"
    latex_table += "\\label{tab:classification_results_cat" + str(cat_num) + "}\n"
    latex_table += "\\end{table}"

    return latex_table

# Render both summary frames (cat3 and cat4 come from the previous cell)
# and print the LaTeX so it can be copied out of the notebook.
latex_table_3cat = create_latex_table_from_dataframe(cat3)
latex_table_4cat = create_latex_table_from_dataframe(cat4)
print(latex_table_3cat)
print(latex_table_4cat)

\begin{table}[htbp]
\centering
\resizebox{\columnwidth}{!}{%
\begin{tabular}{l|c|c|c|c}
\hline
\textbf{Metric} & \textbf{No Pain} & \textbf{Mild} & \textbf{Obvious} & \textbf{Overall} \\
\hline
F1-score & 93.10 & 51.19 & 54.35 & 66.21 \\
Recall & 92.85 & 52.48 & 51.02 & 65.45 \\
Precision & 93.35 & 49.96 & 58.14 & 67.15 \\
\multirow{1}{*}{Accuracy} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & 87.61 \\
\hline
\end{tabular}%
}
\caption{Classification metrics in LaTeX table format with merged accuracy cells.}
\label{tab:classification_results_cat3}
\end{table}
\begin{table}[htbp]
\centering
\resizebox{\columnwidth}{!}{%
\begin{tabular}{l|c|c|c|c|c}
\hline
\textbf{Metric} & \textbf{No Pain} & \textbf{Weak} & \textbf{Mild} & \textbf{Strong} & \textbf{Overall} \\
\hline
F1-score & 91.32 & 0.00 & 47.46 & 36.51 & 43.83 \\
Recall & 87.09 & 0.00 & 76.91 & 47.84 & 52.96 \\
Precision & 95.99 & 0.00 & 34.32 & 29.52 & 39.96 \\
\multirow{1}{*}{Accuracy} & \multicolumn{1}{

In [54]:
def create_custom_latex_table(df, baseline_values=(56.45, 36.52, 34.08, 43.23),
                              model_name="GraphAU-Pain", baseline_name="GLA-CNN"):
    """
    Create a LaTeX table comparing one model's overall metrics against a baseline.

    Args:
        df (pd.DataFrame): DataFrame containing classification metrics, with a "Metric"
            column that includes the rows 'Accuracy', 'F1-score', 'Recall' and 'Precision'
            and an "Overall" column holding the value reported for each.
        baseline_values (sequence of float): Baseline accuracy, F1-score, recall and
            precision, in that order. The defaults are the GLA-CNN figures that were
            hard-coded here; their source is not recorded in this notebook (TODO: cite).
        model_name (str): Row label for the model described by ``df``.
        baseline_name (str): Row label for the baseline.

    Returns:
        str: LaTeX table string. Each model value is followed by its absolute difference
             to the baseline, prefixed with an up arrow when the model is higher, a down
             arrow when it is lower, and no arrow when the two are equal.

    Raises:
        ValueError: If ``baseline_values`` does not contain exactly four numbers.
        IndexError: If one of the four metric rows is missing from ``df``.
    """
    metric_order = ['Accuracy', 'F1-score', 'Recall', 'Precision']
    if len(baseline_values) != len(metric_order):
        raise ValueError(
            f"baseline_values must hold {len(metric_order)} numbers "
            f"({', '.join(metric_order)}), got {len(baseline_values)}"
        )

    # Overall value of each metric for the model described by df
    model_values = [df.loc[df['Metric'] == metric, 'Overall'].values[0] for metric in metric_order]

    def _with_delta(value, baseline):
        """Format ``value`` followed by its signed distance to ``baseline``."""
        if value > baseline:
            arrow = "↑"
        elif value < baseline:
            arrow = "↓"
        else:
            arrow = ""  # a tie is neither an improvement nor a regression
        return f"{value:.2f} ({arrow}{abs(value - baseline):.2f})"

    model_cells = [_with_delta(value, baseline) for value, baseline in zip(model_values, baseline_values)]
    baseline_cells = [f"{baseline:.2f}" for baseline in baseline_values]
    header_cells = ["\\textbf{Model}"] + [f"\\textbf{{{metric}}}" for metric in metric_order]

    # Start constructing the LaTeX table using resizebox for column width
    latex_table = "\\begin{table}[htbp]\n\\centering\n\\resizebox{\\columnwidth}{!}{%\n\\begin{tabular}{l|c|c|c|c}\n"
    latex_table += "\\hline\n"
    latex_table += " & ".join(header_cells) + " \\\\\n"
    latex_table += "\\hline\n"

    # Add rows for each model
    latex_table += " & ".join([model_name] + model_cells) + " \\\\\n"
    latex_table += " & ".join([baseline_name] + baseline_cells) + " \\\\\n"

    latex_table += "\\hline\n"
    latex_table += "\\end{tabular}%\n}\n\\caption{Custom model comparison for Accuracy, F1-score, Recall, and Precision.}\n"
    latex_table += "\\label{tab:model_comparison}\n\\end{table}"

    return latex_table

# Build the comparison table from the cat4 summary frame (defined in an earlier cell)
# and print the LaTeX so it can be copied out of the notebook.
custom_table = create_custom_latex_table(cat4)
print(custom_table)

\begin{table}[htbp]
\centering
\resizebox{\columnwidth}{!}{%
\begin{tabular}{l|c|c|c|c}
\hline
\textbf{Model} & \textbf{Accuracy} & \textbf{F1-score} & \textbf{Recall} & \textbf{Precision} \\
\hline
GraphAU-Pain & 82.40 (↑25.95) & 43.83 (↑7.31) & 52.96 (↑18.88) & 39.96 (↓3.27) \\
GLA-CNN & 56.45 & 36.52 & 34.08 & 43.23 \\
\hline
\end{tabular}%
}
\caption{Custom model comparison for Accuracy, F1-score, Recall, and Precision.}
\label{tab:model_comparison}
\end{table}
