In [3]:
result_dir = "/mnt/volume1/qyc/auto-annot/result_across/"
# tools = ["RF","scVI", "singleCellNet", "SingleR", "SVM"]
tools = ["RF", "singleCellNet", "SingleR", "SVM"]

In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [7]:
pred_labels_list = []
true_labels_list = []
time_list = []
for tool in tools:
    pred_labels_list.append(pd.read_csv(f"{result_dir}{tool}/{tool}_Pred_Labels.csv"))
    true_labels_list.append(pd.read_csv(f"{result_dir}{tool}/{tool}_True_Labels.csv"))
    time_list.append(pd.read_csv(f"{result_dir}{tool}/{tool}_Total_Time.csv").iloc[0,0])

In [12]:
# Function to calculate metrics
def calculate_metrics(y_true, y_pred):

    f1_scores = f1_score(y_true, y_pred, average=None)

    metrics = {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Macro F1': f1_score(y_true, y_pred, average='macro'),
        'Median F1': np.median(f1_scores),
        'Weighted F1': f1_score(y_true, y_pred, average='weighted'),
        'Macro Precision': precision_score(y_true, y_pred, average='macro'),
        'Macro Recall': recall_score(y_true, y_pred, average='macro'),
    }
    return metrics

In [13]:
# Calculate metrics for each tool
results = {}
for i, tool in enumerate(tools):
    y_true = true_labels_list[i].iloc[:, 0]
    y_pred = pred_labels_list[i].iloc[:, 0]
    results[tool] = calculate_metrics(y_true, y_pred)

# Create a DataFrame with results
results_df = pd.DataFrame(results).T
results_df['Time (s)'] = time_list

# Display results
print("\nPerformance Metrics for Each Tool:")
print(results_df.round(4))

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Performance Metrics for Each Tool:
               Accuracy  Macro F1  Median F1  Weighted F1  Macro Precision  \
RF               0.9969    0.9958     0.9973       0.9969           0.9977   
singleCellNet    0.9736    0.9760     0.9866       0.9735           0.9791   
SingleR          0.7098    0.6701     0.7464       0.7231           0.6949   
SVM              0.9974    0.8723     0.9969       0.9974           0.8726   

               Macro Recall   Time (s)  
RF                   0.9939    84.6806  
singleCellNet        0.9731  1122.9200  
SingleR              0.7213   632.0059  
SVM                  0.8721    19.1795  


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [34]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib as mpl
import matplotlib.colors as mcolors
import numpy as np
import pandas as pd

# 设置字体为 Arial 并放大字号
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 18
mpl.rcParams['axes.titlesize'] = 18
mpl.rcParams['xtick.labelsize'] = 18
mpl.rcParams['ytick.labelsize'] = 18
plt.figure(figsize=(14, 7))

# 指标
metrics_to_plot = ['Accuracy', 'Median F1', 'Weighted F1', 'Macro Precision', 'Macro Recall']

# Time Score（越小越好）
max_time = results_df['Time (s)'].max()
min_time = results_df['Time (s)'].min()
results_df['Time Score'] = 1 - (results_df['Time (s)'] - min_time) / (max_time - min_time)
results_df['Time Score'] = 0.7 + 0.3 * results_df['Time Score']

# Total Score = 原始指标平均值
results_df['Total Score'] = results_df[metrics_to_plot].mean(axis=1)

# 按 Total Score 升序排序（分数高的在上）
results_df = results_df.sort_values(by='Total Score', ascending=True)
tools = results_df.index.tolist()

# 所有指标列
all_metrics = metrics_to_plot + ['Time Score', 'Total Score']
metric_names = ['Accuracy', 'Median F1', 'Weighted F1', 'Precision', 'Recall', 'Time', 'Total Score']

# 设置颜色映射：浅蓝到深蓝
base_cmap = cm.get_cmap('Blues', 256)
truncated_blues = mcolors.LinearSegmentedColormap.from_list(
    'truncated_blues', base_cmap(np.linspace(0.3, 1.0, 256))
)

# 画图
for j, metric in enumerate(all_metrics):
    for i, tool in enumerate(tools):
        value = results_df.loc[tool, metric]
        size = value * 2000
        norm = (value - results_df[metric].min()) / (results_df[metric].max() - results_df[metric].min())
        color = truncated_blues(norm)
        plt.scatter(j, i, s=size, alpha=0.9, color=color, edgecolor='k', linewidth=0.3)

# 美化图像
# 美化图像
plt.yticks(range(len(tools)), tools)
plt.xticks(range(len(metric_names)), metric_names, rotation=0)

# 调整纵向点间距，让点更接近
plt.ylim(-0.5, len(tools) - 0.5)

plt.title('')
plt.tight_layout()
plt.savefig('performance_metrics_sorted_scatter.pdf', bbox_inches='tight')
plt.close()

# 保存结果
results_df.to_csv('performance_metrics_results.csv')


  base_cmap = cm.get_cmap('Blues', 256)
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font fa