#### Generate the true.csv and pred.csv files for all the results of this experiment.

In [None]:
import os
import numpy as np
import pandas as pd

datasets = ["Muraro", 'Baron_Mouse', 'Baron_Human', 'Zhang_T', 'Kang_ctrl', 'AMB', 'TM', 'Zheng68K']

base_path = "../../result/wcsn_preds/"
save_folder = "../Tables"

# For every dataset, decode the true and predicted labels stored in the
# WCSGNet prediction file into cell-type names and write each list to its own
# single-column CSV under <save_folder>/<dataset>/.
for dataset in datasets:
    print("processing dataset: ", dataset)
    save_data_folder = os.path.join(save_folder, dataset)
    os.makedirs(save_data_folder, exist_ok=True)

    seq_dict_file = f'../../result/datasets/{dataset}/seq_dict.npz'
    # np.load returns a lazily-read archive for .npz files; the context manager
    # releases the file handle as soon as the lookup table has been extracted.
    # NOTE: allow_pickle=True unpickles stored objects - only load trusted files.
    with np.load(seq_dict_file, allow_pickle=True) as seq_dict:
        str_labels = seq_dict['str_labels']

    data_file = os.path.join(base_path, f"{dataset}_a0.01_hvgs2000_prediction.h5")
    cell_type = pd.read_hdf(data_file, key='cell_type')

    true_labels = cell_type['true_cell_type'].values
    pred_labels = cell_type['pred_cell_type'].values

    # str_labels is indexed with the label values stored in the prediction table.
    true_str_labels = [str_labels[label] for label in true_labels]
    pred_str_labels = [str_labels[label] for label in pred_labels]

    true_df = pd.DataFrame(true_str_labels)
    pred_df = pd.DataFrame(pred_str_labels)

    true_label_csv_path = os.path.join(save_data_folder, "wcsgnet_true.csv")
    pred_label_csv_path = os.path.join(save_data_folder, "wcsgnet_pred.csv")

    # index=False: the files contain only the header row and the label column.
    true_df.to_csv(true_label_csv_path, index=False)
    pred_df.to_csv(pred_label_csv_path, index=False)

    print(f"Saved true labels to {true_label_csv_path}")
    print(f"Saved predicted labels to {pred_label_csv_path}")

In [None]:
import os
import numpy as np
import pandas as pd

datasets = ["Muraro", 'Baron_Mouse','Baron_Human', 'Zhang_T', 'Kang_ctrl', 'AMB', 'TM', 'Zheng68K']

base_path = "../../result/LT_wcsn_preds/"
save_folder = "../Tables"

# Same export as for the plain WCSGNet predictions, but reading the prediction
# files under LT_wcsn_preds/ and writing them as wcsgnet_Log_{true,pred}.csv.
for dataset in datasets:
    print("processing dataset: ", dataset)
    save_data_folder = os.path.join(save_folder, dataset)
    os.makedirs(save_data_folder, exist_ok=True)

    seq_dict_file = f'../../result/datasets/{dataset}/seq_dict.npz'
    # The context manager closes the .npz archive once the lookup table is read.
    # NOTE: allow_pickle=True unpickles stored objects - only load trusted files.
    with np.load(seq_dict_file, allow_pickle=True) as seq_dict:
        str_labels = seq_dict['str_labels']

    data_file = os.path.join(base_path, f"{dataset}_a0.01_hvgs2000_prediction.h5")
    cell_type = pd.read_hdf(data_file, key='cell_type')

    true_labels = cell_type['true_cell_type'].values
    pred_labels = cell_type['pred_cell_type'].values

    # str_labels is indexed with the label values stored in the prediction table.
    true_str_labels = [str_labels[label] for label in true_labels]
    pred_str_labels = [str_labels[label] for label in pred_labels]

    true_df = pd.DataFrame(true_str_labels)
    pred_df = pd.DataFrame(pred_str_labels)

    true_label_csv_path = os.path.join(save_data_folder, "wcsgnet_Log_true.csv")
    pred_label_csv_path = os.path.join(save_data_folder, "wcsgnet_Log_pred.csv")

    # index=False: the files contain only the header row and the label column.
    true_df.to_csv(true_label_csv_path, index=False)
    pred_df.to_csv(pred_label_csv_path, index=False)

    print(f"Saved true labels to {true_label_csv_path}")
    print(f"Saved predicted labels to {pred_label_csv_path}")

#### Based on the true.csv and pred.csv files, calculate accuracy (acc) and mean F1 score, and generate a summary table.

##### Baseline 

In [None]:
from sklearn.metrics import precision_score,f1_score, accuracy_score
import os
import numpy as np
import pandas as pd

datasets = ['Zhang_T', 'Kang_ctrl', 'Zheng68K','Baron_Human', 'Muraro', 'AMB', 'TM', 'Baron_Mouse']
dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro",'AMB', 'TM', 'Baron Mouse']
methods = ["wcsgnet", "LDA", "NMC", "RF", "SVM", "SingleR", "ACTINN", "scGraph"]
method_names = ["WCSGNet", "LDA", "NMC", "RF", "SVM", "SingleR", "ACTINN", "scGraph"]


save_folder = "../Tables"
# Result tables: one row per method, one column per dataset.
mean_f1_results = pd.DataFrame(index=method_names, columns=dataset_names)
acc_results = pd.DataFrame(index=method_names, columns=dataset_names)

for dataset, dataset_name in zip(datasets, dataset_names):
    print("processing dataset: ", dataset)
    save_data_folder = os.path.join(save_folder, dataset)

    for method, method_name in zip(methods, method_names):
        print("method: ", method_name)
        true_label_csv_path = os.path.join(save_data_folder, f"{method}_true.csv")
        pred_label_csv_path = os.path.join(save_data_folder, f"{method}_pred.csv")
        true_df = pd.read_csv(true_label_csv_path)
        pred_df = pd.read_csv(pred_label_csv_path)

        # Labels live in the first column of each file.
        label_true = true_df.iloc[:, 0].values
        label_pred = pred_df.iloc[:, 0].values

        # Macro-average only over classes present in the ground truth, so a
        # predicted class that never occurs in label_true adds no extra term.
        unique_classes = np.unique(label_true)

        test_acc = accuracy_score(label_true, label_pred)
        test_f1 = f1_score(label_true, label_pred, average='macro', labels=unique_classes)
        mean_f1_results.loc[method_name, dataset_name] = test_f1
        acc_results.loc[method_name, dataset_name] = test_acc
        print('Acc: %.03f, Mean-F1: %.03f'%(test_acc, test_f1))

# Both summary tables are written next to each other in one output folder.
output_folder = os.path.join(save_folder, "baseline_F1_Acc")
os.makedirs(output_folder, exist_ok=True)

f1_path = os.path.join(output_folder, "baseline_Mean_F1.csv")
mean_f1_results.to_csv(f1_path)
print(f"Mean-F1 scores saved to {f1_path}")

acc_path = os.path.join(output_folder, "baseline_Acc.csv")
acc_results.to_csv(acc_path)
print(f"Accuracy saved to {acc_path}")


##### Rare cell types: baseline

In [None]:
from sklearn.metrics import precision_score,f1_score, accuracy_score
import os
import numpy as np
import pandas as pd

datasets = ['Zhang_T', 'Kang_ctrl', 'Zheng68K','Baron_Human', 'Muraro', 'AMB', 'TM', 'Baron_Mouse']
dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'AMB', 'TM', 'Baron Mouse']
methods = ["wcsgnet", "scGraph", "LDA", "NMC", "RF", "SVM", "SingleR", "ACTINN"]
method_names = ["WCSGNet", "scGraph", "LDA", "NMC", "RF", "SVM", "SingleR", "ACTINN"]

save_folder = "../Tables"

# A cell type counts as rare when it makes up less than this fraction of the
# cells in the true-label file.
RARE_FRACTION_THRESHOLD = 0.03

# Result table: one row per method, one column per dataset.
rare_f1_results = pd.DataFrame(index=method_names, columns=dataset_names)

for dataset, dataset_name in zip(datasets, dataset_names):
    print("processing dataset: ", dataset)
    save_data_folder = os.path.join(save_folder, dataset)

    for method, method_name in zip(methods, method_names):
        print("method: ", method_name)
        true_label_csv_path = os.path.join(save_data_folder, f"{method}_true.csv")
        pred_label_csv_path = os.path.join(save_data_folder, f"{method}_pred.csv")
        true_df = pd.read_csv(true_label_csv_path)
        pred_df = pd.read_csv(pred_label_csv_path)

        # Labels live in the first column of each file.
        label_true = true_df.iloc[:, 0].values
        label_pred = pred_df.iloc[:, 0].values

        # Rare classes are determined from this method's own true-label file.
        unique, counts = np.unique(label_true, return_counts=True)
        total_cells = len(label_true)
        rare_classes = [
            cell_type
            for cell_type, count in zip(unique, counts)
            if count / total_cells < RARE_FRACTION_THRESHOLD
        ]

        if not rare_classes:
            # Nothing to average over: record NaN instead of a misleading score.
            print(f"No rare cell types in dataset {dataset_name} for method {method_name}")
            rare_f1_results.loc[method_name, dataset_name] = np.nan
            continue

        # Macro F1 restricted to the rare classes only.
        rare_f1 = f1_score(label_true, label_pred, average='macro', labels=rare_classes)
        rare_f1_results.loc[method_name, dataset_name] = rare_f1
        print(f"Rare cell type F1 score for {method_name} in {dataset_name}: {rare_f1}")

output_folder = os.path.join(save_folder, "baseline_F1_Acc")
os.makedirs(output_folder, exist_ok=True)

rare_cell_type_f1_path = os.path.join(output_folder, "baseline_rare_cell_type_mean-F1.csv")
rare_f1_results.to_csv(rare_cell_type_f1_path)
print(f"Rare-F1 scores saved to {rare_cell_type_f1_path}")

##### Log Transformation

In [None]:
from sklearn.metrics import precision_score,f1_score, accuracy_score
import os
import numpy as np
import pandas as pd

datasets = ['Zhang_T', 'Kang_ctrl', 'Zheng68K','Baron_Human', 'Muraro', 'AMB', 'TM', 'Baron_Mouse']
dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'AMB', 'TM', 'Baron Mouse']
methods = ["wcsgnet", "wcsgnet_Log"]
method_names = ["WCSGNet", "WCSGNet(Logarithmic Transformation)"]

save_folder = "../Tables"

# All per-dataset tables go to the same folder; create it once up front.
output_folder = os.path.join(save_folder, "wcsgnet_F1_Acc")
os.makedirs(output_folder, exist_ok=True)

for dataset in datasets:
    print("processing dataset: ", dataset)
    save_data_folder = os.path.join(save_folder, dataset)

    # method display name -> per-class F1 scores, indexed by class name.
    per_method_f1 = {}

    for method, method_name in zip(methods, method_names):
        print("method: ", method_name)
        true_label_csv_path = os.path.join(save_data_folder, f"{method}_true.csv")
        pred_label_csv_path = os.path.join(save_data_folder, f"{method}_pred.csv")
        true_df = pd.read_csv(true_label_csv_path)
        pred_df = pd.read_csv(pred_label_csv_path)

        # Labels live in the first column of each file.
        label_true = true_df.iloc[:, 0].values
        label_pred = pred_df.iloc[:, 0].values

        unique_classes = np.unique(label_true)

        test_acc = accuracy_score(label_true, label_pred)
        test_f1 = f1_score(label_true, label_pred, average='macro', labels=unique_classes)

        # average=None yields one F1 value per entry of unique_classes.
        test_f1_all = f1_score(label_true, label_pred, average=None, labels=unique_classes)

        # Index the scores by class name so the methods are joined on the
        # class rather than on row position; a class missing from one method's
        # label file then shows up as NaN instead of shifting or failing.
        per_method_f1[method_name] = pd.Series(test_f1_all, index=unique_classes)

        print('Acc: %.03f, Mean-F1: %.03f'%(test_acc, test_f1))

    # Columns: Cell_Type followed by one column per method (insertion order).
    f1_results = pd.DataFrame(per_method_f1).rename_axis('Cell_Type').reset_index()

    dataset_csv_path = os.path.join(output_folder, f"{dataset}_f1_scores.csv")
    f1_results.to_csv(dataset_csv_path, index=False)
    print(f"F1 scores saved to: {dataset_csv_path}")
    print("\n\n")


In [None]:
from sklearn.metrics import precision_score,f1_score, accuracy_score
import os
import numpy as np
import pandas as pd

datasets = ['Zhang_T', 'Kang_ctrl', 'Zheng68K', 'Baron_Human', "Muraro",'AMB', 'TM', 'Baron_Mouse']
dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro",'AMB', 'TM', 'Baron Mouse']
methods = ["wcsgnet", "wcsgnet_Log"]
method_names = ["WCSGNet", "WCSGNet(Logarithmic Transformation)"]


save_folder = "../Tables"

# Result tables: one row per method, one column per dataset.
mean_f1_results = pd.DataFrame(index=method_names, columns=dataset_names)
acc_results = pd.DataFrame(index=method_names, columns=dataset_names)

for dataset, dataset_name in zip(datasets, dataset_names):
    print("processing dataset: ", dataset)
    save_data_folder = os.path.join(save_folder, dataset)

    for method, method_name in zip(methods, method_names):
        print("method: ", method_name)
        true_label_csv_path = os.path.join(save_data_folder, f"{method}_true.csv")
        pred_label_csv_path = os.path.join(save_data_folder, f"{method}_pred.csv")
        true_df = pd.read_csv(true_label_csv_path)
        pred_df = pd.read_csv(pred_label_csv_path)

        # Labels live in the first column of each file.
        label_true = true_df.iloc[:, 0].values
        label_pred = pred_df.iloc[:, 0].values

        # Macro-average only over classes present in the ground truth.
        unique_classes = np.unique(label_true)
        test_acc = accuracy_score(label_true, label_pred)
        test_f1 = f1_score(label_true, label_pred, average='macro', labels=unique_classes)
        mean_f1_results.loc[method_name, dataset_name] = test_f1
        acc_results.loc[method_name, dataset_name] = test_acc
        print('Acc: %.03f, Mean-F1: %.03f'%(test_acc, test_f1))

# Both summary tables are written next to each other in one output folder.
output_folder = os.path.join(save_folder, "wcsgnet_F1_Acc")
os.makedirs(output_folder, exist_ok=True)

f1_path = os.path.join(output_folder, "Mean_F1_Scores.csv")
mean_f1_results.to_csv(f1_path)
print(f"Mean-F1 scores saved to {f1_path}")

acc_path = os.path.join(output_folder, "Accuracy.csv")
acc_results.to_csv(acc_path)
print(f"Accuracy saved to {acc_path}")


#### Plotting: Based on the benchmark dataset, compare machine learning methods and create bar charts: F1 and accuracy

##### Machine learning: Comparison based on F1. 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

plt.rcParams['font.family'] = 'Times New Roman'

# Load the methods x datasets Mean-F1 table (first CSV column is the row index).
save_folder = "../Tables"
f1_path = os.path.join(save_folder, "baseline_F1_Acc", "baseline_Mean_F1.csv")
f1_results = pd.read_csv(f1_path, index_col=0)

dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'AMB', 'TM', 'Baron Mouse']
method_names = ["WCSGNet", "LDA", "NMC", "RF", "SVM"]

# Keep only the rows of the methods compared in this figure set.
filtered_ml_f1_results = f1_results.loc[method_names]

figure_folder = "../../result/Figures"
visualization_folder = os.path.join(figure_folder, "baseline_ML_F1")
os.makedirs(visualization_folder, exist_ok=True)

# One colour per method, in the same order as method_names.
colors = ['#D16E5D', '#72B6A1', '#F3C678', '#95A3C3', '#6DA96D']


def add_value_labels(ax):
    """Write each bar's height (three decimals) vertically just above the bar."""
    for patch in ax.patches:
        value = patch.get_height()
        centre = patch.get_x() + patch.get_width() / 2
        ax.annotate(
            f'{value:.3f}',
            xy=(centre, value),
            xytext=(0, 3),  # nudge the text 3 points above the bar top
            textcoords="offset points",
            ha='center',
            va='bottom',
            fontsize=8,
            rotation=90,
        )


n_methods = len(method_names)

# One small bar chart per dataset, saved as both PNG and SVG.
for dataset_name in dataset_names:
    dataset_f1 = filtered_ml_f1_results[dataset_name]

    plt.figure(figsize=(2, 2))
    x = range(n_methods)
    width = 0.7
    plt.bar(x, dataset_f1, width=width, color=colors[:n_methods], edgecolor='grey', linewidth=0.5)
    ax = plt.gca()

    # The dataset name doubles as the x-axis title; the ticks carry no text.
    plt.xlabel(dataset_name, fontsize=10, fontweight="bold")
    plt.ylim(0, 1)
    plt.xticks(ticks=x, labels=[''] * n_methods, fontsize=8)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    add_value_labels(ax)

    plot_path = os.path.join(visualization_folder, f"{dataset_name}_ML_F1_bar.png")
    plot_path2 = os.path.join(visualization_folder, f"{dataset_name}_ML_F1_bar.svg")
    for target, fmt in ((plot_path, 'png'), (plot_path2, 'svg')):
        plt.savefig(target, format=fmt, dpi=1200, bbox_inches='tight')
    plt.show()
    plt.close()

    print(f"Bar plot saved for dataset: {dataset_name} at {plot_path}")

print(f"All bar plots saved to {visualization_folder}")

##### Machine learning: Comparison based on Acc. 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

plt.rcParams['font.family'] = 'Times New Roman'

# Load the methods x datasets accuracy table (first CSV column is the row index).
save_folder = "../Tables"
acc_path = os.path.join(save_folder, "baseline_F1_Acc", "baseline_Acc.csv")
acc_results = pd.read_csv(acc_path, index_col=0)

dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'AMB', 'TM', 'Baron Mouse']
method_names = ["WCSGNet", "LDA", "NMC", "RF", "SVM"]
# Keep only the rows of the methods compared in this figure set.
filtered_ml_acc_results = acc_results.loc[method_names]

figure_folder = "../../result/Figures"
visualization_folder = os.path.join(figure_folder, "baseline_ML_Acc")
os.makedirs(visualization_folder, exist_ok=True)

# One colour per method, in the same order as method_names.
colors = ['#D16E5D', '#72B6A1', '#F3C678', '#95A3C3', '#6DA96D']


def add_value_labels(ax):
    """Write each bar's height (three decimals) vertically just above the bar."""
    for patch in ax.patches:
        value = patch.get_height()
        centre = patch.get_x() + patch.get_width() / 2
        ax.annotate(
            f'{value:.3f}',
            xy=(centre, value),
            xytext=(0, 3),  # nudge the text 3 points above the bar top
            textcoords="offset points",
            ha='center',
            va='bottom',
            fontsize=8,
            rotation=90,
        )


n_methods = len(method_names)

# One small bar chart per dataset, saved as both PNG and SVG.
for dataset_name in dataset_names:
    dataset_acc = filtered_ml_acc_results[dataset_name]

    plt.figure(figsize=(2, 2))
    x = range(n_methods)
    width = 0.7
    plt.bar(x, dataset_acc, width=width, color=colors[:n_methods], edgecolor='grey', linewidth=0.5)
    ax = plt.gca()

    # The dataset name doubles as the x-axis title; the ticks carry no text.
    plt.xlabel(dataset_name, fontsize=10, fontweight="bold")
    plt.ylim(0, 1)
    plt.xticks(ticks=x, labels=[''] * n_methods, fontsize=8)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    add_value_labels(ax)

    plot_path = os.path.join(visualization_folder, f"{dataset_name}_Acc_bar.png")
    plot_path2 = os.path.join(visualization_folder, f"{dataset_name}_Acc_bar.svg")
    for target, fmt in ((plot_path, 'png'), (plot_path2, 'svg')):
        plt.savefig(target, format=fmt, dpi=1200, bbox_inches='tight')
    plt.show()
    plt.close()

    print(f"Bar plot saved for dataset: {dataset_name} at {plot_path}")

print(f"All bar plots saved to {visualization_folder}")

##### Legend drawing.

In [None]:
import os

import matplotlib.pyplot as plt

method_names = ["WCSGNet", "LDA", "NMC", "RF", "SVM"]
# One colour per method, in the same order as method_names.
colors = [
    '#D16E5D',
    '#72B6A1',
    '#F3C678',
    '#95A3C3',
    '#6DA96D',
]

plt.figure(figsize=(9, 0.3))
# Zero-sized bars draw nothing visible but register one legend entry per method.
for method, color in zip(method_names, colors):
    plt.bar(0, 0, color=color, label=method)

plt.legend(loc='center', bbox_to_anchor=(0.5, 0.5), fontsize=10, frameon=False, ncol=5)
plt.axis('off')

legend_path = "../../result/Figures/baseline_ML_Acc/legend_only.png"
# Create the target folder here so this cell does not depend on the plotting
# cell having been run first.
os.makedirs(os.path.dirname(legend_path), exist_ok=True)
plt.savefig(legend_path, dpi=1200, bbox_inches='tight')
plt.show()
plt.close()

print(f"Legend saved as {legend_path}")


#### Plotting: Based on the benchmark dataset, compare deep learning methods and create bar charts: F1 and accuracy

##### Deep Learning: Comparison based on F1. 

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

plt.rcParams['font.family'] = 'Times New Roman'

# Load the methods x datasets Mean-F1 table (first CSV column is the row index).
save_folder = "../Tables"
f1_path = os.path.join(save_folder, "baseline_F1_Acc", "baseline_Mean_F1.csv")
f1_results = pd.read_csv(f1_path, index_col=0)
dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'AMB', 'TM', 'Baron Mouse']
method_names = ["WCSGNet", "scGraph", "ACTINN"]

deep_f1_results = f1_results.loc[method_names]

# One row per method, with columns ordered like dataset_names.
WCSGNet = deep_f1_results.loc["WCSGNet", dataset_names]
scGraph = deep_f1_results.loc["scGraph", dataset_names]
ACTINN = deep_f1_results.loc["ACTINN", dataset_names]

x = np.arange(len(dataset_names))
width = 0.25

plt.figure(figsize=(8, 4))

# Three bars per dataset, centred on the tick: left, middle, right.
plt.bar(x - width, WCSGNet, width, label='WCSGNet', color='#72B6A1', edgecolor='white', linewidth=1)
plt.bar(x, scGraph, width, label='scGraph', color='#E99675', edgecolor='white', linewidth=1)
plt.bar(x + width, ACTINN, width, label='ACTINN', color='#95A3C3', edgecolor='white', linewidth=1)

plt.xticks(x, dataset_names, fontsize=10, rotation=45)
plt.ylim(0.4, 1.0)

ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Legend sits below the axes so it does not cover the bars.
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.3), fontsize=10, ncol=3, frameon=False)

def add_value_labels(ax):
    """Write each bar's height (three decimals) vertically just above the bar."""
    for bar in ax.patches:
        height = bar.get_height()
        ax.annotate(f'{height:.3f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8,
                    rotation=90)

add_value_labels(ax)
plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
figure_dir = '../../result/Figures/baseline_DL'
os.makedirs(figure_dir, exist_ok=True)
plt.savefig(os.path.join(figure_dir, 'DL_F1.png'), format='png', dpi=1200, bbox_inches='tight')
plt.savefig(os.path.join(figure_dir, 'DL_F1.svg'), format='svg', dpi=1200, bbox_inches='tight')

plt.show()



##### Deep Learning: Comparison based on Acc. 

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

plt.rcParams['font.family'] = 'Times New Roman'

# Load the methods x datasets accuracy table (first CSV column is the row index).
save_folder = "../Tables"
acc_path = os.path.join(save_folder, "baseline_F1_Acc", "baseline_Acc.csv")
acc_results = pd.read_csv(acc_path, index_col=0)
dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'AMB', 'TM', 'Baron Mouse']
method_names = ["WCSGNet", "scGraph", "ACTINN"]

deep_acc_results = acc_results.loc[method_names]

# One row per method, with columns ordered like dataset_names.
WCSGNet = deep_acc_results.loc["WCSGNet", dataset_names]
scGraph = deep_acc_results.loc["scGraph", dataset_names]
ACTINN = deep_acc_results.loc["ACTINN", dataset_names]

x = np.arange(len(dataset_names))
width = 0.25

plt.figure(figsize=(8, 4))

# Three bars per dataset, centred on the tick: left, middle, right.
plt.bar(x - width, WCSGNet, width, label='WCSGNet', color='#72B6A1', edgecolor='white', linewidth=1)
plt.bar(x, scGraph, width, label='scGraph', color='#E99675', edgecolor='white', linewidth=1)
plt.bar(x + width, ACTINN, width, label='ACTINN', color='#95A3C3', edgecolor='white', linewidth=1)

plt.xticks(x, dataset_names, fontsize=10, rotation=45)
plt.ylim(0.4, 1.0)

ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Legend sits below the axes so it does not cover the bars.
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.3), fontsize=10, ncol=3, frameon=False)

def add_value_labels(ax):
    """Write each bar's height (three decimals) vertically just above the bar."""
    for bar in ax.patches:
        height = bar.get_height()
        ax.annotate(f'{height:.3f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8,
                    rotation=90)

add_value_labels(ax)

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
figure_dir = '../../result/Figures/baseline_DL'
os.makedirs(figure_dir, exist_ok=True)
plt.savefig(os.path.join(figure_dir, 'DL_Acc.png'), format='png', dpi=1200, bbox_inches='tight')
plt.savefig(os.path.join(figure_dir, 'DL_Acc.svg'), format='svg', dpi=1200, bbox_inches='tight')

plt.show()

#### Plotting: Based on the benchmark datasets, compare with the SingleR method and create bar charts: F1 and accuracy

##### SingleR: F1

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

plt.rcParams['font.family'] = 'Times New Roman'

# Load the methods x datasets Mean-F1 table (first CSV column is the row index).
save_folder = "../Tables"
f1_path = os.path.join(save_folder, "baseline_F1_Acc", "baseline_Mean_F1.csv")
f1_results = pd.read_csv(f1_path, index_col=0)

dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'AMB', 'TM', 'Baron Mouse']
method_names = ["WCSGNet", "SingleR"]

sr_f1_results = f1_results.loc[method_names]

# One row per method, with columns ordered like dataset_names.
WCSGNet = sr_f1_results.loc["WCSGNet", dataset_names]
singleR = sr_f1_results.loc["SingleR", dataset_names]

x = np.arange(len(dataset_names))
width = 0.35

plt.figure(figsize=(6, 4))

# Two bars per dataset, placed on either side of the tick.
plt.bar(x - width/2, WCSGNet, width, label='WCSGNet', color='#72B6A1', edgecolor='white', linewidth=1)
plt.bar(x + width/2, singleR, width, label='singleR', color='#E99675', edgecolor='white', linewidth=1)

plt.xticks(x, dataset_names, fontsize=10, rotation=45)
plt.ylim(0, 1.0)

ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Legend sits below the axes so it does not cover the bars.
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.3), fontsize=10, ncol=2, frameon=False)

def add_value_labels(ax):
    """Write each bar's height (three decimals) vertically just above the bar."""
    for bar in ax.patches:
        height = bar.get_height()
        ax.annotate(f'{height:.3f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8,
                    rotation=90)

add_value_labels(ax)

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
figure_dir = '../../result/Figures/baseline_SingleR'
os.makedirs(figure_dir, exist_ok=True)
plt.savefig(os.path.join(figure_dir, 'singleR_F1.png'), format='png', dpi=1200, bbox_inches='tight')
plt.savefig(os.path.join(figure_dir, 'singleR_F1.svg'), format='svg', dpi=1200, bbox_inches='tight')

plt.show()



##### SingleR: Acc

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

plt.rcParams['font.family'] = 'Times New Roman'

# Load the methods x datasets accuracy table (first CSV column is the row index).
save_folder = "../Tables"
acc_path = os.path.join(save_folder, "baseline_F1_Acc", "baseline_Acc.csv")
acc_results = pd.read_csv(acc_path, index_col=0)

dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'AMB', 'TM', 'Baron Mouse']
method_names = ["WCSGNet", "SingleR"]

sr_acc_results = acc_results.loc[method_names]

# One row per method, with columns ordered like dataset_names.
WCSGNet = sr_acc_results.loc["WCSGNet", dataset_names]
singleR = sr_acc_results.loc["SingleR", dataset_names]

x = np.arange(len(dataset_names))
width = 0.35

plt.figure(figsize=(6, 4))

# Two bars per dataset, placed on either side of the tick.
plt.bar(x - width/2, WCSGNet, width, label='WCSGNet', color='#72B6A1', edgecolor='white', linewidth=1)
plt.bar(x + width/2, singleR, width, label='singleR', color='#E99675', edgecolor='white', linewidth=1)

plt.xticks(x, dataset_names, fontsize=10, rotation=45)
plt.ylim(0, 1.0)

ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Legend sits below the axes so it does not cover the bars.
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.3), fontsize=10, ncol=2, frameon=False)

def add_value_labels(ax):
    """Write each bar's height (three decimals) vertically just above the bar."""
    for bar in ax.patches:
        height = bar.get_height()
        ax.annotate(f'{height:.3f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8,
                    rotation=90)

add_value_labels(ax)

plt.tight_layout()

# Both formats go to the same folder (the SVG previously pointed at
# '../result/...', one directory level short). savefig does not create missing
# directories, so make sure the target exists.
figure_dir = '../../result/Figures/baseline_SingleR'
os.makedirs(figure_dir, exist_ok=True)
plt.savefig(os.path.join(figure_dir, 'singleR_acc.png'), format='png', dpi=1200, bbox_inches='tight')
plt.savefig(os.path.join(figure_dir, 'singleR_acc.svg'), format='svg', dpi=1200, bbox_inches='tight')

plt.show()


#### Comparison of rare cell type annotation results.

##### Rare cell type annotation: F1. 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

plt.rcParams['font.family'] = 'Times New Roman'

# Load the methods x datasets rare-cell-type F1 table (first CSV column is the
# row index). Cells may be NaN where no rare cell type was found.
save_folder = "../Tables"
rare_cell_type_f1_path = os.path.join(save_folder, "baseline_F1_Acc", "baseline_rare_cell_type_mean-F1.csv")
rare_f1_results = pd.read_csv(rare_cell_type_f1_path, index_col=0)

dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'AMB', 'TM', 'Baron Mouse']
method_names = ["WCSGNet", "scGraph", "ACTINN", "LDA", "NMC", "RF", "SVM", "SingleR"]

# Reorder the rows to the plotting order given by method_names.
filtered_rare_f1_results = rare_f1_results.loc[method_names]

visualization_folder = os.path.join("../../result/Figures", "rare_cell_type_mean_F1")
os.makedirs(visualization_folder, exist_ok=True)

# One colour per method, in the same order as method_names.
colors = [
    '#D16E5D',
    '#72B6A1',
    '#F3C678',
    '#95A3C3',
    '#6DA96D',
    '#F2B76A',
    '#E99675',
    '#7C92A9',
]

def add_value_labels(ax):
    """Write each bar's height (three decimals) vertically just above the bar.

    Bars whose height is NaN (no score available) are skipped, so no "nan"
    text is placed at a non-finite position.
    """
    for bar in ax.patches:
        height = bar.get_height()
        if pd.isna(height):
            continue
        ax.annotate(f'{height:.3f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8,
                    rotation=90)

# One small bar chart per dataset, saved as both PNG and SVG.
for dataset_name in dataset_names:
    dataset_f1 = filtered_rare_f1_results[dataset_name]
    plt.figure(figsize=(3, 2.5))
    x = range(len(method_names))
    width = 0.7
    plt.bar(x, dataset_f1, color=colors[:len(method_names)], edgecolor='grey', linewidth=0.5, width=width)
    # The dataset name doubles as the x-axis title; the ticks carry no text.
    plt.xlabel(dataset_name, fontsize=10, fontweight="bold")
    plt.ylim(0, 1)
    plt.xticks(ticks=x, labels=[''] * len(method_names), fontsize=8)
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    add_value_labels(ax)

    plot_path = os.path.join(visualization_folder, f"{dataset_name}_rare_F1_bar.png")
    plot_path2 = os.path.join(visualization_folder, f"{dataset_name}_rare_F1_bar.svg")
    plt.savefig(plot_path, format='png', dpi=1200, bbox_inches='tight')
    plt.savefig(plot_path2, format='svg', dpi=1200, bbox_inches='tight')
    plt.show()
    plt.close()

    print(f"Bar plot saved for dataset: {dataset_name} at {plot_path}")

print(f"All bar plots saved to {visualization_folder}")

##### Legend

In [None]:
import os

import matplotlib.pyplot as plt

plt.rcParams['font.family'] = 'Times New Roman'
method_names = ["WCSGNet", "scGraph", "ACTINN", "LDA", "NMC", "RF", "SVM", "SingleR"]

# One colour per method, in the same order as method_names.
colors = [
    '#D16E5D',
    '#72B6A1',
    '#F3C678',
    '#95A3C3',
    '#6DA96D',
    '#F2B76A',
    '#E99675',
    '#7C92A9',
]

plt.figure(figsize=(9, 0.3))
# Zero-sized bars draw nothing visible but register one legend entry per method.
for method, color in zip(method_names, colors):
    plt.bar(0, 0, color=color, label=method)

plt.legend(loc='center', bbox_to_anchor=(0.5, 0.5), fontsize=10, frameon=False, ncol=4)
plt.axis('off')

legend_path = "../../result/Figures/rare_cell_type_mean_F1/legend_only.png"
# Create the target folder here so this cell does not depend on the plotting
# cell having been run first.
os.makedirs(os.path.dirname(legend_path), exist_ok=True)
plt.savefig(legend_path, dpi=1200, bbox_inches='tight')
plt.show()
plt.close()

print(f"Legend saved as {legend_path}")
