In [None]:

import os
import numpy as np
import pandas as pd
# Datasets whose WCSGNet predictions are exported as per-dataset label CSVs.
datasets = ["Muraro", 'Baron_Mouse', 'Segerstolpe', 'Baron_Human', 'Zhang_T', 'Kang_ctrl', 'AMB', 'TM', 'Zheng68K']

base_path = "../../result/preds/"  # folder holding the *_prediction.h5 files
save_folder = "data/"              # root folder for the exported CSV files

for dataset in datasets:
    print("processing dataset: ", dataset)
    save_data_folder = os.path.join(save_folder, dataset)
    os.makedirs(save_data_folder, exist_ok=True)

    # seq_dict.npz supplies 'str_labels', which is indexed below with the
    # label values stored in the prediction file.
    seq_folder = os.path.join('../../dataset/5fold_data/', dataset)
    seq_dict_file = os.path.join(seq_folder, 'seq_dict.npz')
    # NOTE(review): allow_pickle=True unpickles objects from the archive, which
    # can execute arbitrary code; only load archives from a trusted source.
    # The context manager closes the archive's file handle on every iteration
    # instead of leaving one open handle per dataset.
    with np.load(seq_dict_file, allow_pickle=True) as seq_dict:
        str_labels = seq_dict['str_labels']

    data_file = os.path.join(base_path, f"{dataset}_a0.01_hvgs2000_prediction.h5")
    cell_type = pd.read_hdf(data_file, key='cell_type')
    true_labels = cell_type['true_cell_type'].values
    pred_labels = cell_type['pred_cell_type'].values

    # Translate each stored label value into its entry of str_labels.
    true_str_labels = [str_labels[label] for label in true_labels]
    pred_str_labels = [str_labels[label] for label in pred_labels]

    # Single-column frames; to_csv writes the default column name as a header row.
    true_df = pd.DataFrame(true_str_labels)
    pred_df = pd.DataFrame(pred_str_labels)

    true_label_csv_path = os.path.join(save_data_folder, "wcsgnet_true.csv")
    pred_label_csv_path = os.path.join(save_data_folder, "wcsgnet_pred.csv")

    true_df.to_csv(true_label_csv_path, index=False)
    pred_df.to_csv(pred_label_csv_path, index=False)

    print(f"Saved true labels to {true_label_csv_path}")
    print(f"Saved predicted labels to {pred_label_csv_path}")
    

In [None]:
from sklearn.metrics import precision_score,f1_score, accuracy_score
import os
import numpy as np
import pandas as pd

# On-disk folder names and the display names used as columns of the result table.
datasets = ['Zhang_T', 'Kang_ctrl', 'Zheng68K','Baron_Human', 'Muraro', 'Segerstolpe', 'AMB', 'TM', 'Baron_Mouse']
dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'Segerstolpe','AMB', 'TM', 'Baron Mouse']
# File-name prefixes of each method and the display names used as rows of the result table.
methods = ["wcsgnet", "scGraph", "LDA", "NMC", "RF", "SVM", "SingleR", "CHETAH", "ACTINN"]
method_names = ["WCSGNet", "scGraph", "LDA", "NMC", "RF", "SVM", "SingleR", "CHETAH", "ACTINN"]

save_folder = "data"

# One row per method, one column per dataset; cells are filled in the loops below.
rare_f1_results = pd.DataFrame(index=method_names, columns=dataset_names)

for dataset, dataset_name in zip(datasets, dataset_names):
    print("processing dataset: ", dataset)
    # This folder holds the true and predicted label CSVs of every method for the dataset.
    save_data_folder = os.path.join(save_folder, dataset)

    f1_results = pd.DataFrame()  # not read anywhere in this cell

    for method, method_name in zip(methods, method_names):
        print("method: ", method_name)

        # Labels are taken from the first column of each method's CSV pair.
        true_label_csv_path = os.path.join(save_data_folder, f"{method}_true.csv")
        pred_label_csv_path = os.path.join(save_data_folder, f"{method}_pred.csv")
        true_df = pd.read_csv(true_label_csv_path)
        pred_df = pd.read_csv(pred_label_csv_path)
        label_true = true_df.iloc[:, 0].values
        label_pred = pred_df.iloc[:, 0].values

        # A class is "rare" when it accounts for less than 3% of the true labels.
        unique, counts = np.unique(label_true, return_counts=True)
        total_cells = len(label_true)
        cell_frequencies = dict(zip(unique, counts))
        rare_classes = [
            name
            for name in cell_frequencies
            if cell_frequencies[name] / total_cells < 0.03
        ]

        if rare_classes:
            # Macro-averaged F1 restricted to the rare classes only.
            rare_f1 = f1_score(label_true, label_pred, average='macro', labels=rare_classes)
            rare_f1_results.loc[method_name, dataset_name] = rare_f1
            print(f"Rare cell type F1 score for {method_name} in {dataset_name}: {rare_f1}")
        else:
            # Nothing to score: record a missing value for this method/dataset pair.
            print(f"No rare cell types in dataset {dataset_name} for method {method_name}")
            rare_f1_results.loc[method_name, dataset_name] = np.nan


rare_cell_type_f1_path = os.path.join(save_folder,  "baseline_rare_cell_type_mean-F1.csv")
rare_f1_results.to_csv(rare_cell_type_f1_path)
print(f"Rare-F1 scores saved to {rare_cell_type_f1_path}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# Use Times New Roman for all text in the figures.
plt.rcParams.update({'font.family': 'Times New Roman'})

# Column (dataset) and row (method) labels expected in the rare-F1 table.
dataset_names = ['Zhang T', 'Kang', 'Zheng 68k', 'Baron Human', "Muraro", 'Segerstolpe', 'AMB', 'TM', 'Baron Mouse']
method_names = ["WCSGNet", "scGraph", "LDA", "NMC", "RF", "SVM", "SingleR", "CHETAH", "ACTINN"]

# Load the rare-F1 table; its first CSV column becomes the index.
save_folder = "data/"
rare_cell_type_f1_path = os.path.join(save_folder, "baseline_rare_cell_type_mean-F1.csv")
rare_f1_results = pd.read_csv(rare_cell_type_f1_path, index_col=0)

# Output folder for the figures, created if it does not exist yet.
visualization_folder = "../../result/Figures/"
os.makedirs(visualization_folder, exist_ok=True)

# Bar fill colours, one per bar position.
colors = [
    '#D16E5D', '#72B6A1', '#F3C678',
    '#95A3C3', '#6DA96D', '#F2B76A',
    '#E99675', '#7C92A9', '#C9A585',
]

def add_value_labels(ax):
    """Annotate every bar of ``ax`` with its height, formatted to three decimals.

    Parameters
    ----------
    ax : object
        Anything exposing ``patches`` (an iterable of bars with ``get_height``,
        ``get_x`` and ``get_width``) and ``annotate``; in this notebook, the
        matplotlib Axes holding the bar chart.

    Notes
    -----
    Bars whose height is missing (NaN) are skipped. The rare-F1 table stores
    NaN when a dataset has no rare classes, and such a bar would otherwise be
    annotated with the text "nan" at a non-finite position.
    """
    for bar in ax.patches:
        height = bar.get_height()
        if pd.isna(height):
            # No score available for this bar: nothing meaningful to print.
            continue
        ax.annotate(f'{height:.3f}',
                    # Anchor at the top centre of the bar ...
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    # ... and shift the text 3 points upwards.
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8,
                    rotation=90)

# One bar chart per dataset: a bar per method, saved as both PNG and SVG.
for dataset_name in dataset_names:
    dataset_f1 = rare_f1_results[dataset_name]

    n_methods = len(method_names)
    x = range(n_methods)
    width = 0.7

    fig, ax = plt.subplots(figsize=(3, 2.5))
    ax.bar(x, dataset_f1, color=colors[:n_methods], edgecolor='grey', linewidth=0.5, width=width)

    # The dataset name serves as the x-axis label; tick labels are left blank.
    ax.set_xlabel(dataset_name, fontsize=10, fontweight="bold")
    ax.set_ylim(0, 1)
    plt.xticks(ticks=x, labels=[''] * n_methods, fontsize=8)

    # Hide the top and right frame lines.
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    plt.xticks(rotation=45, ha='right')
    fig.tight_layout()

    add_value_labels(ax)

    plot_path = os.path.join(visualization_folder, f"{dataset_name}_rare_F1_bar.png")
    plot_path2 = os.path.join(visualization_folder, f"{dataset_name}_rare_F1_bar.svg")
    fig.savefig(plot_path, format='png', dpi=1200, bbox_inches='tight')
    fig.savefig(plot_path2, format='svg', dpi=1200, bbox_inches='tight')
    plt.show()
    # Release the figure so figures do not accumulate across iterations.
    plt.close(fig)

    print(f"Bar plot saved for dataset: {dataset_name} at {plot_path}")

print(f"All bar plots saved to {visualization_folder}")