RANK PLOTTER FOR BAYESIAN NETWORKS (BN):

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import rankdata, friedmanchisquare
from Orange.evaluation import compute_CD, graph_ranks


# Critical-difference (CD) diagram over the Bayesian-network datasets.
#
# For every dataset one CSV of scores is read (one column per method); the rows
# of all datasets are stacked, each row is ranked across the methods, and the
# average ranks are compared with a Friedman test and drawn as a CD diagram.

# WRITE "CER" OR "UNC" OR "TP" DEPENDING OF THE SCORE YOU WANT.
# This single setting drives the input file name, the ranking direction and
# the (commented-out) output file name, so they can no longer disagree.
score = "CER"

datasets = ["ecoli70","magic_niab","magic_irri","arth150","healthcare","sangiovese", "mehra"]

# Read every per-dataset table first and concatenate once, instead of growing
# a DataFrame inside the loop.
score_tables = []
for dataset_name in datasets:
    file_path = "save/save_" + dataset_name + "/" + dataset_name + "_" + score + ".csv"
    # Skip the unnamed index column that the CSV carries.
    score_tables.append(pd.read_csv(file_path, usecols=lambda column: column != 'Unnamed: 0'))

df = pd.concat(score_tables, ignore_index=True)

# One row per CSV row, one column per method.
performance_data = df.to_numpy()

print(df)

# Rank the methods within each row. rankdata gives rank 1 to the smallest
# value, so "CER" is ranked on its negation (largest value -> rank 1), while
# the other scores are ranked as-is (smallest value -> rank 1).
if score == "CER":
    ranks = np.array([rankdata(-row) for row in performance_data])
else:
    ranks = np.array([rankdata(row) for row in performance_data])
print(f"Ranks:\n{ranks}")

# Compute the average rank of each method over all rows.
average_ranks = np.mean(ranks, axis=0)
print(f"Average ranks:\n{average_ranks}")

# Perform the Friedman test; each positional argument is one method's column.
_, p_value = friedmanchisquare(*performance_data.T)
print(f"Friedman test p-value: {p_value}")

# Compute the critical difference (alpha is passed as a string, as before).
cd = compute_CD(average_ranks, n=len(performance_data), alpha='0.05')
print(f"Critical Difference: {cd}")

# Method names shown on the diagram are the CSV column names.
names = [f"{column}" for column in df.columns]

# Plot the CD diagram
graph_ranks(average_ranks, names=names, cd=cd, width=10, textspace=1.5,  reverse=True)


#plt.savefig('save/save_PLOTS/CD_' + score + '_BN.pdf')

plt.show()

RANK PLOTTER FOR REAL-WORLD DATASETS

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import rankdata, friedmanchisquare
from Orange.evaluation import compute_CD, graph_ranks


# Critical-difference (CD) diagrams over the real-world datasets.
#
# For every dataset the "Best_conformal_scores" CSV is read, split into the
# rows whose label contains "KNN" and those containing "SVM", and transposed.
# For the selected classifier, one CD diagram is produced per performance score.

datasets = ["ionosphere","sonar","myocardial","spambase","toxicity", "breast", "IMV_ABA"]

# CHANGE BETWEEN "KNN" AND "SVM" HERE.
# This single setting selects both the table that is analysed and the suffix
# of the saved PDF, so the two can no longer get out of sync.
classifier = "KNN"

# Collect the per-dataset tables and concatenate once per classifier instead
# of growing the DataFrames inside the loop.
knn_tables = []
svm_tables = []
for dataset_name in datasets:

    file_path = "save/save_" + dataset_name + "/Best_conformal_scores" + dataset_name + ".csv"

    dicc = pd.read_csv(file_path)
    dicc = dicc.drop(columns=['P', 'P.1', 'P.2', 'P.3'])

    # The first remaining column holds the row labels; use it as a string index
    # so the rows can be selected by substring.
    dicc = dicc.set_index(dicc.columns[0])
    dicc.index = dicc.index.astype(str)

    # Transpose so the former column names become the index of the stacked table.
    knn_tables.append(dicc[dicc.index.str.contains("KNN")].T)
    svm_tables.append(dicc[dicc.index.str.contains("SVM")].T)

df_SVM = pd.concat(svm_tables, ignore_index=False)
df_KNN = pd.concat(knn_tables, ignore_index=False)

scores_by_classifier = {"KNN": df_KNN, "SVM": df_SVM}
df_classifier = scores_by_classifier[classifier]


performance_scores = ["inefficiency", "certainty"]
for name in performance_scores:

    # Keep only the rows whose index label contains this score as a whole word.
    df = df_classifier[df_classifier.index.str.contains(rf'\b{name}\b')]
    print(df)

    performance_data = df.to_numpy()

    # Rank the columns within each row. rankdata gives rank 1 to the smallest
    # value: "inefficiency" is ranked as-is, any other score on its negation
    # (largest value -> rank 1).
    if name == "inefficiency":
        ranks = np.array([rankdata(row) for row in performance_data])
    else:
        ranks = np.array([rankdata(-row) for row in performance_data])
    print(f"Ranks:\n{ranks}")

    # Compute the average rank of each column over all rows.
    average_ranks = np.mean(ranks, axis=0)
    print(f"Average ranks:\n{average_ranks}")

    # Perform the Friedman test; each positional argument is one column.
    _, p_value = friedmanchisquare(*performance_data.T)
    print(f"Friedman test p-value: {p_value}")

    # Compute the critical difference (alpha is passed as a string, as before).
    cd = compute_CD(average_ranks, n=len(performance_data), alpha='0.05')
    print(f"Critical Difference: {cd}")

    # Names shown on the diagram are the column labels of the selected table.
    names = [f"{column}" for column in df.columns]

    # Plot the CD diagram
    graph_ranks(average_ranks, names=names, cd=cd, width=10, textspace=1.5,  reverse=True)


    plt.savefig('save/save_PLOTS/CD_'+ name +'_' + classifier + '.pdf')

    plt.show()

STABILITY RANK PLOT

In [None]:
import numpy as np
import pandas as pd
import pickle
import json
import os
import matplotlib.pyplot as plt
from scipy.stats import rankdata, friedmanchisquare
from Orange.evaluation import compute_CD, graph_ranks


# Stability rank plot.
#
# For every dataset and method a "STABILITY_<dataset>_<method>.csv" file is
# read and the selected score column extracted. Within each dataset the methods
# are ranked against each other row by row; the rank tables are then averaged
# over the datasets and drawn as one line per method.

datasets = ["ionosphere","sonar","myocardial","spambase", "toxicity", "breast", "IMV_ABA"]
list_of_scores = [ "Nogue"]
list_of_methods = [ "mRMR_MS_linear" , "mRMR_MS_poly",  "mRMR_MS_rbf", "mRMR" ] # INCLUDE AT THE END OF THE LIST: "JMI", "mRMR", "relax_mRMR"

# Every per-dataset rank table is padded to this many rows so that tables of
# different length can be stacked and averaged.
n_rows_expected = 50

# Line styles; they are cycled, so any number of methods can be plotted.
colors = [  'm', 'y', 'k', 'b', 'g', 'r']
markers = [ 'D', 'v', '<', '>', 'o', 's', '^']
linestyles = ['-', '--', '-.', ':', '-', '--', '-.']

fig, ax = plt.subplots(figsize=(12, 6))  # a single axes

for score in list_of_scores:

    stacked_arrays = []
    for dataset_name in datasets:

        # One score vector per method, stacked into (n_methods, n_rows).
        # Stacking a list keeps the array 2-D even for a single method.
        per_method_scores = []
        for method in list_of_methods:

            file_path = "save/save_" + dataset_name + "/" + "STABILITY_" +  dataset_name + "_" +  method + ".csv"

            csv = pd.read_csv(file_path)
            per_method_scores.append(csv[score].to_numpy())

        performance_of_methods = np.vstack(per_method_scores)

        # Rank the methods within each row; the scores are negated so the
        # largest score receives rank 1. Result shape: (n_rows, n_methods).
        rank = np.array([rankdata(-column) for column in performance_of_methods.T])

        if rank.shape[0] > n_rows_expected:
            raise ValueError(
                f"{dataset_name}: {rank.shape[0]} rows exceed the expected maximum of {n_rows_expected}"
            )

        # Pad shorter tables with NaN; nanmean below ignores the padding.
        if rank.shape[0] != n_rows_expected:
            padded = np.full((n_rows_expected, len(list_of_methods)), np.nan)
            padded[:rank.shape[0], :rank.shape[1]] = rank
            rank = padded

        stacked_arrays.append(rank)

    # Stack all tables along a new leading axis (datasets) and average over it.
    rank_of_datasets = np.stack(stacked_arrays, axis=0)
    average_ranks = np.nanmean(rank_of_datasets, axis=0)

    # One line per method (one column of average_ranks).
    num_columns = average_ranks.shape[1]
    for i in range(num_columns):
        ax.plot(
            average_ranks[:, i],
            color=colors[i % len(colors)],
            marker=markers[i % len(markers)],
            linestyle=linestyles[i % len(linestyles)],
            label=f'{list_of_methods[i]}',
        )

# Labels, title and other properties of the single plot.
ax.set_xlabel('Top ranked features', fontsize=14)
ax.set_ylabel('Average Rank', fontsize=14)
ax.set_title('Stability', fontsize=14)
ax.set_ylim(0.85, 6.1)  # fixed y-axis range
ax.tick_params(axis='both', which='major', labelsize=12)
ax.grid(color='gray', linestyle='--', linewidth=0.5, alpha=0.7)


# Figure-level legend placed below the plot.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.005), ncol=6, fontsize=13)

# Adjust layout, leaving room at the bottom for the legend.
plt.tight_layout(rect=[0.01, 0.08, 1, 1])

plt.savefig('save/save_PLOTS/STABILITY_ranks_mRMR.pdf')
plt.show()
plt.clf()