Original implementation of the Contrastive-sc method
(https://github.com/ciortanmadalina/contrastive-sc)

In [1]:
import sys
sys.path.append("..")
import argparse
from sklearn.metrics import (adjusted_rand_score, normalized_mutual_info_score, 
                             silhouette_score, calinski_harabasz_score)
from sklearn.cluster import KMeans
from sklearn import metrics

import torch
import torch.nn as nn
import copy
from tqdm.notebook import tqdm
import models
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import st_loss

import h5py
import scipy as sp
import scanpy.api as sc
from collections import Counter
import random
import utils
import loop
import pickle

import train
import os
import glob2
plt.ion()
plt.show()
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)


In a future version of Scanpy, `scanpy.api` will be removed.
Simply use `import scanpy as sc` and `import scanpy.external as sce` instead.



In [2]:
!ls ../output/pickle_results/real_data/

 description.xlsx	        real_data_noise.pkl
 optimal_input_size.pkl         real_data_pca_kmeans.pkl
 real_data_1model.pkl	       'real_data_scDeepCluster (copy).pkl'
 real_data_baseline.pkl         real_data_scDeepCluster.pkl
 real_data_baseline_cpu.pkl     real_data_scDeepCluster1.pkl
 real_data_combined.pkl         real_data_scanpy.pkl
 real_data_dataset_tuning.pkl   real_data_scedar.pkl
 real_data_desc.pkl	        real_data_scrna.pkl
 real_data_layers.pkl	        real_data_scvi.pkl
 real_data_layers_new.pkl       real_data_sczi.pkl
 real_data_lr.pkl	        real_data_train_size.pkl
 real_data_nb_epochs.pkl        real_data_worm.pkl
 real_data_nb_genes.pkl


In [4]:
# Write "<category>_scDeepCluster1.pkl": the main scDeepCluster results plus
# the Quake_10x_Spleen rows taken from the "(copy)" result file.
path = "../"
category = "real_data"

scDeepCluster = pd.read_pickle(
    f"../output/pickle_results/{category}/{category}_scDeepCluster.pkl")
scDeepCluster1 = pd.read_pickle(
    f"../output/pickle_results/{category}/{category}_scDeepCluster (copy).pkl")

# Only the spleen dataset is taken from the "(copy)" file.
is_spleen = scDeepCluster1.dataset == "Quake_10x_Spleen"
patched = pd.concat([scDeepCluster, scDeepCluster1[is_spleen]])
patched.to_pickle(
    f"../output/pickle_results/{category}/{category}_scDeepCluster1.pkl")

In [None]:
# scvi = pd.read_pickle(f"../output/pickle_results/{category}/{category}_scvi.pkl")
# scvi

In [5]:
# Build one "<category>_combined.pkl" table per data category. Each table
# stacks the contrastive-sc results (k-means and Leiden columns of the
# baseline file) with the result files of the other methods, then tags every
# row with the position used to order methods in the figures.

path = ".."

# Dropout value attached to each simulated dataset, keyed by dataset name.
sc_dropout = {
    'data_1c8': 0.3,
    'data_-1c4': 0.08,
    'data_-1c8': 0.08,
    'data_0c4': 0.17,
    'data_0c8': 0.17,
    'data_0c16': 0.17,
    'data_1.5c4': 0.38,
    'data_1c4': 0.3,
    'data_1.5c8': 0.38,
    'data_1.5c16': 0.38,
    'data_-1c16': 0.08,
    'data_1c16': 0.3,
    'data_0c32': 0.17,
    'data_1.5c32': 0.38,
    'data_1c32': 0.3,
    'data_-1c32': 0.08
}

# Method name -> plotting position (0 is drawn first).
method_sequence = [
    'constrastive+KM',
    'scziDesk',
    'scDeepCluster',
    'scrna',
    'cidr',
    'soup',
    'pca_kmeans',
    'constrastive+LD',
    'desc',
    'scanpy-seurat',
    'scedar',
    'scvi',
    'raceid',
]
ordered_methods = dict(zip(method_sequence, np.arange(len(method_sequence))))

# Columns of the baseline file kept for each clustering variant, and the
# common names they are renamed to so both variants can be stacked.
kmeans_columns = [
    'dataset', 'dropout',
    'kmeans_ari', 'kmeans_nmi', 'kmeans_sil', 'kmeans_cal',
    'run', "t_k", 'kmeans_pred',
]
kmeans_names = {
    'kmeans_ari': "ARI",
    'kmeans_nmi': "NMI",
    'kmeans_pred': 'pred',
    'kmeans_sil': "sil",
    'kmeans_cal': "cal",
    't_k': 'time'
}
leiden_columns = [
    'dataset', 'dropout',
    'leiden_ari', 'leiden_nmi', 'leiden_sil', 'leiden_cal',
    'run', 't_l', 'leiden_pred',
]
leiden_names = {
    'leiden_ari': "ARI",
    'leiden_nmi': "NMI",
    'leiden_pred': 'pred',
    'leiden_sil': "sil",
    'leiden_cal': "cal",
    't_l': 'time'
}

for category in ["real_data", "imbalanced_data", "balanced_data"]:
    simulated = category in ["imbalanced_data", "balanced_data"]

    # Contrastive-sc results: one frame per clustering variant.
    df = pd.read_pickle(f"../output/pickle_results/{category}/{category}_baseline.pkl")
    df_k = df[kmeans_columns].rename(columns=kmeans_names).assign(method="constrastive+KM")
    df_l = df[leiden_columns].rename(columns=leiden_names).assign(method="constrastive+LD")

    # Results stored under ../R; this frame is not tagged here, so it is
    # expected to already carry a "method" column.
    r_data = pd.read_pickle(f"../R/{category}.pkl")

    # The real-data category reads the "scDeepCluster1" file instead of the
    # plain "scDeepCluster" one.
    if category == "real_data":
        scDeepCluster_file = f"../output/pickle_results/{category}/{category}_scDeepCluster1.pkl"
    else:
        scDeepCluster_file = f"../output/pickle_results/{category}/{category}_scDeepCluster.pkl"
    scDeepCluster = pd.read_pickle(scDeepCluster_file).assign(method="scDeepCluster")

    sczi = pd.read_pickle(
        f"../output/pickle_results/{category}/{category}_sczi.pkl"
    ).assign(method="scziDesk")

    scedar = pd.read_pickle(
        f"../output/pickle_results/{category}/{category}_scedar.pkl"
    ).assign(method="scedar")

    scanpy = pd.read_pickle(
        f"../output/pickle_results/{category}/{category}_scanpy.pkl"
    ).assign(method="scanpy-seurat")

    scrna = pd.read_pickle(
        f"../output/pickle_results/{category}/{category}_scrna.pkl"
    ).assign(method="scrna")

    pca_kmeans = pd.read_pickle(
        f"../output/pickle_results/{category}/{category}_pca_kmeans.pkl"
    ).assign(method="pca_kmeans")

    scvi = pd.read_pickle(
        f"../output/pickle_results/{category}/{category}_scvi.pkl"
    ).assign(method="scvi")

    # desc predictions are converted with .to_list() before stacking.
    desc = pd.read_pickle(f"../output/pickle_results/{category}/{category}_desc.pkl")
    desc["pred"] = desc["pred"].apply(lambda labels: labels.to_list())
    desc["method"] = "desc"

    stacked = [df_k, df_l, r_data, desc, pca_kmeans, sczi,
               scDeepCluster, scedar, scanpy, scvi, scrna]
    all_data = (
        pd.concat(stacked)
        .reset_index(drop=True)
        .rename(columns={"sil": "Silhouette", "cal": "Calinski"})
    )

    # Print the dataset names found on disk (file name without the ".h5" suffix).
    if simulated:
        prefix = f"{path}/R/simulated_data/{category}/"
        found = glob2.glob(f'{path}/R/simulated_data/{category}/*.h5')
    else:
        prefix = f"{path}/real_data/"
        found = glob2.glob(f'{path}/real_data/*.h5')
    files = [name[len(prefix):-3] for name in found]
    print(files)

    if simulated:
        # Simulated dataset names end in "c<number>"; that number is stored
        # as nb_clust and the dropout comes from the lookup table.
        all_data["dropout"] = all_data["dataset"].apply(lambda name: sc_dropout[name])
        all_data["nb_clust"] = all_data["dataset"].apply(lambda name: int(name.split("c")[1]))

    all_data["order"] = all_data["method"].apply(lambda name: ordered_methods[name])

    all_data.to_pickle(f"../output/pickle_results/{category}/{category}_combined.pkl")

['Quake_Smart-seq2_Trachea', 'Quake_Smart-seq2_Diaphragm', 'Quake_10x_Spleen', 'Young', 'mouse_ES_cell', 'Adam', 'Quake_10x_Bladder', 'Quake_Smart-seq2_Lung', 'Quake_10x_Limb_Muscle', 'worm_neuron_cell', 'mouse_bladder_cell', 'Romanov', 'Quake_Smart-seq2_Limb_Muscle', 'Muraro', '10X_PBMC']
['data_1c8', 'data_-1c4', 'data_-1c8', 'data_0c4', 'data_0c8', 'data_0c16', 'data_1.5c4', 'data_1c4', 'data_1.5c8', 'data_1.5c16', 'data_-1c16', 'data_1c16']
['data_1c8', 'data_-1c4', 'data_-1c8', 'data_0c4', 'data_0c8', 'data_0c16', 'data_1.5c4', 'data_1c4', 'data_1.5c8', 'data_1.5c16', 'data_-1c16', 'data_1c16']


In [None]:
#     ordered_methods = [
#         'constrastive+KM',
#         'scziDesk',
#         'scDeepCluster',
#         'scrna',
#         'cidr',
#         'soup',
#         'pca_kmeans',
#         'constrastive+LD',
#         'desc',
#         'scanpy-seurat',
#         'scedar',
#         'scvi',
#         'raceid',
        
        
#     ]

In [None]:
# Load the combined results of one data category for interactive inspection.
# The other categories written by the notebook are "balanced_data" and "real_data".
category = "imbalanced_data"
combined_file = f"../output/pickle_results/{category}/{category}_combined.pkl"
all_data = pd.read_pickle(combined_file)

In [None]:
# all_data[all_data["method"] == "desc"]

In [None]:
import seaborn as sns

# Palette handed to every bar plot below (13 colours).
clrs = [
    '#C0392B',
    "#F1948A",
    "#D7BDE2",
    "#8E44AD",
    "#7FB3D5",
    "#2874A6",
    "#76D7C4",
    "#117A65",
    '#00ff00',
    "#F1C40F",
    "#EB984E",
    "#839192",
    "#566573",
]
len(clrs)

In [None]:
# Best rank reached by contrastive-sc (k-means or Leiden variant) on each
# imbalanced dataset, based on the mean ARI of every method.
category = "imbalanced_data"
all_data = pd.read_pickle(f"../output/pickle_results/{category}/{category}_combined.pkl")

# Rows = methods, columns = datasets; mean ARI rounded to 3 decimals.
mean_ari = all_data.groupby(["dataset", "method"])["ARI"].mean().unstack("method").round(3).T

# Rank methods within each dataset: 1 = highest ARI, ties share the lowest rank.
rank = mean_ari.rank(ascending=False, method="min")
rankKM = rank.T["constrastive+KM"]
rankLD = rank.T["constrastive+LD"]

# Keep the better (smaller) of the two ranks per dataset. DataFrame.min skips
# NaN, whereas Python's min() over a row gives an order-dependent result when
# one of the two ranks is missing.
rank = pd.concat([rankKM, rankLD], axis=1).min(axis=1).rename("rank")
rank

# Overall Comparison

In [None]:
# Figure "simulated.pdf": for the balanced and the imbalanced simulated data,
# a detailed ARI panel per dataset, the average ARI per method, and the ratio
# of predicted to true number of clusters per method.

# Everything that differs between the two categories. Titles and labels are
# kept exactly as they appear in the published figure.
simulated_specs = [
    dict(category="balanced_data",
         detail_pos=411, average_pos=425, ratio_pos=427,
         detail_title="(a) Detailed Balanced data",
         average_title="(c) Average Balanced data",
         ratio_title="(e)Balanced data - Nb predicted clusters precision",
         ratio_ylabel="Nb predicted clusters",
         rank_y=0.98, average_star_y=0.80, ratio_star_y=2,
         show_legend=True),
    dict(category="imbalanced_data",
         detail_pos=412, average_pos=426, ratio_pos=428,
         detail_title="(b) Detailed Imbalanced data",
         average_title="(d)Average Imbalanced data",
         ratio_title="(f)Imbalanced data - Nb predicted clusters precision",
         ratio_ylabel="Nb predicted clusters ",
         rank_y=0.96, average_star_y=0.72, ratio_star_y=4,
         show_legend=False),
]


def _rank_by_mean_ari(results):
    """Rank methods within each dataset by mean ARI.

    Returns a frame indexed by dataset with one column per method;
    1 is the highest mean ARI (rounded to 3 decimals) and ties share the
    lowest rank.
    """
    mean_ari = results.groupby(["dataset", "method"])["ARI"].mean().unstack("method").round(3).T
    return mean_ari.rank(ascending=False, method="min").T


def _mark_bars(height):
    """Write a '*' at the given height above bar positions 7 to 12.

    NOTE(review): the meaning of the marker is not stated in the notebook;
    positions refer to the "order" column of the plotted data.
    """
    for position in range(7, 13):
        plt.text(position, height, "*")


def _plot_simulated_category(spec):
    """Draw the three panels of one simulated category on the current figure.

    `spec` is one entry of `simulated_specs`. Returns the loaded results with
    the added "label" and "nb_pred_clust" columns.
    """
    category = spec["category"]
    results = pd.read_pickle(f"../output/pickle_results/{category}/{category}_combined.pkl")
    results["label"] = results.apply(
        lambda row: f"dp: {row['dropout']}\n{row['nb_clust']} clust", axis=1)
    # Ratio of distinct predicted labels to the true number of clusters
    # (1 means the number of clusters was recovered exactly).
    results["nb_pred_clust"] = (
        results["pred"].apply(lambda pred: np.unique(pred).shape[0]) / results["nb_clust"])

    ranks = _rank_by_mean_ari(results)
    rankKM = ranks["constrastive+KM"]
    rankLD = ranks["constrastive+LD"]

    detailed = results.sort_values(by=["dropout", 'nb_clust', "order"])
    by_method = results.sort_values(by="order")

    # Detailed panel: one group of bars per dataset.
    plt.subplot(spec["detail_pos"])
    sns.barplot(x="label", y="ARI", data=detailed, hue="method",
                palette=clrs, edgecolor='black', linewidth=0.8)
    plt.xticks(fontsize=13)
    # Rank of the two contrastive variants, written above each dataset group.
    for i, dataset in enumerate(detailed["dataset"].unique()):
        plt.text(i - 0.1, spec["rank_y"],
                 f"#{int(rankKM[dataset])}, #{int(rankLD[dataset])}", fontsize=13)
    if spec["show_legend"]:
        plt.legend(bbox_to_anchor=(0, 1.45), loc=2, borderaxespad=0., ncol=10, fontsize=13)
    else:
        # Hide the legend entirely; plt.legend([]) would leave an empty frame.
        plt.legend([], [], frameon=False)
    plt.xlabel("")
    plt.title(spec["detail_title"])

    # Average ARI per method; x tick labels are hidden on this panel.
    plt.subplot(spec["average_pos"])
    sns.barplot(x="method", y="ARI", data=by_method,
                palette=clrs, edgecolor='black', linewidth=1.5)
    plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    _mark_bars(spec["average_star_y"])
    plt.xlabel("")
    plt.title(spec["average_title"])

    # Predicted / true number of clusters per method, with a reference line at 1.
    plt.subplot(spec["ratio_pos"])
    sns.barplot(x="method", y="nb_pred_clust", data=by_method,
                palette=clrs, edgecolor='black', linewidth=1.5)
    plt.xticks(fontsize=13, rotation=70)
    plt.axhline(y=1, c="black")
    plt.ylabel(spec["ratio_ylabel"])
    _mark_bars(spec["ratio_star_y"])
    plt.title(spec["ratio_title"])
    plt.xlabel("")
    return results


plt.figure(figsize=(20, 13))
for spec in simulated_specs:
    category = spec["category"]
    all_data = _plot_simulated_category(spec)
# Without arguments despine acts on every axes of the current figure, so one
# call after all panels exist is enough.
sns.despine()
plt.tight_layout()
plt.savefig(f"../diagrams/simulated.pdf", bbox_inches='tight')

In [None]:
# Real-data results joined with the true number of clusters of each dataset.
category = "real_data"
desc = pd.read_excel("../output/pickle_results/real_data/description.xlsx")
all_data = pd.read_pickle(f"../output/pickle_results/{category}/{category}_combined.pkl")

# validate="many_to_one" fails loudly if the description sheet lists a dataset
# twice, which would otherwise silently duplicate that dataset's result rows.
all_data = pd.merge(all_data, desc[["dataset", "nb_clusters"]],
                    on="dataset", validate="many_to_one")

# Ratio of distinct predicted labels to the true number of clusters
# (1 means the number of clusters was recovered exactly).
all_data["nb_pred_clust"] = (
    all_data["pred"].apply(lambda pred: np.unique(pred).shape[0]) / all_data["nb_clusters"])

In [None]:
# Mean value of the "time" column for each method.
all_data.groupby("method")[["time"]].mean()

In [None]:
# Figure "real.pdf": six per-method bar plots on a 2 x 3 grid, computed from
# the real-data `all_data` frame prepared above.
plt.figure(figsize=(14, 8))

# One entry per panel, in grid order:
# (column plotted, title, y label or None to keep the column name,
#  log-scaled y axis, horizontal reference line at y=1, height of the '*' marks)
real_panels = [
    ("ARI", "(a) ARI real data", None, False, False, 0.7),
    ("NMI", "(b) NMI real data", None, False, False, 0.8),
    ("Silhouette", "(c) Silhouette real data", None, False, False, 0.6),
    ("Calinski", "(d)  Calinski real data", "log Calinski", True, False, 20000),
    # NOTE(review): the y=1 line on the time panel may be a copy-paste
    # leftover from the cluster-ratio panel; kept because it is in the
    # original figure.
    ("time", "(e) Execution time", "log seconds", True, True, 3000),
    ("nb_pred_clust", "(f) Nb predicted clusters", "Nb predicted clusters", False, True, 4),
]

by_method = all_data.sort_values(by="order")
for position, (column, title, ylabel, log_scale, unit_line, star_y) in enumerate(real_panels, start=1):
    ax = plt.subplot(2, 3, position)
    sns.barplot(x="method", y=column, data=by_method,
                palette=clrs, edgecolor='black', linewidth=1.5)
    if log_scale:
        ax.set(yscale="log")
    if unit_line:
        plt.axhline(y=1, c="black")
    if ylabel is not None:
        plt.ylabel(ylabel)
    plt.xticks(rotation=90)
    plt.xlabel("")
    plt.title(title)
    # '*' above bar positions 7 to 12 (positions follow the "order" column).
    for j in range(7, 13):
        plt.text(j, star_y, "*")

# Without arguments despine acts on every axes of the current figure.
sns.despine()
plt.tight_layout()
plt.savefig(f"../diagrams/real.pdf", bbox_inches='tight')

In [None]:
# Pairwise correlations between the numeric columns only. The frame also
# holds text and list columns (e.g. "dataset", "method", "pred"), on which
# DataFrame.corr() raises in pandas >= 2.0 instead of dropping them silently.
all_data.select_dtypes(include=["number", "bool"]).corr()

In [None]:
# Mean of every numeric column per method. numeric_only=True is explicit
# because pandas >= 2.0 no longer drops non-numeric columns by default and
# would raise on the text and list columns of this frame.
all_data.groupby("method").mean(numeric_only=True)

In [None]:
# titles = ["(a)", "(b)", "(c)", "(d)"]
# plt.figure(figsize = (12, 12))
# category = "balanced_data"
# all_data = pd.read_pickle(f"../output/pickle_results/{category}/{category}_combined.pkl")

# for i, dropout in enumerate(sorted(all_data.dropout.unique())):
#     ax = plt.subplot(4, 2, i+1)
#     sns.barplot(x = "nb_clust", y="ARI", 
#                 data = all_data[all_data["dropout"] == dropout].sort_values(by=["nb_clust", "order"]),
#                 hue = "method",
#                palette=clrs,
#                edgecolor='black',
#         linewidth=1.5)

#     plt.title(f"{titles[i]} Balanced data, dropout rate {dropout} %")
#     if i ==1:
#         plt.legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=0.)
# #         plt.legend(bbox_to_anchor=(0., 1.4), loc=2, borderaxespad=0., ncol=7)
#     else: 
#         plt.legend([],[], frameon=False)

#     plt.xlabel("")
#     sns.despine()
    
    
# category = "imbalanced_data"
# all_data = pd.read_pickle(f"../output/pickle_results/{category}/{category}_combined.pkl")
# titles = ["(e)", "(f)", "(g)", "(g)"]
# for i, dropout in enumerate(sorted(all_data.dropout.unique())):
#     ax = plt.subplot(4, 2, i+5)
#     sns.barplot(x = "nb_clust", y="ARI", 
#                 data = all_data[all_data["dropout"] == dropout].sort_values(by=["nb_clust", "order"]),
#                 hue = "method",
#                palette=clrs,
#                edgecolor='black',
#         linewidth=1.5)

#     plt.title(f"{titles[i]} Imbalanced data, dropout rate {dropout} %")

#     plt.legend([],[], frameon=False)
#     if i in [2, 3]:
#         plt.xlabel("Nb. of clusters")
#     else:
#         plt.xlabel("")
#     sns.despine()
# plt.tight_layout()
# plt.savefig(f"../diagrams/{category}_barplot_by_dropout.pdf", bbox_inches='tight')

In [None]:
# titles = ["(a)", "(b)", "(c)", "(d)"]
# plt.figure(figsize = (12, 5))
# for i, dropout in enumerate(sorted(all_data.dropout.unique())):
#     ax = plt.subplot(2, 2, i+1)
#     sns.barplot(x = "nb_clust", y="ARI", 
#                 data = all_data[all_data["dropout"] == dropout].sort_values(by=["nb_clust", "order"]),
#                 hue = "method",
#                palette=clrs,
#                edgecolor='black',
#         linewidth=1.5)

#     plt.title(f"{titles[i]} dropout rate {dropout} %")
#     if i ==1:
#         plt.legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=0.)
#     else: 
#         plt.legend([],[], frameon=False)
#     if i in [2, 3]:
#         plt.xlabel("Nb. of clusters")
#     else:
#         plt.xlabel("")
#     sns.despine()
# # plt.tight_layout()
# plt.savefig(f"../diagrams/{category}_barplot_by_dropout.pdf", bbox_inches='tight')

In [None]:
# Figure "all_barplot.pdf": one row per data category, one column per metric
# (ARI, NMI, Silhouette, Calinski), each a per-method bar plot.
plt.figure(figsize=(14, 12))
letters = ["(a)", "(b)", "(c)", "(d)",
           "(e)", "(f)","(g)" ,"(h)",
           "(i)","(j)","(k)", "(l)"]
titles = ["balanced data", "imbalanced data", "real data"]

# (column plotted, text shown in the title, log-scaled y axis).
# The Calinski title text keeps its leading space so the rendered title is
# unchanged from the original figure.
metric_panels = [
    ("ARI", "ARI", False),
    ("NMI", "NMI", False),
    ("Silhouette", "Silhouette", False),
    ("Calinski", " Calinski", True),
]

for i, category in enumerate(["balanced_data", "imbalanced_data", "real_data"]):
    all_data = pd.read_pickle(f"../output/pickle_results/{category}/{category}_combined.pkl")
    by_method = all_data.sort_values(by="order")

    for j, (column, title_text, log_scale) in enumerate(metric_panels):
        # Panels are numbered row by row on the 3 x 4 grid, starting at 1.
        ax = plt.subplot(3, 4, i * 4 + j + 1)
        sns.barplot(x="method", y=column, data=by_method,
                    palette=clrs, edgecolor='black', linewidth=1.5)
        if log_scale:
            ax.set(yscale="log")
        plt.xticks(rotation=90)
        plt.title(f"{letters[i * 4 + j]} {title_text} {titles[i]}")
        plt.xlabel("")

# Without arguments despine acts on every axes of the current figure.
sns.despine()
plt.tight_layout()
plt.savefig(f"../diagrams/all_barplot.pdf", bbox_inches='tight')