# Train a `bioLORD` model on the developing human immune system across tissues dataset (B cells)

The data was generated by Suo et al.[[1]](https://www.science.org/doi/full/10.1126/science.abo0510) and downloaded from [Lymphoid cells](https://cellgeni.cog.sanger.ac.uk/developmentcellatlas/fetal-immune/PAN.A01.v01.raw_count.20210429.LYMPHOID.embedding.h5ad). <br>
The complete dataset contains a cross-tissue single-cell atlas of developing human immune cells across prenatal hematopoietic, lymphoid, and nonlymphoid peripheral organs. It includes over 900,000 cells, from which the authors identified over 100 cell states.

[[1] Suo, Chenqu, Emma Dann, Issac Goh, Laura Jardine, Vitalii Kleshchevnikov, Jong-Eun Park, Rachel A. Botting et al. "Mapping the developing human immune system across organs." Science (2022): eabo0510.](https://www.science.org/doi/full/10.1126/science.abo0510)


In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to local helpers take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
# NOTE(review): these are absolute, user-specific paths, so the notebook only
# runs unchanged on a machine with this exact directory layout.
sys.path.append("/cs/usr/bar246802/bar246802/SandBox2023/biolord_immune_bcells/utils") # make the local utils directory importable (presumably provides cluster_analysis / formatters - confirm)
sys.path.append("/cs/usr/bar246802/bar246802/SandBox2023/biolord") # make the local biolord checkout importable

In [3]:
import biolord
import scanpy as sc
import anndata
import numpy as np
import pandas as pd
from os.path import exists
import torch
import umap.plot
import seaborn as sns
import itertools
import matplotlib.pyplot as plt
from cluster_analysis import *
from formatters import *

[rank: 0] Global seed set to 0
Matplotlib is building the font cache; this may take a moment.


In [4]:
print(f"PyTorch version: {torch.__version__}")
# Pick the device by asking whether a CUDA device is actually usable at runtime.
# torch.backends.cuda.is_built() only reports whether this PyTorch binary was
# compiled with CUDA support, so it is True on CUDA builds even when no GPU or
# driver is present; torch.cuda.is_available() checks for a usable device.
device = "gpu" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

PyTorch version: 1.13.1+cu117
Using device: gpu


In [5]:
from tqdm import tqdm
# Create a disabled, zero-length progress bar purely for its side effect
# (per the inline note: initialising tqdm's internal lock). As the last
# expression in the cell, its repr is also displayed.
tqdm(disable=True, total=0)  # initialise internal lock

<tqdm.std.tqdm at 0x7f2364391820>

In [6]:
import mplscience
# Apply the mplscience matplotlib style to all subsequent figures.
mplscience.set_style()

# Show a single marker per entry in scatter-plot legends.
plt.rcParams['legend.scatterpoints'] = 1

## Set parameters

In [7]:
# Locations used throughout the notebook, relative to the working directory.
# Directory the processed .h5ad input is read from.
DATA_DIR = "../data/"
# Directory for outputs (clustering-score files and the sweep log below).
SAVE_DIR = "../output/"
# Directory used as the path prefix for figures.
FIG_DIR = "../figures/"
# CSV that accumulates the clustering scores of every trained configuration.
LOGS_CSV = SAVE_DIR + "trained_models_scores.csv"

## Import processed data

In [8]:
# Load the processed dataset from DATA_DIR; `adata` is used as a global by the
# helper functions defined further down.
adata = sc.read(DATA_DIR + "biolord_immune_bcells_bm.h5ad")

In [9]:
# Number of observations per value of the "split" column stored with the data.
adata.obs["split"].value_counts()

train    57436
test      6382
ood        160
Name: split, dtype: int64

In [10]:
def cluster_evaluate_figures(attribute_):
    """Produce the clustering-evaluation figures for one attribute.

    NOTE(review): this function reads the module-level names ``df``,
    ``transf_embeddings_attributes`` and ``attributes_map_rev``. None of them
    is defined in the visible part of this notebook, so they must exist in the
    kernel before the function is called.

    Parameters
    ----------
    attribute_ : str
        Attribute name; the ground-truth labels are taken from the
        ``attribute_ + '_key'`` column of ``df``.

    Returns
    -------
    Whatever ``matrices_figures`` returns (presumably the computed scores -
    confirm against its definition). The value was previously assigned to an
    unused local and discarded.
    """
    ground_truth_labels = np.array(df[attribute_ + '_key'])
    print("Number of samples:", ground_truth_labels.size)
    title = "Attribute: " + attribute_
    # Figure files are written using FIG_DIR + "<attribute>_" as path prefix.
    path = FIG_DIR + attribute_ + "_"
    return matrices_figures(transf_embeddings_attributes, ground_truth_labels, df,
                            attributes_map_rev, attribute_, title, path)

In [11]:
def cluster_evaluate(model, id_, attributes = ['celltype', 'organ']):
    """Score how well k-means clusters of the model embeddings match known labels.

    For every attribute, the ground-truth labels are read from the
    ``attribute + '_key'`` column of the frame returned by
    ``get_transf_embeddings_attributes(model)`` and passed to
    ``get_kmeans_score`` together with cluster counts 2..15.

    Parameters
    ----------
    model
        Trained model, forwarded to ``get_transf_embeddings_attributes``.
    id_
        Identifier of the training run, forwarded to ``get_kmeans_score``.
    attributes : list of str
        Attributes to evaluate. The default list is only read, never mutated.

    Returns
    -------
    pandas.DataFrame
        Columns ``attribute``, ``score_name``, ``score``, ``n_clusters`` with a
        fresh 0..n-1 index. An empty frame with those columns is returned when
        ``attributes`` is empty (this previously failed on ``None[cols]``).
    """
    cols = ['attribute', 'score_name', 'score', 'n_clusters']
    transf_embeddings_attributes, df = get_transf_embeddings_attributes(model)

    # Collect the per-attribute frames and concatenate once after the loop,
    # instead of growing a DataFrame with pd.concat on every iteration.
    per_attribute_scores = []
    for attribute in attributes:
        ground_truth_labels = np.array(df[attribute + '_key'])
        n_unique_labels = len(set(ground_truth_labels))
        print(f'For attribute {attribute} the # of unique true labels is: {n_unique_labels}')
        path = SAVE_DIR + attribute + "_"
        n_clusters_range = np.arange(2, 16).astype(int)
        scores = get_kmeans_score(transf_embeddings_attributes, ground_truth_labels, n_clusters_range=n_clusters_range, id_=id_, save_path=path)
        scores['attribute'] = attribute
        per_attribute_scores.append(scores)

    if per_attribute_scores:
        all_scores = pd.concat(per_attribute_scores, ignore_index=True)[cols]
    else:
        all_scores = pd.DataFrame(columns=cols)
    print(all_scores)
    return all_scores

In [12]:
def split_adata_into_train_test(ood_frac=0.0025, test_size=0.1, random_state=42):
    """Assign every observation of the global ``adata`` to 'ood', 'train' or 'test'.

    The assignment is written in place to ``adata.obs['split']`` (any previous
    content of that column is overwritten) and a summary of absolute and
    percentage counts is printed.

    Parameters
    ----------
    ood_frac : float
        Fraction of all observations sampled as 'ood'.
    test_size : float
        Fraction of the remaining observations assigned to 'test'; the rest
        become 'train'.
    random_state : int or None
        Seed used for both the 'ood' sampling and the train/test split. With
        the default of 42, every call produces exactly the same partition;
        pass a different seed (or ``None``) to obtain a different one.
    """
    from sklearn.model_selection import train_test_split

    # Reset the column so stale labels from an earlier split cannot survive.
    adata.obs['split'] = 'nan'
    ood_samples = adata.obs.sample(frac=ood_frac, random_state=random_state).index
    adata.obs.loc[ood_samples, "split"] = 'ood'

    # Everything that was not drawn as 'ood' is divided into train and test.
    adata_idx = adata.obs_names[adata.obs["split"] != 'ood']
    adata_idx_train, adata_idx_test = train_test_split(
        adata_idx, test_size=test_size, random_state=random_state)
    adata.obs.loc[adata_idx_train, "split"] = 'train'
    adata.obs.loc[adata_idx_test, "split"] = 'test'

    split_counts = adata.obs['split'].value_counts()
    print("Simaple value count of train, test, OOD:")
    print(split_counts)
    print("\n")
    print("Train, test, OOD by percentage:")
    split_percentages = adata.obs['split'].value_counts(normalize=True) * 100
    print(split_percentages)

In [13]:
def train_model(module_params, trainer_params):
    """Build a Biolord model on the global ``adata`` and train it.

    The "split" column of ``adata.obs`` is rewritten first by calling
    ``split_adata_into_train_test()`` without arguments. As defined in this
    notebook that call uses a fixed seed, so it reproduces the same
    train/test/ood partition on every invocation rather than a new one.

    Parameters
    ----------
    module_params : dict
        Passed to ``biolord.Biolord`` as ``module_params``.
    trainer_params : dict
        Passed to ``model.train`` as ``plan_kwargs``.

    Returns
    -------
    The trained ``biolord.Biolord`` instance.
    """
    split_adata_into_train_test()

    model_kwargs = dict(
        adata=adata,
        n_latent=32,
        model_name="immune_bcells",
        module_params=module_params,
        train_classifiers=False,
        split_key="split",
    )
    biolord_model = biolord.Biolord(**model_kwargs)

    # Validation runs every 10 epochs; training stops early once the monitored
    # metric has not improved for 20 consecutive validation records.
    fit_kwargs = dict(
        max_epochs=1000,
        use_gpu=True,
        batch_size=512,
        plan_kwargs=trainer_params,
        early_stopping=True,
        early_stopping_patience=20,
        check_val_every_n_epoch=10,
        enable_checkpointing=False,
        num_workers=1,
    )
    biolord_model.train(**fit_kwargs)
    return biolord_model

In [14]:
def get_model_id(logs_csv=None):
    """Return the id to use for the next trained model.

    The id is one more than the largest numeric value in the ``id_`` column of
    the score log. It is 1 when the log does not exist, is empty, has no
    ``id_`` column, or holds no numeric id.

    The previous implementation called ``.isnumeric()`` on the result of
    ``Series.max()``, which is a numpy scalar without that method, so it raised
    ``AttributeError`` whenever the log file already existed.

    Parameters
    ----------
    logs_csv : str or None
        Path of the score log. ``None`` (the default) uses the module-level
        ``LOGS_CSV``.

    Returns
    -------
    int
    """
    path = LOGS_CSV if logs_csv is None else logs_csv
    if not exists(path):
        return 1
    try:
        df_logs = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte log carries no ids yet.
        return 1
    if 'id_' not in df_logs.columns:
        return 1
    # Non-numeric entries become NaN and are skipped by max().
    last_id = pd.to_numeric(df_logs['id_'], errors='coerce').max()
    if pd.isna(last_id):
        return 1
    return int(last_id) + 1

In [15]:
def model_training_iterations():
    """Train and score one Biolord model per point of a hyperparameter grid.

    The grid is the Cartesian product of 8 values of
    ``n_latent_attribute_categorical`` (3, 4, 5, 10, 15, 20, 25, 30) with 3
    values each of ``reconstruction_penalty``, ``unknown_attribute_penalty``
    and ``unknown_attribute_noise_param``, i.e. 216 configurations. For each
    one the model is trained with ``train_model``, scored with
    ``cluster_evaluate``, and the scores plus the hyperparameters and run id
    are written to ``LOGS_CSV``.

    The run id starts at ``get_model_id()`` and increases by one per
    configuration. The original statement ``id+= 1`` targeted the builtin
    ``id`` instead of ``id_``; that makes ``id`` an unassigned local, so the
    function raised ``UnboundLocalError`` after the first configuration and the
    id never advanced.
    """
    arr_n_latent_attribute_categorical = np.concatenate((np.arange(3,5, 1), np.arange(5, 31, 5)))
    arr_reconstruction_penalty = [1e1, 1e2, 1e3]
    arr_unknown_attribute_penalty = [1e-2, 1e-1, 1e1]
    arr_unknown_attribute_noise_param = [1e-2, 1e-1, 1e1]

    # Single-configuration alternative to the grid above:
    # arr_n_latent_attribute_categorical = [20]
    # arr_reconstruction_penalty = [1e3]
    # arr_unknown_attribute_penalty = [1e1]
    # arr_unknown_attribute_noise_param = [1e1]

    hyperparameter_grid = itertools.product(
        arr_n_latent_attribute_categorical, arr_reconstruction_penalty,
        arr_unknown_attribute_penalty, arr_unknown_attribute_noise_param)

    id_ = get_model_id()
    for (n_latent_attribute_categorical, reconstruction_penalty,
         unknown_attribute_penalty, unknown_attribute_noise_param) in hyperparameter_grid:
        print(f"loop index is {id_}")
        # Hand the model a plain Python int rather than a numpy integer.
        n_latent_attribute_categorical = int(n_latent_attribute_categorical)

        # Kept inside the loop as in the original; NOTE(review): this looks
        # loop-invariant - confirm whether registering once would suffice.
        biolord.Biolord.setup_anndata(
            adata,
            categorical_attributes_keys=["celltype", "organ", "age"],
            retrieval_attribute_key="sex",
        )

        module_params = {
            "autoencoder_width": 128,
            "autoencoder_depth": 2,
            "attribute_nn_width": 256,
            "attribute_nn_depth": 2,
            "n_latent_attribute_categorical": n_latent_attribute_categorical,
            "loss_ae": "gauss",
            "loss_ordered_attribute": "gauss",
            "reconstruction_penalty": reconstruction_penalty,
            "unknown_attribute_penalty": unknown_attribute_penalty,
            "unknown_attribute_noise_param": unknown_attribute_noise_param,
            "attribute_dropout_rate": 0.1,
            "use_batch_norm": False,
            "use_layer_norm": False,
            "seed": 42,
        }

        trainer_params = {
            "n_epochs_warmup": 0,
            "autoencoder_lr": 1e-4,
            "autoencoder_wd": 1e-4,
            "attribute_nn_lr": 1e-2,
            "attribute_nn_wd": 4e-8,
            "step_size_lr": 45,
            "cosine_scheduler": True,
            "scheduler_final_lr": 1e-5,
        }
        model = train_model(module_params, trainer_params)

        # assign() returns a new frame, so the frame returned by
        # cluster_evaluate is not mutated column by column.
        scores = cluster_evaluate(model, id_).assign(
            n_latent_attribute_categorical=n_latent_attribute_categorical,
            reconstruction_penalty=reconstruction_penalty,
            unknown_attribute_penalty=unknown_attribute_penalty,
            unknown_attribute_noise_param=unknown_attribute_noise_param,
            id_=id_,
        )
        # Optional: persist the trained model itself.
        # model.save(SAVE_DIR + "trained_model_" + str(id_), overwrite=True)

        # The first run (or a missing log) writes the file with its header;
        # later runs append rows without repeating the header.
        if id_ == 1 or not exists(LOGS_CSV):
            scores.to_csv(LOGS_CSV)
        else:
            scores.to_csv(LOGS_CSV, mode='a', header=False)
        id_ += 1

In [16]:
# Run the hyperparameter sweep. Long-running: one model is trained per
# configuration (the recorded runs below took roughly 6-10 minutes each).
model_training_iterations()

[rank: 0] Global seed set to 42


loop index is 1
Simaple value count of train, test, OOD:
train    57436
test      6382
ood        160
Name: split, dtype: int64


Train, test, OOD by percentage:
train    89.774610
test      9.975304
ood       0.250086
Name: split, dtype: float64


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 112/1000:  11%|█         | 112/1000 [08:27<1:07:02,  4.53s/it, v_num=1, val_generative_mean_accuracy=0.416, val_generative_var_accuracy=0.205, val_biolord_metric=0.31, val_reconstruction_loss=174, val_unknown_attribute_penalty_loss=3.19, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=1.86, unknown_attribute_penalty_loss=3.3]  
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.312. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.480205          15
1   celltype                 Completeness  0.436347          13
2   celltype                    V-measure  0.455523          13
3   celltype                   Rand index  0.876155          13
4   celltype          Adjusted Rand Index  0.179388          13
5   celltype                           MI  1.105713          15
6   celltype                          NMI  0.455523          13
7   celltype  Adjusted Mutual Information  0.237039          13
8      organ                  Homogeneity  0.675562          15
9      organ                 Completeness  1.000000           3
10     organ                    V-measure  0.733680           4
11     organ                   Rand index  0.893134          15
12     organ          Adjusted Rand Index  0.414474           4
13     organ                           MI  1.484361          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 117/1000:  12%|█▏        | 117/1000 [08:39<1:05:23,  4.44s/it, v_num=1, val_generative_mean_accuracy=0.417, val_generative_var_accuracy=0.206, val_biolord_metric=0.311, val_reconstruction_loss=183, val_unknown_attribute_penalty_loss=2.82, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=1.85, unknown_attribute_penalty_loss=3.24]
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.313. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.555523          15
1   celltype                 Completeness  0.481075          15
2   celltype                    V-measure  0.515626          15
3   celltype                   Rand index  0.887890          15
4   celltype          Adjusted Rand Index  0.229789           7
5   celltype                           MI  1.279140          15
6   celltype                          NMI  0.515626          15
7   celltype  Adjusted Mutual Information  0.319738           7
8      organ                  Homogeneity  0.602844          11
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.603537           5
11     organ                   Rand index  0.881398          12
12     organ          Adjusted Rand Index  0.355338           8
13     organ                           MI  1.324584          11
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 87/1000:   9%|▊         | 87/1000 [06:26<1:07:39,  4.45s/it, v_num=1, val_generative_mean_accuracy=0.2, val_generative_var_accuracy=0.0309, val_biolord_metric=0.115, val_reconstruction_loss=3.13, val_unknown_attribute_penalty_loss=5.77, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=2.53, unknown_attribute_penalty_loss=3.21e+3]    
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.161. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.550599          15
1   celltype                 Completeness  0.548682           7
2   celltype                    V-measure  0.509973          15
3   celltype                   Rand index  0.884894          15
4   celltype          Adjusted Rand Index  0.283594           7
5   celltype                           MI  1.267802          15
6   celltype                          NMI  0.509973          15
7   celltype  Adjusted Mutual Information  0.387278           7
8      organ                  Homogeneity  0.634920          15
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.573312          15
11     organ                   Rand index  0.886142          15
12     organ          Adjusted Rand Index  0.273503          11
13     organ                           MI  1.395061          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 110/1000:  11%|█         | 110/1000 [08:14<1:06:36,  4.49s/it, v_num=1, val_generative_mean_accuracy=0.415, val_generative_var_accuracy=0.204, val_biolord_metric=0.31, val_reconstruction_loss=175, val_unknown_attribute_penalty_loss=3.35, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=1.87, unknown_attribute_penalty_loss=4.01] 
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.312. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.478101          15
1   celltype                 Completeness  0.432113          13
2   celltype                    V-measure  0.450943          13
3   celltype                   Rand index  0.874906          13
4   celltype          Adjusted Rand Index  0.172239          13
5   celltype                           MI  1.100867          15
6   celltype                          NMI  0.450943          13
7   celltype  Adjusted Mutual Information  0.230853          13
8      organ                  Homogeneity  0.670711          15
9      organ                 Completeness  1.000000           3
10     organ                    V-measure  0.733680           4
11     organ                   Rand index  0.891885          15
12     organ          Adjusted Rand Index  0.414474           4
13     organ                           MI  1.473702          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 135/1000:  14%|█▎        | 135/1000 [10:08<1:04:59,  4.51s/it, v_num=1, val_generative_mean_accuracy=0.419, val_generative_var_accuracy=0.206, val_biolord_metric=0.313, val_reconstruction_loss=196, val_unknown_attribute_penalty_loss=1.77, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=1.81, unknown_attribute_penalty_loss=2.56]
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.313. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.511123          14
1   celltype                 Completeness  0.453164          14
2   celltype                    V-measure  0.480402          14
3   celltype                   Rand index  0.883146          14
4   celltype          Adjusted Rand Index  0.195130          14
5   celltype                           MI  1.176904          14
6   celltype                          NMI  0.480402          14
7   celltype  Adjusted Mutual Information  0.255119          14
8      organ                  Homogeneity  0.663108          15
9      organ                 Completeness  1.000000           3
10     organ                    V-measure  0.603537           5
11     organ                   Rand index  0.890886          15
12     organ          Adjusted Rand Index  0.354448           5
13     organ                           MI  1.456996          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 87/1000:   9%|▊         | 87/1000 [06:31<1:08:28,  4.50s/it, v_num=1, val_generative_mean_accuracy=0.206, val_generative_var_accuracy=0.0359, val_biolord_metric=0.121, val_reconstruction_loss=3.05, val_unknown_attribute_penalty_loss=5.77, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=2.53, unknown_attribute_penalty_loss=3.22e+3]  
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.161. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.566020          13
1   celltype                 Completeness  0.548682           7
2   celltype                    V-measure  0.545092          13
3   celltype                   Rand index  0.886642          14
4   celltype          Adjusted Rand Index  0.283594           7
5   celltype                           MI  1.303309          13
6   celltype                          NMI  0.545092          13
7   celltype  Adjusted Mutual Information  0.387278           7
8      organ                  Homogeneity  0.601520          15
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.609192           9
11     organ                   Rand index  0.877403          15
12     organ          Adjusted Rand Index  0.322607           9
13     organ                           MI  1.321674          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 112/1000:  11%|█         | 112/1000 [08:25<1:06:46,  4.51s/it, v_num=1, val_generative_mean_accuracy=0.411, val_generative_var_accuracy=0.191, val_biolord_metric=0.301, val_reconstruction_loss=142, val_unknown_attribute_penalty_loss=3.19, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=1.88, unknown_attribute_penalty_loss=25]  
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.302. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.428509          15
1   celltype                 Completeness  0.429107           8
2   celltype                    V-measure  0.411228          13
3   celltype                   Rand index  0.862672          15
4   celltype          Adjusted Rand Index  0.162292           8
5   celltype                           MI  0.986678          15
6   celltype                          NMI  0.411228          13
7   celltype  Adjusted Mutual Information  0.256116           8
8      organ                  Homogeneity  0.703361          15
9      organ                 Completeness  1.000000           4
10     organ                    V-measure  0.733680           4
11     organ                   Rand index  0.893883          15
12     organ          Adjusted Rand Index  0.418595           6
13     organ                           MI  1.545443          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 112/1000:  11%|█         | 112/1000 [08:43<1:09:12,  4.68s/it, v_num=1, val_generative_mean_accuracy=0.411, val_generative_var_accuracy=0.196, val_biolord_metric=0.304, val_reconstruction_loss=146, val_unknown_attribute_penalty_loss=3.19, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=1.88, unknown_attribute_penalty_loss=25.4]
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.304. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.497692          15
1   celltype                 Completeness  0.482412           8
2   celltype                    V-measure  0.464958          15
3   celltype                   Rand index  0.878402          15
4   celltype          Adjusted Rand Index  0.207757           7
5   celltype                           MI  1.145978          15
6   celltype                          NMI  0.464958          15
7   celltype  Adjusted Mutual Information  0.307031           8
8      organ                  Homogeneity  0.642624          15
9      organ                 Completeness  1.000000           4
10     organ                    V-measure  0.733680           4
11     organ                   Rand index  0.886142          15
12     organ          Adjusted Rand Index  0.414474           4
13     organ                           MI  1.411989          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 87/1000:   9%|▊         | 87/1000 [06:43<1:10:38,  4.64s/it, v_num=1, val_generative_mean_accuracy=0.203, val_generative_var_accuracy=0.0384, val_biolord_metric=0.121, val_reconstruction_loss=3.05, val_unknown_attribute_penalty_loss=5.77, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=2.53, unknown_attribute_penalty_loss=3.23e+3]  
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.161. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.550599          15
1   celltype                 Completeness  0.548682           7
2   celltype                    V-measure  0.509973          15
3   celltype                   Rand index  0.884894          15
4   celltype          Adjusted Rand Index  0.283594           7
5   celltype                           MI  1.267802          15
6   celltype                          NMI  0.509973          15
7   celltype  Adjusted Mutual Information  0.387278           7
8      organ                  Homogeneity  0.634920          15
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.573312          15
11     organ                   Rand index  0.886142          15
12     organ          Adjusted Rand Index  0.273503          11
13     organ                           MI  1.395061          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 18/1000:   2%|▏         | 17/1000 [01:19<1:16:00,  4.64s/it, v_num=1, val_generative_mean_accuracy=0.365, val_generative_var_accuracy=0.201, val_biolord_metric=0.283, val_reconstruction_loss=33.1, val_unknown_attribute_penalty_loss=23.5, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=32.5, unknown_attribute_penalty_loss=23.9]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


For attribute celltype the # of unique true labels is: 10


KeyboardInterrupt: 