# Train a `bioLORD` model on the `developing human immune system across tissues` dataset (B-cells)

The data was generated by Suo et al.[[1]](https://www.science.org/doi/full/10.1126/science.abo0510) and downloaded from [Lymphoid cells](https://cellgeni.cog.sanger.ac.uk/developmentcellatlas/fetal-immune/PAN.A01.v01.raw_count.20210429.LYMPHOID.embedding.h5ad). <br>
The complete dataset is a cross-tissue single-cell atlas of developing human immune cells across prenatal hematopoietic, lymphoid, and nonlymphoid peripheral organs. It includes over 900,000 cells, from which the authors identified over 100 cell states.

[[1] Suo, Chenqu, Emma Dann, Issac Goh, Laura Jardine, Vitalii Kleshchevnikov, Jong-Eun Park, Rachel A. Botting et al. "Mapping the developing human immune system across organs." Science (2022): eabo0510.](https://www.science.org/doi/full/10.1126/science.abo0510)


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
sys.path.append("/cs/usr/bar246802/bar246802/SandBox2023/biolord_immune_bcells/utils") # add utils
sys.path.append("/cs/usr/bar246802/bar246802/SandBox2023/biolord") # set path)

In [3]:
import biolord
import scanpy as sc
import anndata
import numpy as np
import pandas as pd
from os.path import exists
import torch
import umap.plot
import seaborn as sns
import itertools
import matplotlib.pyplot as plt
from cluster_analysis import *
from formatters import *

[rank: 0] Global seed set to 0


In [4]:
print(f"PyTorch version: {torch.__version__}")
# Choose the device from what is usable at runtime.
# `torch.backends.cuda.is_built()` only tells whether this PyTorch binary was
# compiled with CUDA support, so it can be True on a machine without a usable
# GPU; `torch.cuda.is_available()` performs the runtime check.
device = "gpu" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

PyTorch version: 1.13.1+cu117
Using device: gpu


In [5]:
from tqdm import tqdm
tqdm(disable=True, total=0)  # initialise internal lock

<tqdm.std.tqdm at 0x7f88af01f940>

In [6]:
import mplscience
mplscience.set_style()

plt.rcParams['legend.scatterpoints'] = 1

## Set parameters

In [7]:
# Project-relative folders for the input data, saved outputs and figures.
DATA_DIR, SAVE_DIR, FIG_DIR = "../data/", "../output/", "../figures/"
# CSV file (inside SAVE_DIR) that the training loop writes model scores to.
LOGS_CSV = f"{SAVE_DIR}trained_models_scores.csv"

## Import processed data

In [8]:
adata = sc.read(DATA_DIR + "biolord_immune_bcells_bm.h5ad")

In [9]:
adata.obs["split"].value_counts()

train    57436
test      6382
ood        160
Name: split, dtype: int64

In [17]:
def cluster_evaluate_figures(attribute_):
    """Produce the clustering-evaluation figures for one attribute.

    This function reads several notebook-level names that must already exist
    when it is called: ``df``, ``transf_embeddings_attributes`` and
    ``attributes_map_rev`` (none of them is created in the cells preceding this
    definition), plus ``FIG_DIR`` and ``matrices_figures`` (presumably provided
    by the star imports -- TODO confirm).

    Parameters
    ----------
    attribute_ : str
        Attribute name. Ground-truth labels are taken from the
        ``attribute_ + '_key'`` column of ``df``.

    Returns
    -------
    Whatever ``matrices_figures`` returns. The value used to be computed and
    then discarded; it is now handed back to the caller.
    """
    ground_truth_labels = np.array(df[attribute_ + '_key'])
    print("Number of samples:", ground_truth_labels.size)
    title = "Attribute: " + attribute_
    # Prefix handed to matrices_figures, e.g. "../figures/<attribute>_".
    path = FIG_DIR + attribute_ + "_"
    return matrices_figures(transf_embeddings_attributes, ground_truth_labels, df,
                            attributes_map_rev, attribute_, title, path)

In [18]:
def cluster_evaluate(model, id_, attributes = ['celltype', 'organ']):
    """Score k-means clusterings of the model embeddings against known labels.

    For every attribute, the ground-truth labels are read from the
    ``<attribute>_key`` column of the frame returned by
    ``get_transf_embeddings_attributes(model)`` and passed, together with the
    embeddings, to ``get_kmeans_score`` for cluster counts 2..15.

    Parameters
    ----------
    model
        Trained model handed to ``get_transf_embeddings_attributes``.
    id_
        Model identifier forwarded to ``get_kmeans_score``.
    attributes : list of str
        Attributes to evaluate. The default list is only iterated, never
        mutated, so sharing it between calls is harmless.

    Returns
    -------
    pandas.DataFrame
        Columns ``attribute``, ``score_name``, ``score`` and ``n_clusters``,
        with the rows of all attributes stacked. If ``attributes`` is empty an
        empty frame with these columns is returned (previously this raised a
        ``TypeError`` because ``None`` was indexed).
    """
    cols = ['attribute', 'score_name', 'score', 'n_clusters']
    transf_embeddings_attributes, df = get_transf_embeddings_attributes(model)
    # Same range for every attribute, so build it once.
    n_clusters_range = np.arange(2, 16).astype(int)

    per_attribute_scores = []
    for attribute in attributes:
        ground_truth_labels = np.array(df[attribute + '_key'])
        n_unique_labels = len(set(ground_truth_labels))
        print(f'For attribute {attribute} the # of unique true labels is: {n_unique_labels}')
        path = SAVE_DIR + attribute + "_"
        scores = get_kmeans_score(transf_embeddings_attributes, ground_truth_labels,
                                  n_clusters_range=n_clusters_range, id_=id_, save_path=path)
        scores['attribute'] = attribute
        per_attribute_scores.append(scores)

    if per_attribute_scores:
        # Concatenate once instead of growing a frame inside the loop.
        all_scores = pd.concat(per_attribute_scores, ignore_index=True)[cols]
    else:
        all_scores = pd.DataFrame(columns=cols)
    print(all_scores)
    return all_scores

In [19]:
def split_adata_into_train_test(ood_frac=0.0025, test_size=0.1, random_state=42):
    """(Re)write ``adata.obs['split']`` with 'ood', 'train' and 'test' labels.

    Operates in place on the notebook-level ``adata``. A random fraction of the
    observations is marked 'ood'; the remaining ones are divided into 'train'
    and 'test' with scikit-learn's ``train_test_split``. The resulting counts
    and percentages are printed.

    Parameters
    ----------
    ood_frac : float
        Fraction of all observations labelled 'ood'.
    test_size : float
        Passed to ``train_test_split`` for the non-OOD observations.
    random_state : int or None
        Seed used for both the OOD sampling and the train/test split. With the
        default (42) every call produces the same partition; pass a different
        value (or ``None``) to draw another one.
    """
    from sklearn.model_selection import train_test_split

    # Reset the column so labels from an earlier call cannot linger.
    adata.obs['split'] = 'nan'
    ood_samples = adata.obs.sample(frac=ood_frac, random_state=random_state).index
    adata.obs.loc[ood_samples, "split"] = 'ood'

    # Everything that was not sampled as OOD is split into train/test.
    adata_idx = adata.obs_names[adata.obs["split"] != 'ood']
    adata_idx_train, adata_idx_test = train_test_split(
        adata_idx, test_size=test_size, random_state=random_state)
    adata.obs.loc[adata_idx_train, "split"] = 'train'
    adata.obs.loc[adata_idx_test, "split"] = 'test'

    split_counts = adata.obs['split'].value_counts()
    print("Simaple value count of train, test, OOD:")
    print(split_counts)
    print("\n")
    print("Train, test, OOD by percentage:")
    split_percentages = adata.obs['split'].value_counts(normalize=True) * 100
    print(split_percentages)

In [20]:
def train_model(module_params, trainer_params):
    """Create and train a bioLORD model on the notebook-level ``adata``.

    Parameters
    ----------
    module_params : dict
        Passed to ``biolord.Biolord`` as ``module_params``.
    trainer_params : dict
        Passed to ``model.train`` as ``plan_kwargs``.

    Returns
    -------
    The trained ``biolord.Biolord`` instance.
    """
    # Rewrite the "split" column before each run. NOTE: the split is drawn with
    # fixed random seeds (42), so every call yields the same partition; this
    # does not give each model a different split.
    split_adata_into_train_test()
    model = biolord.Biolord(
        adata=adata,
        n_latent=32,
        model_name="immune_bcells",
        module_params=module_params,
        train_classifiers=False,
        split_key="split",
    )

    model.train(
        max_epochs=1000,
        # Request the GPU only when CUDA is usable instead of unconditionally.
        use_gpu=torch.cuda.is_available(),
        batch_size=512,
        plan_kwargs=trainer_params,
        early_stopping=True,
        early_stopping_patience=20,
        check_val_every_n_epoch=10,
        enable_checkpointing=False,
        num_workers=1,
    )
    return model

In [21]:
def get_model_id(logs_csv=None):
    """Return the id to give to the next trained model.

    The id is one more than the largest value in the ``id_`` column of the
    score log, or 1 when there is no log, the log is empty, or it holds no
    numeric id.

    Ids are parsed numerically. The previous ``str(id_).isnumeric()`` check
    rejected a float maximum such as ``5.0`` (pandas reads the column as float
    as soon as one value is missing) and silently restarted the numbering at 1.

    Parameters
    ----------
    logs_csv : str, optional
        Path of the score log. Defaults to the notebook-level ``LOGS_CSV``.

    Returns
    -------
    int
        Next model id, always >= 1.

    Raises
    ------
    KeyError
        If the log exists but has no ``id_`` column.
    """
    path = LOGS_CSV if logs_csv is None else logs_csv
    if not exists(path):
        return 1
    try:
        df_logs = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte log carries no ids; start from the beginning.
        return 1
    # Non-numeric entries become NaN and are ignored.
    logged_ids = pd.to_numeric(df_logs['id_'], errors='coerce').dropna()
    if logged_ids.empty:
        return 1
    return max(int(logged_ids.max()) + 1, 1)

In [None]:
# Hyperparameter grid: the Cartesian product of the four value lists below.
arr_n_latent_attribute_categorical = np.concatenate(
    [np.arange(3, 5, 1), np.arange(5, 31, 5)]
)
arr_reconstruction_penalty = [1e1, 1e2, 1e3]
arr_unknown_attribute_penalty = [1e-2, 1e-1, 1e1]
arr_unknown_attribute_noise_param = [1e-2, 1e-1, 1e1]
parms_combos = itertools.product(
    arr_n_latent_attribute_categorical,
    arr_reconstruction_penalty,
    arr_unknown_attribute_penalty,
    arr_unknown_attribute_noise_param,
)
# Preview the combinations with their index, skipping the first ten.
for i, n in enumerate(itertools.islice(parms_combos, 10, None), start=10):
    print(n, 'i=', i)

In [None]:
def model_training_iterations():
    """Train and score one bioLORD model per hyperparameter combination.

    The grid is walked in ``itertools.product`` order. Combinations whose
    1-based position is below the id returned by ``get_model_id()`` are
    skipped; for each remaining one a model is trained, evaluated with
    ``cluster_evaluate`` and its scores are written to ``LOGS_CSV``.

    NOTE(review): this notebook defines ``model_training_iterations`` in two
    cells; the definition from whichever cell is executed last is the one used.
    """
    grid = itertools.product(
        # n_latent_attribute_categorical
        np.concatenate((np.arange(3, 5, 1), np.arange(5, 31, 5))),
        [1e1, 1e2, 1e3],    # reconstruction_penalty
        [1e-2, 1e-1, 1e1],  # unknown_attribute_penalty
        [1e-2, 1e-1, 1e1],  # unknown_attribute_noise_param
    )

    id_ = get_model_id()
    for i, combo in enumerate(grid):
        # Resume: positions before the next model id are not trained again.
        if i + 1 < id_:
            continue
        (n_latent_attribute_categorical, reconstruction_penalty,
         unknown_attribute_penalty, unknown_attribute_noise_param) = combo
        print(
            f'n_latent_attribute_categorical = {n_latent_attribute_categorical}, reconstruction_penalty = {reconstruction_penalty},unknown_attribute_penalty = {unknown_attribute_penalty}, unknown_attribute_noise_param = {unknown_attribute_noise_param}, i={i+1}'
        )

        biolord.Biolord.setup_anndata(
            adata,
            categorical_attributes_keys=["celltype", "organ", "age"],
            retrieval_attribute_key="sex",
        )

        # Only the four grid values vary; the rest is fixed.
        module_params = dict(
            autoencoder_width=128,
            autoencoder_depth=2,
            attribute_nn_width=256,
            attribute_nn_depth=2,
            n_latent_attribute_categorical=n_latent_attribute_categorical,
            loss_ae="gauss",
            loss_ordered_attribute="gauss",
            reconstruction_penalty=reconstruction_penalty,
            unknown_attribute_penalty=unknown_attribute_penalty,
            unknown_attribute_noise_param=unknown_attribute_noise_param,
            attribute_dropout_rate=0.1,
            use_batch_norm=False,
            use_layer_norm=False,
            seed=42,
        )
        trainer_params = dict(
            n_epochs_warmup=0,
            autoencoder_lr=1e-4,
            autoencoder_wd=1e-4,
            attribute_nn_lr=1e-2,
            attribute_nn_wd=4e-8,
            step_size_lr=45,
            cosine_scheduler=True,
            scheduler_final_lr=1e-5,
        )

        model = train_model(module_params, trainer_params)
        # Tag every score row with the hyperparameters and the model id.
        scores = pd.DataFrame(
            cluster_evaluate(model, id_).assign(
                n_latent_attribute_categorical=n_latent_attribute_categorical,
                reconstruction_penalty=reconstruction_penalty,
                unknown_attribute_penalty=unknown_attribute_penalty,
                unknown_attribute_noise_param=unknown_attribute_noise_param,
                id_=id_,
            )
        )
        # The trained model itself is not saved (the model.save call was left disabled).
        start_new_log = id_ == 1 or not exists(LOGS_CSV)
        # A new log is written with a header; later models are appended without one.
        scores.to_csv(LOGS_CSV,
                      mode='w' if start_new_log else 'a',
                      header=start_new_log)
        id_ += 1

In [46]:
def model_training_iterations():
    """Run the hyperparameter grid search: train, evaluate and log each model.

    The grid is the Cartesian product of the four value lists below, walked in
    ``itertools.product`` order. The run resumes at the id returned by
    ``get_model_id()``: combinations whose 1-based position is smaller than
    that id are skipped. For every remaining combination a model is trained
    with ``train_model``, scored with ``cluster_evaluate``, and the scores
    (tagged with the hyperparameters and the model id) are written to
    ``LOGS_CSV``. A new log is started when the id is 1 or the file does not
    exist; otherwise rows are appended without a header.

    Trained models are not saved to disk.
    """
    arr_n_latent_attribute_categorical = np.concatenate(
        (np.arange(3, 5, 1), np.arange(5, 31, 5)))
    arr_reconstruction_penalty = [1e1, 1e2, 1e3]
    arr_unknown_attribute_penalty = [1e-2, 1e-1, 1e1]
    arr_unknown_attribute_noise_param = [1e-2, 1e-1, 1e1]
    parms_combos = itertools.product(arr_n_latent_attribute_categorical,
                                     arr_reconstruction_penalty,
                                     arr_unknown_attribute_penalty,
                                     arr_unknown_attribute_noise_param)

    # Training-plan settings are the same for every model, so define them once.
    trainer_params = {
        "n_epochs_warmup": 0,
        "autoencoder_lr": 1e-4,
        "autoencoder_wd": 1e-4,
        "attribute_nn_lr": 1e-2,
        "attribute_nn_wd": 4e-8,
        "step_size_lr": 45,
        "cosine_scheduler": True,
        "scheduler_final_lr": 1e-5,
    }

    # Make sure the folder that receives the score log exists before spending
    # time on training; otherwise the first write after a long run would fail.
    os.makedirs(os.path.dirname(LOGS_CSV) or ".", exist_ok=True)

    id_ = get_model_id()
    for i, (n_latent_attribute_categorical, reconstruction_penalty,
            unknown_attribute_penalty,
            unknown_attribute_noise_param) in enumerate(parms_combos):
        # Resume support: skip grid positions that precede the next model id.
        if i + 1 < id_:
            continue
        print(
            f'n_latent_attribute_categorical = {n_latent_attribute_categorical}, reconstruction_penalty = {reconstruction_penalty},unknown_attribute_penalty = {unknown_attribute_penalty}, unknown_attribute_noise_param = {unknown_attribute_noise_param}, i={i+1}'
        )

        # NOTE(review): kept inside the loop as in the original; whether it can
        # be hoisted safely depends on biolord/scvi internals -- TODO confirm.
        biolord.Biolord.setup_anndata(
            adata,
            categorical_attributes_keys=["celltype", "organ", "age"],
            retrieval_attribute_key="sex",
        )

        module_params = {
            "autoencoder_width": 128,
            "autoencoder_depth": 2,
            "attribute_nn_width": 256,
            "attribute_nn_depth": 2,
            "n_latent_attribute_categorical": n_latent_attribute_categorical,
            "loss_ae": "gauss",
            "loss_ordered_attribute": "gauss",
            "reconstruction_penalty": reconstruction_penalty,
            "unknown_attribute_penalty": unknown_attribute_penalty,
            "unknown_attribute_noise_param": unknown_attribute_noise_param,
            "attribute_dropout_rate": 0.1,
            "use_batch_norm": False,
            "use_layer_norm": False,
            "seed": 42,
        }

        # Pass a copy so the shared settings cannot be altered by a training run.
        model = train_model(module_params, dict(trainer_params))

        scores = cluster_evaluate(model, id_)
        scores['n_latent_attribute_categorical'] = n_latent_attribute_categorical
        scores['reconstruction_penalty'] = reconstruction_penalty
        scores['unknown_attribute_penalty'] = unknown_attribute_penalty
        scores['unknown_attribute_noise_param'] = unknown_attribute_noise_param
        scores['id_'] = id_

        # A new log gets a header row; subsequent models are appended without it.
        start_new_log = id_ == 1 or not exists(LOGS_CSV)
        scores.to_csv(LOGS_CSV,
                      mode='w' if start_new_log else 'a',
                      header=start_new_log)
        id_ += 1

In [None]:
model_training_iterations()

[rank: 0] Global seed set to 42


n_latent_attribute_categorical = 3, reconstruction_penalty = 100.0,unknown_attribute_penalty = 0.01, unknown_attribute_noise_param = 0.1, i=11
Simaple value count of train, test, OOD:
train    57436
test      6382
ood        160
Name: split, dtype: int64


Train, test, OOD by percentage:
train    89.774610
test      9.975304
ood       0.250086
Name: split, dtype: float64


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 104/1000:  10%|█         | 104/1000 [08:16<1:11:13,  4.77s/it, v_num=1, val_generative_mean_accuracy=0.45, val_generative_var_accuracy=0.239, val_biolord_metric=0.345, val_reconstruction_loss=263, val_unknown_attribute_penalty_loss=3.88, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=31.2, unknown_attribute_penalty_loss=4.32] 
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.346. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.418602          15
1   celltype                 Completeness  0.377541          11
2   celltype                    V-measure  0.392165          15
3   celltype                   Rand index  0.867416          15
4   celltype          Adjusted Rand Index  0.107828           8
5   celltype                           MI  0.963867          15
6   celltype                          NMI  0.392165          15
7   celltype  Adjusted Mutual Information  0.169498          11
8      organ                  Homogeneity  0.744135          14
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.750873           6
11     organ                   Rand index  0.910612          14
12     organ          Adjusted Rand Index  0.528489           6
13     organ                           MI  1.635032          14
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 124/1000:  12%|█▏        | 124/1000 [09:31<1:07:17,  4.61s/it, v_num=1, val_generative_mean_accuracy=0.243, val_generative_var_accuracy=-.263, val_biolord_metric=-.00992, val_reconstruction_loss=33.5, val_unknown_attribute_penalty_loss=2.36, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=32.7, unknown_attribute_penalty_loss=3.2e+3] 
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.162. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.590774          15
1   celltype                 Completeness  0.529788          15
2   celltype                    V-measure  0.558621          15
3   celltype                   Rand index  0.877403          15
4   celltype          Adjusted Rand Index  0.201743          15
5   celltype                           MI  1.360308          15
6   celltype                          NMI  0.558621          15
7   celltype  Adjusted Mutual Information  0.362534          15
8      organ                  Homogeneity  0.552920           4
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.712104           4
11     organ                   Rand index  0.859675          15
12     organ          Adjusted Rand Index  0.377622           4
13     organ                           MI  1.214890           4
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 104/1000:  10%|█         | 104/1000 [07:55<1:08:18,  4.57s/it, v_num=1, val_generative_mean_accuracy=0.45, val_generative_var_accuracy=0.238, val_biolord_metric=0.344, val_reconstruction_loss=259, val_unknown_attribute_penalty_loss=3.88, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=31.2, unknown_attribute_penalty_loss=4.59] 
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.345. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.499559          15
1   celltype                 Completeness  0.471330          11
2   celltype                    V-measure  0.476104          11
3   celltype                   Rand index  0.876654          15
4   celltype          Adjusted Rand Index  0.196319          11
5   celltype                           MI  1.150277          15
6   celltype                          NMI  0.476104          11
7   celltype  Adjusted Mutual Information  0.298256          11
8      organ                  Homogeneity  0.668902          15
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.775298           5
11     organ                   Rand index  0.893383          15
12     organ          Adjusted Rand Index  0.538995           5
13     organ                           MI  1.469727          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 104/1000:  10%|█         | 104/1000 [07:59<1:08:51,  4.61s/it, v_num=1, val_generative_mean_accuracy=0.45, val_generative_var_accuracy=0.239, val_biolord_metric=0.345, val_reconstruction_loss=265, val_unknown_attribute_penalty_loss=3.88, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=31.2, unknown_attribute_penalty_loss=4.91]
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.346. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.418602          15
1   celltype                 Completeness  0.377541          11
2   celltype                    V-measure  0.392165          15
3   celltype                   Rand index  0.867416          15
4   celltype          Adjusted Rand Index  0.107828           8
5   celltype                           MI  0.963867          15
6   celltype                          NMI  0.392165          15
7   celltype  Adjusted Mutual Information  0.169498          11
8      organ                  Homogeneity  0.744135          14
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.750873           6
11     organ                   Rand index  0.910612          14
12     organ          Adjusted Rand Index  0.528489           6
13     organ                           MI  1.635032          14
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 3/1000:   0%|          | 2/1000 [55:32<461:56:30, 1666.32s/it, v_num=1, val_generative_mean_accuracy=0.106, val_generative_var_accuracy=-.426, val_biolord_metric=-.16, val_reconstruction_loss=35.3, val_unknown_attribute_penalty_loss=30.8, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=35.3, unknown_attribute_penalty_loss=31.5]     


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f880bf30af0>
Traceback (most recent call last):
  File "/cs/labs/mornitzan/bar246802/SandBox2023/bioLordVenv/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/cs/labs/mornitzan/bar246802/SandBox2023/bioLordVenv/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f880bf30af0>
Traceback (most recent call last):
  File "/cs/labs/mornitzan/bar246802/SandBox2023/bioLordVenv/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/cs/labs/mornitzan/ba

Epoch 3/1000:   0%|          | 2/1000 [55:33<462:04:11, 1666.79s/it, v_num=1, val_generative_mean_accuracy=0.106, val_generative_var_accuracy=-.426, val_biolord_metric=-.16, val_reconstruction_loss=35.3, val_unknown_attribute_penalty_loss=30.8, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=35.3, unknown_attribute_penalty_loss=31.5]e+3]
Epoch 124/1000:  12%|█▏        | 124/1000 [09:34<1:07:36,  4.63s/it, v_num=1, val_generative_mean_accuracy=0.251, val_generative_var_accuracy=-.334, val_biolord_metric=-.0414, val_reconstruction_loss=33.5, val_unknown_attribute_penalty_loss=2.36, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=32.7, unknown_attribute_penalty_loss=3.21e+3]
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.161. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels i

[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.599531          15
1   celltype                 Completeness  0.552832          10
2   celltype                    V-measure  0.559708          15
3   celltype                   Rand index  0.884894          15
4   celltype          Adjusted Rand Index  0.222163          10
5   celltype                           MI  1.380471          15
6   celltype                          NMI  0.559708          15
7   celltype  Adjusted Mutual Information  0.402777          10
8      organ                  Homogeneity  0.568796          15
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.610314           3
11     organ                   Rand index  0.873159          15
12     organ          Adjusted Rand Index  0.288000           3
13     organ                           MI  1.249773          15
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 104/1000:  10%|█         | 104/1000 [07:59<1:08:52,  4.61s/it, v_num=1, val_generative_mean_accuracy=0.45, val_generative_var_accuracy=0.241, val_biolord_metric=0.345, val_reconstruction_loss=253, val_unknown_attribute_penalty_loss=3.88, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=31.2, unknown_attribute_penalty_loss=25.5] 
Monitored metric val_biolord_metric did not improve in the last 20 records. Best score: 0.346. Signaling Trainer to stop.
For attribute celltype the # of unique true labels is: 10
For attribute organ the # of unique true labels is: 9


[rank: 0] Global seed set to 42


   attribute                   score_name     score  n_clusters
0   celltype                  Homogeneity  0.504237          15
1   celltype                 Completeness  0.462868          12
2   celltype                    V-measure  0.472334          12
3   celltype                   Rand index  0.876404          15
4   celltype          Adjusted Rand Index  0.152801          11
5   celltype                           MI  1.161049          15
6   celltype                          NMI  0.472334          12
7   celltype  Adjusted Mutual Information  0.280984          11
8      organ                  Homogeneity  0.676040           5
9      organ                 Completeness  1.000000           2
10     organ                    V-measure  0.792112           5
11     organ                   Rand index  0.893134          15
12     organ          Adjusted Rand Index  0.547999           5
13     organ                           MI  1.485413           5
14     organ                          NM

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 55/1000:   5%|▌         | 54/1000 [04:08<1:13:25,  4.66s/it, v_num=1, val_generative_mean_accuracy=0.437, val_generative_var_accuracy=0.24, val_biolord_metric=0.338, val_reconstruction_loss=118, val_unknown_attribute_penalty_loss=11.7, generative_mean_accuracy=0, generative_var_accuracy=0, biolord_metric=0, reconstruction_loss=31.6, unknown_attribute_penalty_loss=28.7]  