---
title: 02 - human tunning
author: Martin Proks
date: 07-12-2023
---

This notebook contains multiple methods on how we trained the model. We summarize below which params were helpful in generating better integration.

- `n_layers`: should be 2 - 3
- `gene_dispersion`: `gene` proved to be the best
- `gene_likelihood`: `nb` prefered over `zinb`
- `dropout_rate`: smaller penalization keeps datapoints closer (`0.005`)

In [1]:
!which pip

~/projects/data/Brickman/conda/envs/scvi-1.0.0/bin/pip


In [2]:
import scvi
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt


from rich import print
from scib_metrics.benchmark import Benchmarker
from scvi.model.utils import mde


import warnings
from lightning_fabric.plugins.environments.slurm import PossibleUserWarning
warnings.simplefilter(action='ignore', category=PossibleUserWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

scvi.settings.seed = 42

  self.seed = seed
  self.dl_pin_memory_gpu_training = (
  from .autonotebook import tqdm as notebook_tqdm
[rank: 0] Global seed set to 42


In [3]:
sc.set_figure_params(figsize=(10, 6))

%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'

plt.rcParams['svg.fonttype'] = 'none'

In [4]:
adata = sc.read("../data/processed/32_human_adata.h5ad")
adata

AnnData object with n_obs × n_vars = 2323 × 62754
    obs: 'day', 'ct', 'experiment', 'technology', 'n_counts', 'n_genes', 'ct_fine'
    layers: 'counts'

In [5]:
adata.obs.experiment.unique().tolist()

['Meistermann_2021',
 'Petropoulos_2016',
 'Xiang_2020',
 'Yan_2013',
 'Yanagida_2021',
 'Xue_2013']

In [7]:
sc.pp.highly_variable_genes(
    adata,
    flavor="seurat_v3",
    n_top_genes=3_000,
    layer="counts",
    batch_key="experiment",
    subset=True,
)

## 3. Pimp my model: ray tunner

In [8]:
import ray
import jax
import os

from ray import tune
from scvi import autotune

os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'

In [9]:
jax.devices()

[gpu(id=0), gpu(id=1)]

In [11]:
ref_tuner = sc.AnnData(adata.layers["counts"])
ref_tuner.obs = adata.obs[["technology", "experiment"]].copy()

model_cls = scvi.model.SCVI
model_cls.setup_anndata(ref_tuner, 
                        batch_key="experiment")

scvi_tuner = autotune.ModelTuner(model_cls)

In [12]:
scvi_tuner.info()

In [13]:
search_space = {
    "gene_likelihood": tune.choice(["nb", "zinb"]),
    "dispersion": tune.choice(["gene", "gene-batch"]),
    "n_hidden": tune.choice([128, 144, 256]),
    "n_layers": tune.choice([2, 3, 4, 5]),
    "lr": tune.loguniform(1e-4, 0.6),
}

In [14]:
ray.init(
    log_to_driver=False,
    num_cpus=10,
    num_gpus=2,
)

2023-12-07 12:27:30,713	INFO worker.py:1636 -- Started a local Ray instance.


0,1
Python version:,3.10.11
Ray version:,2.5.1


In [15]:
results = scvi_tuner.fit(
    ref_tuner,
    metric="validation_loss",
    search_space=search_space,
    num_samples=50,
    max_epochs=100,
)

0,1
Current time:,2023-12-07 12:46:54
Running for:,00:19:16.27
Memory:,469.6/4031.0 GiB

Trial name,# failures,error file
_trainable_f3b1a06f,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_f3b1a06f_7_dispersion=gene-batch,gene_likelihood=zinb,lr=0.4930,n_hidden=128,n_layers=4_2023-12-07_12-32-46/error.txt"
_trainable_7ca2ed73,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_7ca2ed73_9_dispersion=gene,gene_likelihood=nb,lr=0.4233,n_hidden=256,n_layers=5_2023-12-07_12-36-10/error.txt"
_trainable_eec818de,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_eec818de_16_dispersion=gene,gene_likelihood=nb,lr=0.2143,n_hidden=256,n_layers=4_2023-12-07_12-38-30/error.txt"
_trainable_d90facee,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_d90facee_19_dispersion=gene-batch,gene_likelihood=zinb,lr=0.1319,n_hidden=256,n_layers=2_2023-12-07_12-40-05/error.txt"
_trainable_f12e443a,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_f12e443a_21_dispersion=gene-batch,gene_likelihood=nb,lr=0.3660,n_hidden=144,n_layers=3_2023-12-07_12-41-01/error.txt"
_trainable_bcd5fd32,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_bcd5fd32_22_dispersion=gene,gene_likelihood=nb,lr=0.2291,n_hidden=144,n_layers=2_2023-12-07_12-41-15/error.txt"
_trainable_b8440d6d,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_b8440d6d_23_dispersion=gene-batch,gene_likelihood=nb,lr=0.1749,n_hidden=144,n_layers=2_2023-12-07_12-42-23/error.txt"
_trainable_e9fe5792,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_e9fe5792_27_dispersion=gene,gene_likelihood=zinb,lr=0.1686,n_hidden=128,n_layers=3_2023-12-07_12-42-56/error.txt"
_trainable_0d91ae4a,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_0d91ae4a_28_dispersion=gene-batch,gene_likelihood=zinb,lr=0.0855,n_hidden=256,n_layers=4_2023-12-07_12-43-01/error.txt"
_trainable_96bc8875,1,"/maps/projects/dan1/data/Brickman/projects/proks-salehin-et-al-2023/notebooks/ray/tune_scvi_2023-12-07-12:27:37/_trainable_96bc8875_29_dispersion=gene-batch,gene_likelihood=zinb,lr=0.3820,n_hidden=128,n_layers=3_2023-12-07_12-43-23/error.txt"

Trial name,status,loc,gene_likelihood,dispersion,n_hidden,n_layers,lr,validation_loss
_trainable_78d3bc3d,TERMINATED,10.84.5.120:3533767,zinb,gene-batch,128,2,0.0365808,11194.1
_trainable_c1b116e0,TERMINATED,10.84.5.120:3534500,zinb,gene,256,5,0.00183116,10236.2
_trainable_7bb06efc,TERMINATED,10.84.5.120:3533767,zinb,gene,144,2,0.00159217,10415.3
_trainable_a636a222,TERMINATED,10.84.5.120:3533767,nb,gene,256,3,0.000100827,24027.9
_trainable_28417f3c,TERMINATED,10.84.5.120:3533767,zinb,gene-batch,128,2,0.000106731,14757.4
_trainable_128c3ced,TERMINATED,10.84.5.120:3533767,nb,gene,144,4,0.0210739,13192.6
_trainable_88017ea7,TERMINATED,10.84.5.120:3534500,zinb,gene-batch,128,4,0.0243514,10226.9
_trainable_7d239652,TERMINATED,10.84.5.120:3562315,nb,gene-batch,128,5,0.00167404,23036.1
_trainable_44e62c77,TERMINATED,10.84.5.120:3562315,zinb,gene,128,4,0.00910985,91292500.0
_trainable_bf57f58a,TERMINATED,10.84.5.120:3562315,zinb,gene,144,2,0.00269743,9358.84


  r, k = function_base._ureduce(a,
2023-12-07 12:33:20,350	ERROR tune_controller.py:873 -- Trial task failed for trial _trainable_f3b1a06f
Traceback (most recent call last):
  File "/home/fdb589/projects/data/Brickman/conda/envs/scvi-1.0.0/lib/python3.10/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "/home/fdb589/projects/data/Brickman/conda/envs/scvi-1.0.0/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 18, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/home/fdb589/projects/data/Brickman/conda/envs/scvi-1.0.0/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/home/fdb589/projects/data/Brickman/conda/envs/scvi-1.0.0/lib/python3.10/site-packages/ray/_private/worker.py", line 2540, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): [36mray::ImplicitFunc.train()[39m (

In [16]:
print(results.model_kwargs)
print(results.train_kwargs)
print(results.metric)

In [17]:
import pandas as pd


training = pd.DataFrame([
    [x.metrics['validation_loss']] + x.path.split(',')[1:]
    for x in results.results if 'validation_loss' in x.metrics
]).sort_values(by=0)

training.to_csv("../results/02_human_integration/tunning.csv")
display(training.head(10))

Unnamed: 0,0,1,2,3,4
9,9358.837891,gene_likelihood=zinb,lr=0.0027,n_hidden=144,n_layers=2_2023-12-07_12-37-28
6,10226.864258,gene_likelihood=zinb,lr=0.0244,n_hidden=128,n_layers=4_2023-12-07_12-33-17
1,10236.216797,gene_likelihood=zinb,lr=0.0018,n_hidden=256,n_layers=5_2023-12-07_12-27-47
2,10415.34082,gene_likelihood=zinb,lr=0.0016,n_hidden=144,n_layers=2_2023-12-07_12-30-03
0,11194.058594,gene_likelihood=zinb,lr=0.0366,n_hidden=128,n_layers=2_2023-12-07_12-27-37
5,13192.603516,gene_likelihood=nb,lr=0.0211,n_hidden=144,n_layers=4_2023-12-07_12-32-37
12,13270.201172,gene_likelihood=zinb,lr=0.0010,n_hidden=128,n_layers=4_2023-12-07_12-38-25
10,14023.433594,gene_likelihood=zinb,lr=0.0006,n_hidden=144,n_layers=3_2023-12-07_12-37-32
4,14757.439453,gene_likelihood=zinb,lr=0.0001,n_hidden=128,n_layers=2_2023-12-07_12-32-30
15,20656.691406,gene_likelihood=nb,lr=0.0059,n_hidden=144,n_layers=5_2023-12-07_12-40-10
