### Ray Tune

In [10]:
adata = sc.read_h5ad('/root/datos/maestria/netopaas/luca/data/atlas/extended.h5ad')

In [None]:
condition_key = 'dataset'
cell_type_key = 'cell_type_tumor'

In [11]:
adata

AnnData object with n_obs × n_vars = 1283972 × 17811
    obs: 'sample', 'uicc_stage', 'ever_smoker', 'age', 'donor_id', 'origin', 'dataset', 'ann_fine', 'cell_type_predicted', 'doublet_status', 'leiden', 'n_genes_by_counts', 'total_counts', 'total_counts_mito', 'pct_counts_mito', 'ann_coarse', 'cell_type_tumor', 'tumor_stage', 'EGFR_mutation', 'TP53_mutation', 'ALK_mutation', 'BRAF_mutation', 'ERBB2_mutation', 'KRAS_mutation', 'ROS_mutation', 'origin_fine', 'study', 'platform', 'cell_type_major', 'cell_type_neutro', 'cell_type_neutro_coarse', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'sex_ontology_term_id', 'tissue_ontology_term_id', 'tissue_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid'
    var: 'is_highly_v

In [12]:
adata = adata[(adata.obs.origin == 'tumor_primary')]

# stages = ['III','III or IV', 'IV']
stages = ['I','II','III','III or IV', 'IV']
adata = adata[adata.obs.uicc_stage.isin(stages)]

# we remove these because some are enriched for endotheliar and some for immune cells
studies = [
    'Goveia_Carmeliet_2020',
    'Leader_Merad_2021','Guo_Zhang_2018']
adata = adata[~adata.obs.study.isin(studies)]
adata = adata[:, adata.var.is_highly_variable == 'True']

In [13]:
import tempfile

import ray
import scanpy as sc
import scvi
import seaborn as sns
import torch
from ray import tune
from scvi import autotune

scvi.settings.seed = 0
print("Last run with scvi-tools version:", scvi.__version__)

sc.set_figure_params(figsize=(6, 6), frameon=False)
sns.set_theme()
torch.set_float32_matmul_precision("high")
save_dir = tempfile.TemporaryDirectory()
scvi.settings.logging_dir = save_dir.name

%config InlineBackend.print_figure_kwargs={"facecolor": "w"}
%config InlineBackend.figure_format="retina"

INFO: Global seed set to 0
INFO:lightning.fabric.utilities.seed:Global seed set to 0


Last run with scvi-tools version: 1.1.5


In [6]:
# source_adata = adata.copy()
# from scvi.data import synthetic_iid

# source_adata = synthetic_iid()

In [14]:
adata = adata.copy()

In [15]:
model_cls = scvi.model.SCVI
model_cls.setup_anndata(adata, batch_key='dataset', labels_key='cell_type_tumor')

  self.validate_field(adata)


In [11]:
search_space = {
    "model_params":{
        "n_hidden": tune.choice([ 512, 1024, 2048 ]),
        "n_layers": tune.choice([5,6,7]),
        "gene_likelihood": tune.choice(['nb','zinb']),
    },
    "train_params":{
        'plan_kwargs':tune.choice([{"reduce_lr_on_plateau": True}, {"reduce_lr_on_plateau": False}])
    }
}

In [13]:
scvi_tuner = autotune.run_autotune(model_cls, adata,  metrics="validation_loss", mode='min',
    search_space=search_space,
    num_samples=50,
    resources={"cpu": 22, 'gpu':0.25},
    experiment_name='scvi2048_2',
    searcher_kwargs={'points_to_evaluate':[{'model_params':{
                                        "n_hidden": 2048,
                                        "n_layers": 7,
                                        "gene_likelihood": 'nb',
                                  }}]})

0,1
Current time:,2024-08-08 15:21:19
Running for:,01:43:33.74
Memory:,575.9/1006.6 GiB

Trial name,status,loc,model_params/gene_li kelihood,model_params/n_hidde n,model_params/n_layer s,iter,total time (s),validation_loss
_trainable_d48bdb71,RUNNING,172.17.0.2:11638,nb,1024,5,3.0,1403.91,764.247
_trainable_4fb299a4,RUNNING,172.17.0.2:12303,zinb,512,6,1.0,338.014,792.089
_trainable_6763d53e,PENDING,,nb,512,7,,,
_trainable_6cf80f80,TERMINATED,172.17.0.2:6840,nb,2048,7,1.0,367.571,835.669
_trainable_53882f25,TERMINATED,172.17.0.2:7208,nb,1024,6,20.0,2977.18,750.177
_trainable_1844f66d,TERMINATED,172.17.0.2:7444,zinb,2048,7,1.0,482.594,854.169
_trainable_46e3a77a,TERMINATED,172.17.0.2:7782,nb,1024,5,4.0,551.264,759.085
_trainable_db792fd2,TERMINATED,172.17.0.2:8031,zinb,512,7,2.0,307.559,777.508
_trainable_8778860e,TERMINATED,172.17.0.2:8336,zinb,2048,6,1.0,412.1,803.883
_trainable_1fe49448,TERMINATED,172.17.0.2:8577,nb,1024,7,4.0,707.352,759.203


KeyError: 'model_params/gene_likelihood'

In [26]:
scvi_tuner.result_grid

ResultGrid<[
  Result(
    metrics={'validation_loss': 754.237548828125},
    path='/tmp/tmphrzogsut/scvi1024/scvi1024/_trainable_b87f55fe_1_gene_likelihood=nb,n_hidden=256,n_layers=4_2024-08-08_03-25-39',
    filesystem='local',
    checkpoint=None
  ),
  Result(
    metrics={'validation_loss': 790.5972900390625},
    path='/tmp/tmphrzogsut/scvi1024/scvi1024/_trainable_899e4360_2_gene_likelihood=zinb,n_hidden=1024,n_layers=5_2024-08-08_03-25-51',
    filesystem='local',
    checkpoint=None
  ),
  Result(
    metrics={'validation_loss': 750.9778442382812},
    path='/tmp/tmphrzogsut/scvi1024/scvi1024/_trainable_0a38590b_3_gene_likelihood=nb,n_hidden=512,n_layers=5_2024-08-08_03-26-02',
    filesystem='local',
    checkpoint=None
  ),
  Result(
    metrics={'validation_loss': 772.2880859375},
    path='/tmp/tmphrzogsut/scvi1024/scvi1024/_trainable_af8dd8f4_4_gene_likelihood=nb,n_hidden=512,n_layers=4_2024-08-08_03-26-14',
    filesystem='local',
    checkpoint=None
  ),
  Result(
    me

In [21]:
scvi_tuner.__dict__

{'_model_cls': scvi.model._scvi.SCVI,
 '_data': AnnData object with n_obs × n_vars = 402634 × 5989
     obs: 'sample', 'uicc_stage', 'ever_smoker', 'age', 'donor_id', 'origin', 'dataset', 'ann_fine', 'cell_type_predicted', 'doublet_status', 'leiden', 'n_genes_by_counts', 'total_counts', 'total_counts_mito', 'pct_counts_mito', 'ann_coarse', 'cell_type_tumor', 'tumor_stage', 'EGFR_mutation', 'TP53_mutation', 'ALK_mutation', 'BRAF_mutation', 'ERBB2_mutation', 'KRAS_mutation', 'ROS_mutation', 'origin_fine', 'study', 'platform', 'cell_type_major', 'cell_type_neutro', 'cell_type_neutro_coarse', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'sex_ontology_term_id', 'tissue_ontology_term_id', 'tissue_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_sta

In [27]:
best_result = results.get_best_result('validation_loss', mode='min')

In [28]:
best_result.__dict__

{'metrics': {'validation_loss': 748.1448364257812,
  'timestamp': 1723090185,
  'checkpoint_dir_name': None,
  'done': True,
  'training_iteration': 20,
  'trial_id': '15780a2d',
  'date': '2024-08-08_04-09-45',
  'time_this_iter_s': 96.944420337677,
  'time_total_s': 2443.7222259044647,
  'pid': 11872,
  'hostname': '6ec9f87ec9e7',
  'node_ip': '172.17.0.3',
  'config': {'model_params': {'n_hidden': 1024,
    'n_layers': 5,
    'gene_likelihood': 'zinb'},
   'train_params': {'plan_kwargs': {}}},
  'time_since_restore': 2443.7222259044647,
  'iterations_since_restore': 20,
  'experiment_tag': '5_gene_likelihood=zinb,n_hidden=1024,n_layers=5'},
 'checkpoint': None,
 'error': None,
 'path': '/tmp/tmphrzogsut/scvi1024/scvi1024/_trainable_15780a2d_5_gene_likelihood=zinb,n_hidden=1024,n_layers=5_2024-08-08_03-26-25',
 'metrics_dataframe':     validation_loss   timestamp checkpoint_dir_name   done  \
 0        790.597290  1723087906                None  False   
 1        768.241699  1723088

In [27]:
best_result = results.get_best_result('validation_loss', mode='min')
with best_result.checkpoint.as_directory() as checkpoint_dir:
    state_dict = torch.load(os.path.join(checkpoint_dir, "model.pth"))
state_dict

AttributeError: 'NoneType' object has no attribute 'as_directory'