In [1]:
import os
import sys
from pathlib import Path

currentdir = os.getcwd()
parentdir = str(Path(currentdir).parents[2])
sys.path.insert(0, parentdir)

import spared

In [2]:
from spared.datasets import get_dataset
# Load the "villacampa_lung_organoid" dataset with visualization turned off.
# The returned object exposes the processed data as `test_dataset.adata`.
test_dataset = get_dataset(
    "villacampa_lung_organoid",
    visualize=False,
)

Loading villacampa_lung_organoid dataset with the following data split:
train data: ['V19D02-088_A1', 'V19D02-088_B1']
val data: ['V19D02-088_C1']
test data: ['V19D02-088_D1']
Parameters not found so this set of parameters is saved in /media/SSD4/gmmejia/Library_Spared_Spackle/spared/processed_data/villacampa_data/villacampa_lung_organoid/2024-06-13-18-46-33
Computing main adata file from downloaded raw data...
Loading train data
Loading val data
Loading test data
Starting data filtering...
Data filtering took 0.78 seconds
Number of genes that passed the filtering:        641 out of 33538 (1.91%)
Number of observations that passed the filtering: 1832 out of 1832 (100.00%)
Starting data processing...
Number of genes not found in GTF file by TPM normalization: 13 out of 641 (2.03%) (628 remaining)
Applying adaptive median filter to collection...


100%|██████████| 4/4 [00:03<00:00,  1.05it/s]


Computing Moran's I for each gene over each slide using data of layer d_log1p...


  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 1000/1000 [00:16<00:00, 61.13/s]
100%|██████████| 1000/1000 [00:04<00:00, 202.36/s]
100%|██████████| 1000/1000 [00:01<00:00, 678.95/s]
100%|██████████| 1000/1000 [00:01<00:00, 556.73/s]


Filtering genes by Moran's I. Keeping top 128 genes.
Found 4 batches.
Adjusting for 0 covariate(s) or covariate level(s).
Standardizing Data across genes.
Fitting L/S model and finding priors.
Finding parametric adjustments.
Adjusting the Data
Found 4 batches.
Adjusting for 0 covariate(s) or covariate level(s).
Standardizing Data across genes.
Fitting L/S model and finding priors.
Finding parametric adjustments.
Adjusting the Data
Percentage of imputed observations with median filter: 25.460%
Processing of the data took 31.74 seconds
The processed dataset looks like this:
AnnData object with n_obs × n_vars = 1832 × 128
    obs: 'in_tissue', 'array_row', 'array_col', 'patient', 'slide_id', 'split', 'unique_id', 'n_genes_by_counts', 'total_counts'
    var: 'gene_ids', 'feature_types', 'genome', 'gene_symbol', 'exp_frac', 'glob_exp_frac', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'gene_length', 'd_log1p_moran', 'log1p_avg_exp', 'd_log1p_avg_exp', 'c_log1

In [3]:
from spared.dataloaders import get_pretrain_dataloaders
# Declare train, validation and test loaders
import torch  # local import: needed here for the CUDA availability check

train_dataloader, val_dataloader, test_dataloader = get_pretrain_dataloaders(
    adata=test_dataset.adata,
    # NOTE(review): the recorded run printed a "noisy_delta layer ... will
    # probably yield bad results" warning for this call — confirm that
    # 'c_d_log1p' is the intended prediction layer.
    layer='c_d_log1p',
    batch_size=265,  # NOTE(review): unusual value, possibly a typo for 256 — confirm
    shuffle=True,
    # Only request CUDA when it is actually available, consistent with the
    # cuda/cpu fallback used when the model's device is selected.
    use_cuda=torch.cuda.is_available(),
)

Using noisy_delta layer for training. This will probably yield bad results.
Percentage of imputed observations with median filter: 25.460%


In [4]:
from spared.models import ImageBackbone
import argparse
import torch

# Settings handed to the model through an argparse-style namespace.
input_dict = dict(
    img_backbone='ShuffleNetV2',
    img_use_pretrained=True,
    average_test=False,
    optim_metric='MSE',
    robust_loss=False,
    optimizer='Adam',
    lr=0.0001,
    momentum=0.9,
)

# Populate the namespace in one step; `arg_dict` is the live attribute
# mapping of `test_args`, so both names view the same settings.
test_args = argparse.Namespace(**input_dict)
arg_dict = vars(test_args)


# Select the GPU when torch reports CUDA as available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the image backbone from the settings namespace; `latent_dim` is set to
# the number of variables (n_vars) of the dataset's adata. Then move the model
# to the selected device.
test_model = ImageBackbone(args=test_args, latent_dim=test_dataset.adata.n_vars)
test_model = test_model.to(device)



In [5]:
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import ModelCheckpoint

# Checkpoint callback: keep only the single best model as judged on validation
checkpoint_callback = ModelCheckpoint(
    monitor='val_MSE',  # validation metric to track (choose your own here)
    mode='min',         # smaller monitored value counts as better
    save_top_k=1,       # retain just the best checkpoint
)

# Define the trainer (the model itself is fitted in a later cell)
trainer = Trainer(
    devices=1,
    max_steps=1000,
    # Validation is scheduled by step count rather than by epoch:
    # the per-epoch check is disabled and an interval of 10 is used instead.
    check_val_every_n_epoch=None,
    val_check_interval=10,
    log_every_n_steps=10,
    callbacks=[checkpoint_callback],
    enable_progress_bar=True,
    enable_model_summary=True,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [6]:
# Fit the model on the training loader, validating on the validation loader
trainer.fit(
    test_model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name            | Type         | Params
-------------------------------------------------
0 | test_transforms | Normalize    | 0     
1 | criterion       | MSELoss      | 0     
2 | encoder         | ShuffleNetV2 | 472 K 
-------------------------------------------------
472 K     Trainable params
0         Non-trainable params
472 K     Total params
1.892     Total estimated model params size (MB)


Epoch 249: 100%|██████████| 4/4 [00:00<00:00,  6.34it/s, v_num=3]          

`Trainer.fit` stopped: `max_steps=1000` reached.


Epoch 249: 100%|██████████| 4/4 [00:00<00:00,  5.95it/s, v_num=3]


In [7]:
# Load the best model after training
best_model_path = checkpoint_callback.best_model_path
if not best_model_path:
    # An empty path means the callback never saved a checkpoint; fail with a
    # clear message instead of an opaque error from loading an empty path.
    raise RuntimeError(
        "No checkpoint was saved during training, so there is no best model to load."
    )
model = ImageBackbone.load_from_checkpoint(best_model_path)

# Test model if there is a test dataloader
if test_dataloader is not None:
    trainer.test(model, dataloaders=test_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]


Testing DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 10.49it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_Global          -3.1575732231140137
        test_MAE            1.0347423553466797
        test_MSE            1.7111455202102661
      test_PCC-Gene         0.12955906987190247
     test_PCC-Patch          0.879209578037262
      test_R2-Gene          -1.7244747877120972
      test_R2-Patch         0.3040209114551544
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
