In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [2]:
import numpy as np
import torch
import scanpy as sc
import muon as mu
import anndata
import mudata
import pandas as pd

In [3]:
%load_ext autoreload
%autoreload 2

In [18]:
mdata = mu.read("data/mdata_preprocessed_final_5k_obs.h5mu")
mdata

In [5]:
from src.utils import setup_mudata
setup_mudata(mdata)

n_cells = mdata["rna"].shape[0]
batch_id = torch.ByteTensor(mdata["rna"].obs.loc[:, "batch_id"].values)
_, N_b = torch.unique(batch_id, sorted=True, return_counts=True)
N_b.view(-1, 1) / n_cells

tensor([[0.0734],
        [0.0716],
        [0.0672],
        [0.0770],
        [0.0604],
        [0.0612],
        [0.0578],
        [0.0514],
        [0.0588],
        [0.0716],
        [0.0716],
        [0.0564],
        [0.0460],
        [0.0478],
        [0.0708],
        [0.0570]])

In [14]:
N_b.size(dim=0)

16

In [129]:
batch_codes = mdata["rna"].obs["sample"].astype("category")

In [88]:
ids = torch.as_tensor(batch_codes.cat.codes.values, dtype=torch.long).view(-1, 1)

In [82]:
ids

tensor([[ 8],
        [ 8],
        [ 5],
        ...,
        [ 8],
        [ 5],
        [10]], dtype=torch.int8)

In [7]:
Phi = torch.zeros(n_cells, N_b.size(dim=0))
Phi

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [11]:
Phi1 = torch.scatter(Phi, 1, batch_id_long, 1.0)
Phi1[10]

tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.])

In [19]:
from src.model import MVAE, MVAEParams

mvae_params = MVAEParams(beta=0.01, n_layers=2, z_dim=100, n_hidden=300)

In [20]:
torch.set_num_threads(16)
model = MVAE(mdata, mvae_params, use_cuda=True)
model

N batches for mod1:  16
N batches for mod2:  16
(5000, 29400)
(5000, 12510)


MVAE(
  (rna): ModalityLayers(
    (shared_sampling): SamplingLayers(
      (mean): Sequential(
        (0): Linear(in_features=300, out_features=100, bias=True)
        (1): Dropout(p=0.3, inplace=False)
      )
      (logvar): Sequential(
        (0): Linear(in_features=300, out_features=100, bias=True)
        (1): Dropout(p=0.3, inplace=False)
      )
    )
    (batch_sampling): SamplingLayers(
      (mean): Sequential(
        (0): Linear(in_features=300, out_features=1600, bias=True)
        (1): Dropout(p=0.3, inplace=False)
      )
      (logvar): Sequential(
        (0): Linear(in_features=300, out_features=1600, bias=True)
        (1): Dropout(p=0.3, inplace=False)
      )
    )
    (private_sampling): SamplingLayers(
      (mean): Sequential(
        (0): Linear(in_features=300, out_features=100, bias=True)
        (1): Dropout(p=0.3, inplace=False)
      )
      (logvar): Sequential(
        (0): Linear(in_features=300, out_features=100, bias=True)
        (1): Dropout(p=0.

In [22]:
from src.train import train_mvae, TrainParams
train_params = TrainParams(train_size=0.8, n_epochs=100, learning_rate=1e-4, batch_size=128)

In [26]:
train_mvae(model, mdata, train_params)

Train data size: 4000
Test data size: 1000


Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



	Initialization is completed.
	Completed 1 / 10 iteration(s).
	Completed 2 / 10 iteration(s).
	Completed 3 / 10 iteration(s).
	Completed 4 / 10 iteration(s).


  0%|                                                    | 0/32 [00:02<?, ?it/s]

	Completed 5 / 10 iteration(s).
	Completed 6 / 10 iteration(s).
Reach convergence after 6 iteration(s).





array([[ 7.37845242e-01,  2.25084990e-01, -1.50886416e-01, ...,
         4.00086552e-01, -8.44041407e-01, -9.43560183e-01],
       [ 2.02376574e-01, -1.37602463e-01, -3.12753022e-04, ...,
         2.52221346e-01,  1.18156457e+00, -3.92428607e-01],
       [-8.54272842e-02,  7.49842763e-01,  9.84421134e-01, ...,
         1.24570675e-01, -3.34945142e-01, -6.46760702e-01],
       ...,
       [ 5.30178010e-01,  2.53921747e-03, -1.48054838e-01, ...,
        -1.31473899e-01, -1.73902661e-01,  1.23534098e-01],
       [ 5.22433639e-01,  3.93062264e-01, -4.36387151e-01, ...,
         9.57703829e-01, -6.65503263e-01,  5.44062138e-01],
       [ 2.44437441e-01,  2.12245464e-01, -1.99015766e-01, ...,
        -2.64628261e-01, -5.69045767e-02,  6.58127517e-02]], dtype=float32)

In [28]:
msi = anndata.AnnData(data["msi"].squeeze().numpy())
msi

AnnData object with n_obs × n_vars = 128 × 12510

In [30]:
a = mudata.MuData({"rna": rna, "msi": msi})
a



In [31]:
sc.pp.neighbors(a)
sc.tl.umap(a)

         Falling back to preprocessing with `sc.pp.pca` and default params.


AttributeError: 'NoneType' object has no attribute 'dtype'