In [1]:
from cellij.core.models import MOFA
import muon as mu
import scanpy as sc
import pandas as pd
import numpy as np


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# The number of latent factors is the only argument a MOFA model requires.
N_FACTORS = 10
mofa = MOFA(n_factors=N_FACTORS)


In [3]:
# Load the CLL data: one CSV file per modality plus one sample-metadata table.
DATA_DIR = "../data/cll"

modalities = {}
for ome in ["drugs", "methylation", "mrna", "mutations"]:
    modality_df = pd.read_csv(
        filepath_or_buffer=f"{DATA_DIR}/cll_{ome}.csv",
        sep=",",
        index_col=0,
        encoding="utf_8",
    )
    # Cast the frame to float32 *before* wrapping it: the `dtype=` argument of
    # the AnnData constructor is deprecated in recent anndata releases, while
    # casting up front yields a float32 matrix on old and new versions alike.
    modalities[ome] = sc.AnnData(modality_df.astype(np.float32))

# Sample-level metadata, indexed by the "Sample" column.
obs = pd.read_csv(
    filepath_or_buffer=f"{DATA_DIR}/cll_metadata.csv",
    sep=",",
    index_col="Sample",
    encoding="utf_8",
)

# Combine the per-modality AnnData objects into one MuData container and
# attach the metadata columns to its observation table (left join on index).
mdata = mu.MuData(modalities)
mdata.obs = mdata.obs.join(obs)




In [4]:
# Hand the multi-modal data set to the model, registered under the name "cll".
mofa.add_data(
    data=mdata,
    name="cll",
)


In [5]:
# Fit the model using a Normal likelihood.
# NOTE(review): an earlier comment here stated that only the mrna modality is
# factorized (into a product of two low-rank matrices) and that the other
# modalities are left untouched. Nothing in this call restricts the fit to
# mrna -- confirm against the cellij API before relying on that statement.
LIKELIHOOD = "Normal"
mofa.fit(likelihood=LIKELIHOOD)


Epoch      0:    17651130.95
Epoch    100:     1282976.94
Epoch    200:     1219672.84
Epoch    300:     1164368.24
Epoch    400:     1113559.58
Epoch    500:     1081006.19
Epoch    600:     1066368.58
Epoch    700:     1055290.83
Epoch    800:     1049780.88
Epoch    900:     1046879.43
Epoch   1000:     1042745.73
