# Clustering multimodal data with sincei

In [None]:
import numpy as np
import pandas as pd

from sincei.multimodalClustering import (
    multiModal_clustering,
    umap_aligned
)

import mudata as md
import scanpy as sc

%matplotlib inline

## Load data

In [None]:
# Location of the MuData (.h5mu) file to load.
multi_modal_path = "sincei_output/scCounts_10x_multiome_clustered.h5mu"

# Read the file into memory; the trailing bare name shows a summary of the
# loaded object as the cell output.
mdata = md.read_h5mu(multi_modal_path)
mdata

## Load metadata and add celltype

In [None]:
# Per-cell metadata table: the first CSV row holds the column names and the
# first column becomes the row index.
metadata = pd.read_csv("metadata_cd34_rna.csv", index_col=0, header=0)

# Remove the 'rna_' tag from the index labels.
# NOTE(review): presumably this makes them match the cell names in `mdata` --
# confirm against the input files.
metadata.index = metadata.index.str.replace("rna_", "")

In [None]:
# Attach the 'celltype' annotation to the top-level `mdata.obs` table, matching
# rows by index label.
# A left join keeps every row of `mdata.obs` in its original order; cells that
# have no entry in `metadata` get NaN for 'celltype'. The default inner join
# would instead drop those cells, leaving an obs table with fewer rows than
# `mdata` has cells.
mdata.obs = mdata.obs.merge(
    metadata['celltype'],
    how='left',
    left_index=True,
    right_index=True,
)
mdata

## Clustering

In [None]:
# Cluster the cells using both the RNA and the ATAC modality.
# NOTE(review): the list-valued arguments appear to be given per modality, in
# the same order as `modalities` (rna first, atac second) -- confirm against
# the sincei documentation.
multiModal_clustering(
    mdata=mdata,
    modalities=["rna", "atac"],
    modal_weights=[1, 1],
    method=["glmPCA", "LSA"],
    glmPCAfamily="poisson",
    binarize=False,
    nPrinComps=[20, 30],
    nK=30,
    clusterResolution=[0.8, 1.0],
)
mdata
# To save the clustered object to disk, uncomment the next line:
# mdata.write_h5mu('sincei_output/10x_multiome_clustered.h5mu')

In [None]:
# Copy selected columns from the top-level obs table into the obs table of
# each modality, so they can be used when plotting a single modality.
for _obs_key in ("celltype", "cluster_multi", "leiden_muon"):
    for _modality in ("rna", "atac"):
        mdata[_modality].obs[_obs_key] = mdata.obs[_obs_key]

In [None]:
# One UMAP figure per modality (RNA first, then ATAC), coloured by the same
# four obs columns and laid out two panels per row.
for _modality in ("rna", "atac"):
    sc.pl.umap(
        mdata[_modality],
        color=["cluster_multi", "leiden", "celltype", "sample"],
        ncols=2,
    )

## Align UMAP

In [None]:
# Compute aligned UMAP embeddings for the RNA and ATAC modalities.
# NOTE(review): the plotting cell that follows reads the UMAP from each
# modality, so this call presumably updates `mdata` in place -- confirm
# against the sincei documentation.
umap_aligned(
    mdata,
    distance_metric='euclidean',
    nK=30,
    modalities=["rna", "atac"],
)

In [None]:
# Plot the UMAP of each modality again (RNA first, then ATAC), using the same
# colouring and two-panel-per-row layout as before the alignment step.
for _modality in ("rna", "atac"):
    sc.pl.umap(
        mdata[_modality],
        color=["cluster_multi", "leiden", "celltype", "sample"],
        ncols=2,
    )