In [24]:
import re
import numpy as np
import pandas as pd
import scanpy as sc
import scvelo as scv
from scipy import sparse, stats
from anndata import AnnData

# Base directory that the relative data paths in the cells below are built from.
# NOTE(review): the name `dir` shadows Python's built-in dir() for the rest of
# the notebook; renaming it requires touching every cell that reads it.
dir = "./"
# Select the 'scvelo' figure-parameter preset for subsequent plots.
scv.settings.set_figure_params('scvelo')

In [25]:
# Read the CD4 and CD8 datasets from the velo data directory (CD4 first, then CD8).
adata_CD4, adata_CD8 = (
    scv.read(f"{dir}/../data/velo/adata_CD4.h5ad", cache=True),
    scv.read(f"{dir}/../data/velo/adata_CD8.h5ad", cache=True),
)

In [26]:
# Both datasets get the same treatment: de-duplicate the gene names, then run
# scvelo's filter/normalise step with log=False. Both calls modify the objects
# in place, and the two datasets are processed independently of each other.

# CD4
adata_CD4.var_names_make_unique()
scv.pp.filter_and_normalize(adata_CD4, log=False)

# CD8
adata_CD8.var_names_make_unique()
scv.pp.filter_and_normalize(adata_CD8, log=False)

Normalized count data: X, spliced, unspliced.
Normalized count data: X, spliced, unspliced.


## Convert cells to miniclusters (average spliced/unspliced counts per minicluster)

In [33]:
def _minicluster_averager(codes, positions, n_groups, n_cells):
    """Build the sparse (n_groups x n_cells) matrix that averages cells per group.

    Entry (g, c) is 1 / size(g) when cell ``c`` belongs to group ``g``, so the
    product ``averager @ layer`` is the per-group mean of ``layer``.
    """
    counts = np.bincount(codes, minlength=n_groups)
    weights = 1.0 / counts[codes]
    # Duplicate (group, cell) pairs are summed by the COO-style constructor,
    # which matches counting a repeated cell twice in the mean.
    return sparse.csr_matrix((weights, (codes, positions)), shape=(n_groups, n_cells))


def _average_layer(averager, layer):
    """Return the per-minicluster mean of one layer as a CSR matrix."""
    layer_dtype = np.dtype(layer.dtype)
    # Floating layers keep at least float32 precision; anything else (e.g. raw
    # integer counts) is averaged as float64.
    if np.issubdtype(layer_dtype, np.floating):
        out_dtype = np.result_type(np.float32, layer_dtype)
    else:
        out_dtype = np.dtype(np.float64)
    # The product is sparse for sparse layers and dense for dense layers;
    # wrapping it in csr_matrix gives a uniform return type for both.
    averaged = sparse.csr_matrix(averager @ layer).astype(out_dtype)
    averaged.eliminate_zeros()
    return averaged


def CreadMiniAdata(iadata, icellinfo):
    """Collapse single cells into miniclusters by averaging their counts.

    Every distinct value of ``icellinfo["miniCluster"]`` becomes one observation
    of the returned object. Its ``spliced`` and ``unspliced`` layers (and ``X``,
    which is a copy of ``spliced``) hold the mean over the member cells.

    The averaging is done with a single sparse matrix product per layer instead
    of growing a dense matrix row by row, so both scipy-sparse and dense layers
    are accepted. Sums are accumulated in float64 before the final cast.

    Parameters
    ----------
    iadata
        AnnData-like object with ``obs_names``, ``var`` and the layers
        ``'spliced'`` and ``'unspliced'``.
    icellinfo
        ``pandas.DataFrame`` indexed by cell name with a ``miniCluster`` column.
        Rows are matched to ``iadata`` by label, not by position.

    Returns
    -------
    AnnData
        One observation per minicluster, in order of first appearance in
        ``icellinfo``. ``var`` is a copy of ``iadata.var``.

    Raises
    ------
    ValueError
        If any cell has a missing ``miniCluster`` label.
    KeyError
        If ``icellinfo`` lists cells that are not present in ``iadata``.
    """
    codes, minis = pd.factorize(icellinfo["miniCluster"])
    if (codes < 0).any():
        # A missing label cannot be matched to any cell, which would leave the
        # averaged matrix with fewer rows than there are observation labels.
        raise ValueError(
            f"{int((codes < 0).sum())} cell(s) have a missing miniCluster label"
        )

    positions = iadata.obs_names.get_indexer(icellinfo.index)
    if (positions < 0).any():
        unknown = list(icellinfo.index[positions < 0][:5])
        raise KeyError(f"cells not found in iadata.obs_names (first few): {unknown}")

    averager = _minicluster_averager(
        codes, positions, len(minis), len(iadata.obs_names)
    )
    spliced = _average_layer(averager, iadata.layers['spliced'])
    unspliced = _average_layer(averager, iadata.layers['unspliced'])

    adata_new = AnnData(
        X=spliced.copy(),
        obs=pd.DataFrame(index=minis),
        # Copy so that later edits to the minicluster object's var cannot leak
        # back into the source object.
        var=iadata.var.copy(),
        layers={'spliced': spliced.copy(), 'unspliced': unspliced.copy()},
    )
    return adata_new

In [34]:
# Build the CD4 minicluster object; the cell-to-minicluster assignment is read
# from the dataset's own obs table.
adata_mini_CD4 = CreadMiniAdata(iadata=adata_CD4, icellinfo=adata_CD4.obs)

In [35]:
# Build the CD8 minicluster object; the cell-to-minicluster assignment is read
# from the dataset's own obs table.
adata_mini_CD8 = CreadMiniAdata(iadata=adata_CD8, icellinfo=adata_CD8.obs)

In [37]:
# Show the summary of both minicluster objects, CD4 first, one after the other.
print(adata_mini_CD4, adata_mini_CD8, sep="\n")

AnnData object with n_obs × n_vars = 3552 × 33694
    var: 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'gene_count_corr'
    layers: 'spliced', 'unspliced'
AnnData object with n_obs × n_vars = 4099 × 33694
    var: 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'gene_count_corr'
    layers: 'spliced', 'unspliced'


In [38]:
# Save both minicluster objects as .h5ad files in the velo data directory,
# next to the per-cell inputs they were derived from.
adata_mini_CD4.write(f"{dir}/../data/velo/adata_mini_CD4.h5ad")
adata_mini_CD8.write(f"{dir}/../data/velo/adata_mini_CD8.h5ad")