In [None]:
import scanpy as sc
import pandas as pd
import os 
import numpy as np
from scipy.sparse import issparse, csr_matrix

# Machine-specific locations, gathered in one place so they are easy to adjust.
WORKFLOW_DIR = "/Users/bombina2/github/Reg_Ax/ST_workflow"
VISIUM_H5AD = "/Users/bombina2/github/Reg_Ax/CCBR/output/vizium_test.h5ad"

# Switch the working directory to the workflow folder, then load the dataset.
os.chdir(WORKFLOW_DIR)

adata = sc.read_h5ad(VISIUM_H5AD)

def add_qc_metrics(adata, organism="hs", mt_match_pattern=None, layer=None,
                   log1p=False):
    """Flag mitochondrial genes and compute scanpy QC metrics in place.

    Parameters
    ----------
    adata
        Annotated data object to update. ``adata.var["mt"]`` is written here,
        and ``sc.pp.calculate_qc_metrics`` is run with ``inplace=True``.
    organism
        ``"hs"`` selects the prefix ``"MT-"`` and ``"mm"`` selects ``"mt-"``.
        Only consulted when ``mt_match_pattern`` is ``None``.
    mt_match_pattern
        Explicit gene-name prefix marking mitochondrial genes. Overrides
        ``organism`` when given.
    layer
        Forwarded to ``sc.pp.calculate_qc_metrics`` as ``layer``.
    log1p
        Forwarded to ``sc.pp.calculate_qc_metrics`` as ``log1p``.

    Raises
    ------
    ValueError
        If ``mt_match_pattern`` is ``None`` and ``organism`` is neither
        ``"hs"`` nor ``"mm"``.
    """
    # Resolve the mitochondrial gene-name prefix.
    if mt_match_pattern is None:
        if organism == "hs":
            mt_match_pattern = "MT-"
        elif organism == "mm":
            mt_match_pattern = "mt-"
        else:
            raise ValueError("Unknown organism")

    # Boolean per-gene flag consumed by scanpy through qc_vars below.
    adata.var["mt"] = adata.var_names.str.startswith(mt_match_pattern)

    # Honor the caller's log1p choice; it used to be silently forced to False.
    sc.pp.calculate_qc_metrics(
        adata,
        qc_vars=["mt"],
        inplace=True,
        log1p=log1p,
        percent_top=None,
        layer=layer,
    )

def add_qc_metrics_2(adata, organism="hs", mt_match_pattern=None, layer=None):
    """Compute per-observation feature and count totals without scanpy.

    Writes ``adata.var["mt"]``, ``adata.obs["nFeatue"]`` and
    ``adata.obs["nCount"]`` in place. Note that a later cell of this notebook
    redefines this function with additional mitochondrial columns.

    Parameters
    ----------
    adata
        Annotated data object to update.
    organism
        ``"hs"`` selects the prefix ``"MT-"`` and ``"mm"`` selects ``"mt-"``.
        Only consulted when ``mt_match_pattern`` is ``None``.
    mt_match_pattern
        Explicit gene-name prefix marking mitochondrial genes.
    layer
        Key into ``adata.layers`` to use as the matrix. ``None`` (default)
        uses ``adata.X``.

    Raises
    ------
    ValueError
        If ``mt_match_pattern`` is ``None`` and ``organism`` is neither
        ``"hs"`` nor ``"mm"``.
    """
    # Resolve the mitochondrial gene-name prefix.
    if mt_match_pattern is None:
        if organism == "hs":
            mt_match_pattern = "MT-"
        elif organism == "mm":
            mt_match_pattern = "mt-"
        else:
            raise ValueError("Unknown organism")
    adata.var["mt"] = adata.var_names.str.startswith(mt_match_pattern)

    # Honor the requested layer; it used to be ignored in favor of adata.X.
    counts = adata.X if layer is None else adata.layers[layer]
    # Work on a sparse matrix so the row reductions below behave uniformly.
    if not issparse(counts):
        counts = csr_matrix(counts)

    # Number of strictly positive entries per row.
    # NOTE(review): "nFeatue" looks like a typo for "nFeature"; the key is
    # kept unchanged because other code may read this column -- confirm.
    adata.obs["nFeatue"] = np.asarray((counts > 0).sum(axis=1)).ravel()
    # Sum of all entries per row.
    adata.obs["nCount"] = np.asarray(counts.sum(axis=1)).ravel()

# Annotate the loaded dataset in place with both QC implementations;
# organism="hs" makes each function use the "MT-" gene-name prefix.
add_qc_metrics(adata, organism="hs")
add_qc_metrics_2(adata, organism="hs")

In [None]:
def add_qc_metrics_2(adata, organism="hs", mt_match_pattern=None, layer=None):
    """Compute per-observation QC totals and mitochondrial share without scanpy.

    Writes four columns to ``adata.obs`` in place:

    - ``"nFeatue"``: number of strictly positive entries per row,
    - ``"nCount"``: sum of all entries per row,
    - ``"nCount_mt"``: sum over columns whose name starts with the
      mitochondrial prefix,
    - ``"percent.mt"``: ``100 * nCount_mt / nCount`` as float, with
      undefined ratios (NaN, e.g. 0/0) replaced by 0.

    Parameters
    ----------
    adata
        Annotated data object to update.
    organism
        ``"hs"`` selects the prefix ``"MT-"`` and ``"mm"`` selects ``"mt-"``.
        Only consulted when ``mt_match_pattern`` is ``None``.
    mt_match_pattern
        Explicit gene-name prefix marking mitochondrial genes.
    layer
        Key into ``adata.layers`` to use as the matrix. ``None`` (default)
        uses ``adata.X``.

    Raises
    ------
    ValueError
        If ``mt_match_pattern`` is ``None`` and ``organism`` is neither
        ``"hs"`` nor ``"mm"``.
    """
    # Resolve the mitochondrial gene-name prefix.
    if mt_match_pattern is None:
        if organism == "hs":
            mt_match_pattern = "MT-"
        elif organism == "mm":
            mt_match_pattern = "mt-"
        else:
            raise ValueError("Unknown organism")

    # Honor the requested layer; it used to be ignored in favor of adata.X.
    counts = adata.X if layer is None else adata.layers[layer]
    # Work on a sparse matrix so the row reductions below behave uniformly.
    if not issparse(counts):
        counts = csr_matrix(counts)

    # Number of strictly positive entries per row.
    # NOTE(review): "nFeatue" looks like a typo for "nFeature"; the key is
    # kept unchanged because other code may read this column -- confirm.
    adata.obs["nFeatue"] = np.asarray((counts > 0).sum(axis=1)).ravel()
    # Sum of all entries per row.
    adata.obs["nCount"] = np.asarray(counts.sum(axis=1)).ravel()

    # Boolean mask over columns whose name starts with the prefix.
    mt_genes = adata.var_names.str.startswith(mt_match_pattern)
    # Sum of the masked (mitochondrial) columns per row.
    adata.obs["nCount_mt"] = np.asarray(counts[:, mt_genes].sum(axis=1)).ravel()

    # Mitochondrial share in percent; rows with a zero total give NaN, which
    # is mapped to 0, and the result is stored as float.
    percent_mt = (adata.obs["nCount_mt"] / adata.obs["nCount"]) * 100
    adata.obs["percent.mt"] = percent_mt.fillna(0).astype(float)


In [None]:
# Snapshot adata.obs (including any QC columns added so far) as an independent copy.
meta = adata.obs.copy()

In [None]:
# Load the second dataset from disk (machine-specific absolute path).
atlas_adata = sc.read_h5ad("/Users/bombina2/github/Reg_Ax/CCBR/output/example/lung_atlas.h5ad")

In [None]:
# Bare expression: the notebook displays the object's repr as the cell output.
atlas_adata

In [None]:
# scanpy-based QC on the atlas, in place; organism="hs" selects the "MT-" prefix.
add_qc_metrics(atlas_adata, organism="hs")


In [None]:
# Manual QC on the atlas, in place. When cells run top to bottom, this uses the
# most recent definition of add_qc_metrics_2 (the one that also adds mt columns).
add_qc_metrics_2(atlas_adata, organism="hs")

In [None]:
# Snapshot atlas_adata.obs (including the QC columns added above) as an independent copy.
atlas_meta = atlas_adata.obs.copy()

In [None]:
# Boolean mask over atlas_adata.var_names: True where the name starts with "MT-".
mt_genes = atlas_adata.var_names.str.startswith("MT-")
# Display the mask as the cell output for inspection.
mt_genes