In [None]:
%load_ext pretty_jupyter

In [None]:
# -.-|m { input: false, output: false, input_fold: show}

import tomlkit
import scanpy as sc
from anndata import AnnData

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import patchworklib as pw


from IPython.display import display
from os import path
import session_info
import logging

logging.basicConfig(level=logging.ERROR)


# Utility functions

In [None]:

def get_var_features_num(adata: AnnData, variable_features: int|float):
    detected_gene_nu = len(adata.var_names) 
    if variable_features <= 1:
        return int(detected_gene_nu * variable_features)
    else:
        return min(detected_gene_nu, variable_features)
    

# Config

In [None]:
## Pipeline parameters

with open("../config.toml", "r") as f:
    config = tomlkit.parse(f.read())

In [None]:
ROOT_DIR = config["basic"]["ANALYSIS_DIR"]
DIR_SAVE = path.join(ROOT_DIR, config["basic"]["DIR_SAVE"])

NORMALIZATION_METHOD = config["normalization"]["NORMALIZATION_METHOD"]
VARIABLE_FEATURES = config["normalization"]["VARIABLE_FEATURES"]
COUNTS_LAYER = config["normalization"]["COUNTS_LAYER"]


# Reading Files

In [48]:
import gc 
gc.collect()

482

In [47]:
del adata

NameError: name 'adata' is not defined

In [26]:
adata = sc.read_h5ad(path.join(DIR_SAVE, "adata.h5ad"))

# Normalize object

In [29]:
if np.min(adata.X) < 0:
    if COUNTS_LAYER in adata.layers.keys():
        adata.X = adata.layers[COUNTS_LAYER]
    else:
        raise ValueError("obj.X seems to be normalized and and the {COUNTS_LAYER} layer can't be found in the object")
else:
    adata.layers[COUNTS_LAYER] = adata.X.copy()

In [16]:
adata.X = adata.layers["counts"].copy()

In [None]:

sc.pp.normalize_total(adata)


In [42]:
adata.layers["counts"][1:100, 1:100].toarray()

array([[0.        , 0.        , 0.        , ..., 0.69314718, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.69314718, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.69314718, 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.38629436, 0.        ,
        0.69314718],
       [0.        , 0.        , 0.        , ..., 1.60943791, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 1.09861229, 0.        ,
        0.        ]])

In [32]:
sc.pp.log1p(adata)



# Select Variable Features

In [None]:
var_features = get_var_features_num(adata, VARIABLE_FEATURES)

sc.pp.highly_variable_genes(adata, n_top_genes=var_features, batch_key="sample")
sc.pl.highly_variable_genes(adata)
