# Zebrafish pigmentation

In [1]:
import warnings
warnings.filterwarnings('ignore')

import dynamo as dyn 
from dynamo.configuration import DKM
import numpy as np
import pandas as pd

this is like R's sessionInfo() which helps you to debug version related bugs if any. 

## Load data

In [2]:
adata_origin = dyn.sample_data.zebrafish()

|-----> Downloading data to ./data/zebrafish.h5ad


```
print(adata.X.min(), adata.X.max(), adata.X.mean())
adata.layers["spliced"] = adata.layers["spliced"].toarray()
temp = adata.layers["spliced"][~np.isnan(adata.layers["spliced"])]
print("unspliced data ")
print(temp.min(), temp.max(), temp.mean(),  temp.std())
adata.layers["unspliced"] = adata.layers["unspliced"].toarray()
temp = adata.layers["unspliced"][~np.isnan(adata.layers["unspliced"])]
print(temp.min(), temp.max(), temp.mean(), temp.std())
```

In [3]:
from SCTransform import SCTransform

## Apply pearson residual normalization on different layers according `select_genes_key`. We may use other select_genes function combined with pearson residual normalization.

In [7]:
# adata = dyn.sample_data.zebrafish()
from dynamo.preprocessing import Preprocessor
adata = dyn.sample_data.zebrafish()
preprocessor = Preprocessor()
preprocessor.config_monocle_recipe(adata)
preprocessor.normalize_by_cells = None
preprocessor.select_genes_function = dyn.preprocessing.select_genes_by_dispersion_svr
# preprocessor.select_genes_function = pearson_residual_normalization_recipe.select_genes_by_pearson_residual
# preprocessor.normalize_selected_genes_function = pearson_residual_normalization_recipe.normalize_layers_pearson_residuals
dyn.preprocessing.unique_var_obs_adata(adata)
preprocessor.filter_cells_by_outliers(adata, keep_filtered=False)
preprocessor.filter_genes_by_outliers(adata, inplace=True)
preprocessor.select_genes(adata, n_top_genes=2000)
adata = adata[:, adata.var["use_for_pca"]]

print("data before sctransform:")
print(pd.DataFrame(adata.X.data).describe())
SCTransform(adata, inplace=True)
print("data after sctransform:")
print(pd.DataFrame(adata.X.data).describe())
preprocessor.use_log1p = False
# preprocessor.preprocess_adata(adata)
preprocessor.pca(adata, n_pca_components=50, pca_key="X_pca")
dyn.data_io.cleanup(adata)
adata.write_h5ad("./data/zebrafish_sctransform_preprocessor.h5ad")


|-----> Downloading data to ./data/zebrafish.h5ad
|-----------> <insert> {} to uns['pp'] in AnnData Object.
|-----------> filtering cells by layer:X
|-----------> filtering cells by layer:spliced
|-----------> filtering cells by layer:unspliced
|-----> skip filtering by layer:protein as it is not in adata.
|-----> <insert> pass_basic_filter to obs in AnnData Object.
|-----------> inplace subsetting adata by filtered cells
|-----> filtering genes by dispersion...
|-----> select genes on var key: pass_basic_filter
|-----> select genes by recipe: dynamo_monocle


data before sctransform:
                  0
count  1.059567e+06
mean   3.726739e+00
std    1.098639e+01
min    1.000000e+00
25%    1.000000e+00
50%    1.000000e+00
75%    3.000000e+00
max    7.870000e+02
data after sctransform:
                   0
count  861530.000000
mean        2.123359
std         2.278595
min         0.000017
25%         0.637701
50%         1.411444
75%         2.721209
max        11.785584


NotImplementedError: Failed to write value for uns/velocyto_SVR/SVR, since a writer for type <class 'method'> has not been implemented yet.

Above error raised while writing key 'uns/velocyto_SVR/SVR' of <class 'h5py._hl.files.File'> from /.

In [None]:
# temp = adata.layers["spliced"][~np.isnan(adata.layers["spliced"])]
# print(pd.Series(temp).describe())
# temp = adata.layers["unspliced"][~np.isnan(adata.layers["unspliced"])]
# print(pd.Series(temp).describe())

In [None]:

celltype_key = "Cell_type"
dyn.tl.reduceDimension(adata, basis="pca")
dyn.pl.umap(adata, color=[celltype_key], figsize=(12, 12))


In [None]:
dyn.tl.leiden(adata, basis="pca")
dyn.pl.leiden(adata, basis="pca", color="Cell_type")
dyn.tl.louvain(adata, basis="pca")
dyn.pl.louvain(adata, basis="pca", color="Cell_type")

In [None]:
adata.layers["X_spliced"] = adata.layers["spliced"]
adata.layers["X_unspliced"] = adata.layers["unspliced"] 
adata.uns["pp"]["norm_method"] = "" 
dyn.tl.dynamics(adata, model='stochastic', cores=3)
dyn.pl.streamline_plot(adata, color=[celltype_key], basis='umap', show_legend='on data', show_arrowed_spines=True)


In [None]:
adata

In [None]:
dyn.pl.streamline_plot(adata, color=[celltype_key], basis='umap', show_legend='on data', show_arrowed_spines=True);


In [None]:
dyn.pl.umap(adata, color=['tfec', 'pnp4a'])

In [None]:
dyn.pl.phase_portraits(adata, genes=['Abcb7', 'Hectd3'],  figsize=(6, 4), color=celltype_key)

In [None]:
adata.uns["pp"]