In [None]:
#### kernel: python3.12_envs

In [13]:
import scanpy as sc
# pip install NumPy==1.9
# pip install -U scvelo
import scvelo as scv
import sys
from functools import wraps
import numpy as np
import pandas as pd 
import anndata as ad
import cellrank as cr
import pickle
# import h5py

In [2]:
# pip install numpy
# pip install pandas 
# pip install cellrank
# pip install anndata

#### 1. load Seurat data

In [13]:
def _read_h5ad_indexed_by_barcode2(h5ad_file):
    """Read an .h5ad file and use its `barcode2` obs column as the obs index.

    Parameters
    ----------
    h5ad_file : str
        Path of the .h5ad file to read.

    Returns
    -------
    The object returned by `sc.read_h5ad`, with `obs.index` replaced by
    the `barcode2` column of `obs`.
    """
    loaded = sc.read_h5ad(h5ad_file)
    # presumably `barcode2` carries the sample-suffixed barcodes that the loom
    # obs names are rewritten to later on -- verify against the export step
    loaded.obs.index = loaded.obs["barcode2"]
    return loaded


# Control object first, knockout object second.
adata_con = _read_h5ad_indexed_by_barcode2("/2_Velocity/Obj_Control.h5ad")
adata_ko = _read_h5ad_indexed_by_barcode2("/2_Velocity/Obj_vcl.h5ad")


#### 2. load loom data

In [None]:
path = "/home/lfliuhku/rawdata/Vcl_ENCC/"


def _read_velocyto_loom(loom_file, sample_suffix):
    """Read one velocyto loom file and rewrite its cell names.

    Each obs name is split on ":" and the part after the first ":" is kept;
    every "x" in that part is then replaced by `sample_suffix`
    (e.g. "<prefix>:ACGTx" with suffix "-3" becomes "ACGT-3").
    Variable names are made unique before returning.

    Parameters
    ----------
    loom_file : str
        Path of the loom file, passed to `scv.read` with `cache=True`.
    sample_suffix : str
        Per-sample tag that replaces "x" in the barcode (e.g. "-3").

    Returns
    -------
    The object returned by `scv.read`, with renamed obs index and unique
    variable names.
    """
    loom_data = scv.read(loom_file, cache=True)
    loom_data.obs.index = [
        raw_name.split(':')[1].replace("x", sample_suffix)  # drop the prefix, tag the sample
        for raw_name in loom_data.obs.index.tolist()
    ]
    loom_data.var_names_make_unique()
    return loom_data


# One loom file per sample; the suffix identifies the sample in the barcode.
# NOTE(review): the suffixes are assumed to match the `barcode2` values of the
# .h5ad objects -- confirm against the Seurat export.
ldata1 = _read_velocyto_loom(path + '/E15.5/E15_5_ENCC_Vcl_control/velocyto/E15_5_ENCC_Vcl_control.loom', "-3")
ldata2 = _read_velocyto_loom(path + '/E15.5/E15_5_ENCC_Vcl_cKO/velocyto/E15_5_ENCC_Vcl_cKO.loom', "-4")
ldata3 = _read_velocyto_loom(path + '/E13.5/YFP_ENCC_control/velocyto/YFP_ENCC_control.loom', "-1")
ldata4 = _read_velocyto_loom(path + '/E13.5/YFP_ENCC_Vcl/velocyto/YFP_ENCC_Vcl.loom', "-2")

#### 3. merge data

In [None]:
##### merge the loom data per condition, then attach it to the .h5ad objects
# control = ldata1 (E15.5 control) + ldata3 (E13.5 control)
# ko      = ldata2 (E15.5 cKO)     + ldata4 (E13.5 Vcl)
#
# index_unique=None keeps the obs names exactly as they were set when the loom
# files were loaded. With the default, `concatenate` appends a batch suffix to
# every obs name, which previously had to be cut off again with a hard-coded
# bc[0:18] slice (only correct for 16-character barcodes plus a 2-character
# sample suffix).
ldata_con = ldata1.concatenate([ldata3], index_unique=None)
ldata_ko = ldata2.concatenate([ldata4], index_unique=None)

# Combine each .h5ad object with the loom data of the same condition.
adata_con2 = scv.utils.merge(adata_con, ldata_con)
adata_ko2 = scv.utils.merge(adata_ko, ldata_ko)

In [None]:
# path = "/home/lfliuhku/projects/Vcl_mouse/2_trajectory/RNAvelo/"
# with open("./outputdata/1_mergerdata.h5", "wb") as file:
#     pickle.dump(adata, file)

# with h5py.File(path + "/outputdata/1_mergedata.h5", 'w') as hdf:
#     hdf.create_dataset('adata', data= adata)

In [103]:
# ##### visualize the proportion of unspliced and spliced
# adata.obs['group']=adata.obs['group'].astype('category').values
# scv.pl.proportions(adata, groupby='group')

# ##### visualize the proportion of unspliced and spliced
# adata.obs['main_celltype']=adata.obs['main_celltype'].astype('category').values
# scv.pl.proportions(adata, groupby='main_celltype')

#### 4. normalize data

In [16]:
def _preprocess_for_velocity(adata, min_shared_counts=20, n_top_genes=2000,
                             n_pcs=30, n_neighbors=30):
    """Run the filtering, normalization, PCA, neighbor and moment steps on `adata`.

    The object is modified in place by the scvelo / scanpy calls; nothing is
    returned. The calls and their order are the same for every dataset, so
    control and knockout are guaranteed to be processed identically.

    Parameters
    ----------
    adata
        Merged object to preprocess.
    min_shared_counts : int
        Passed to `scv.pp.filter_genes`.
    n_top_genes : int
        Passed to `scv.pp.filter_genes_dispersion`.
    n_pcs, n_neighbors : int
        Passed to `sc.pp.neighbors`.
    """
    scv.pp.filter_genes(adata, min_shared_counts=min_shared_counts)
    scv.pp.normalize_per_cell(adata)
    scv.pp.filter_genes_dispersion(adata, n_top_genes=n_top_genes)
    scv.pp.log1p(adata)

    # NOTE(review): this call comes after the explicit filter / normalize /
    # log1p calls above and uses default arguments; it looks redundant --
    # confirm before removing it.
    scv.pp.filter_and_normalize(adata)

    sc.pp.pca(adata)  # some na value
    # to check for NaNs: np.sum(np.isnan(adata.X))

    sc.pp.neighbors(adata, n_pcs=n_pcs, n_neighbors=n_neighbors)
    # n_pcs=None / n_neighbors=None are passed through unchanged;
    # presumably this makes moments reuse the neighbors computed above -- TODO confirm
    scv.pp.moments(adata, n_pcs=None, n_neighbors=None)


_preprocess_for_velocity(adata_con2)
_preprocess_for_velocity(adata_ko2)

#### 5. calculate the velocity

In [None]:
# Compute RNA velocity with the steady-state model (stochastic option) and then
# build the velocity graph -- control first, knockout second.
for _adata in (adata_con2, adata_ko2):
    scv.tl.velocity(_adata, mode='stochastic')
    scv.tl.velocity_graph(_adata)


#### 6. visualization (all)

In [1]:

##### visualize the proportion of unspliced and spliced
# For each dataset: cast the annotation column to categorical, then plot the
# proportions grouped by that annotation (control first, knockout second).
for _adata in (adata_con2, adata_ko2):
    _annotation = _adata.obs['final.annotation'].astype('category')
    _adata.obs['final.annotation'] = _annotation.values
    scv.pl.proportions(_adata, groupby='final.annotation')

In [2]:

def _velocity_grid_options():
    """Return the plotting options shared by the control and mutant grids.

    A fresh dict (with a fresh palette dict and figsize list) is built on every
    call, so the two plots never share mutable arguments.
    """
    return dict(
        basis='X_umap',
        color='final.annotation',
        title='',
        scale=1,
        arrow_size=3,
        density=0.6,
        arrow_length=3,
        legend_loc='none',
        alpha=0.2,
        figsize=[4, 5],
        palette={
            "GP": "#feb462",
            "BP": "#6b853e",
            "Neuroblast": "#fdcee6",
            "BranchA": "#bca9f5",
            "BranchB": "#fac5b3",
        },
    )


# Velocity grid on the UMAP embedding; only the dataset and output file differ.
scv.pl.velocity_embedding_grid(
    adata_con2,
    save='F0_embedding_umap_final.annotation_control.pdf',
    **_velocity_grid_options(),
)

scv.pl.velocity_embedding_grid(
    adata_ko2,
    save='F0_embedding_umap_final.annotation_mutant.pdf',
    **_velocity_grid_options(),
)


In [3]:

def _min_max_scale(values):
    """Linearly rescale `values` so the minimum maps to 0 and the maximum to 1.

    Parameters
    ----------
    values : array-like
        Numeric values to rescale.

    Returns
    -------
    numpy.ndarray
        `(values - min) / (max - min)`. If every value is identical the range
        is zero; an array of zeros is returned instead of dividing by zero.
    """
    values = np.asarray(values)
    lowest = np.min(values)
    highest = np.max(values)
    span = highest - lowest
    if span == 0:
        return np.zeros(values.shape, dtype=float)
    return (values - lowest) / span


###### control, then ko
# `velocity_length` is read from obs right after scv.tl.velocity_confidence
# (presumably written by that call) and stored min-max scaled per dataset.
for _adata in (adata_con2, adata_ko2):
    scv.tl.velocity_confidence(_adata)
    _adata.obs['scaled_velocity_length'] = _min_max_scale(
        _adata.obs['velocity_length'].values
    )

# Report the 98th and 2nd percentiles of the scaled lengths. Bare expressions
# in the middle of a cell are never displayed, so the values are printed.
for _percentile in (98, 2):
    for _name, _adata in (("ko", adata_ko2), ("control", adata_con2)):
        _value = np.percentile(_adata.obs['scaled_velocity_length'], _percentile)
        print(f"{_name}: scaled_velocity_length percentile {_percentile} = {_value}")

# NOTE(review): both perc=[2, 98] and vmin/vmax are passed -- confirm which one
# scvelo uses for the colour limits.
scv.pl.scatter(adata_con2, size=40, c="scaled_velocity_length", figsize=(4, 5), fontsize=18, cmap='coolwarm', perc=[2, 98], vmin=0, vmax=0.8, title="Control: scaled velocity length")
scv.pl.scatter(adata_ko2, size=40, c="scaled_velocity_length", figsize=(4, 5), fontsize=18, cmap='coolwarm', perc=[2, 98], vmin=0, vmax=0.8, title="Mutant: scaled velocity length")

