In [None]:
import anndata
import scvelo as scv
import pandas as pd
import numpy as np
import matplotlib as plt

# Load the velocyto loom file through scvelo and bind the result to `adata`,
# the object every later cell operates on.
# NOTE(review): cache=True presumably lets scvelo reuse a cached copy of the
# file on later runs — confirm against the scv.read documentation.
adata = scv.read("Data/velocyto/possorted_genome_bam_1.loom", cache=True)

In [None]:
# Plot, then print, the count proportions scvelo reports for the data exactly
# as loaded (this cell runs before any cell filtering is applied to `adata`).
scv.pl.proportions(adata)
scv.utils.show_proportions(adata)

In [None]:
# Reduce every observation name to the text between the first ':' and the
# first 'x' that follows it (lazy match + look-ahead).
#
# The pattern is written as a raw string: in a normal string literal '\:' is
# an invalid Python escape sequence, which emits a Deprecation/SyntaxWarning
# at compile time. The regex handed to pandas is unchanged.
#
# NOTE(review): names that do not match the pattern become NaN, so this cell
# must not be re-run on an `adata` whose names were already stripped.
adata.obs.index = adata.obs.index.str.extract(r'\:(.*?)(?=x)')[0]
adata.obs.index

In [None]:
# Load the per-cell CD4 tables exported to CSV.
# `cellID_obs` is the single column 'x' (a Series) of its file, read with the
# first CSV column used as the row index.
cellID_obs = pd.read_csv("Data/Cells/CD4/cd4.cellID_obs.csv", index_col=0)['x']
# The remaining tables are kept as full DataFrames; later cells rename their
# 'Unnamed: 0' column to 'Cell ID' and join on it.
umap_cord = pd.read_csv("Data/Cells/CD4/cd4.cell_embeddings_umap.csv")
phate_cord = pd.read_csv("Data/Cells/CD4/cd4.cell_embeddings_phate.csv")
cell_clusters = pd.read_csv("Data/Cells/CD4/cd4.clusters.csv")
tumors = pd.read_csv("Data/Cells/CD4/cd4.tumor.csv")

In [None]:
# Append the '-1' suffix to every observation name so the names can be matched
# against the identifiers in `cellID_obs`.
# NOTE(review): presumably this mirrors the barcode format used by the CD4
# metadata tables — confirm. The cell is not idempotent (a second run appends
# another '-1').
adata.obs.index = [cell + '-1' for cell in adata.obs.index]

# Keep only the cells listed in `cellID_obs`.
# Subsetting an AnnData yields a *view*; later cells assign into .obs and
# .obsm, so take an explicit copy here instead of relying on anndata's
# implicit view-to-copy conversion (which emits ImplicitModificationWarning).
keep_mask = adata.obs.index.isin(cellID_obs)
adata = adata[keep_mask].copy()

In [None]:
# Re-plot the count proportions, now for the cells that survived the
# `cellID_obs` filter.
scv.pl.proportions(adata)

In [None]:
# One-column frame holding the observation names in `adata` order; it is the
# left-hand side of the merges that align the external CSV tables to `adata`.
# The column is created with the label 0 and renamed to 'Cell ID'.
adata_index = (
    pd.DataFrame(adata.obs.index)
    .rename(columns={0: 'Cell ID'})
)

In [None]:
# Align the UMAP coordinates to the order of cells in `adata` and store them
# as the 'X_umap' embedding.
umap_cord = umap_cord.rename(columns={'Unnamed: 0': 'Cell ID'})

# Merge on 'Cell ID' with `adata_index` on the left, then drop the leading
# key column so only the coordinate columns remain.
umap_ordered = pd.merge(adata_index, umap_cord, on="Cell ID").iloc[:, 1:]
adata.obsm['X_umap'] = umap_ordered.to_numpy()

In [None]:
# Align the PHATE coordinates to the order of cells in `adata` and store them
# as the 'X_phate' embedding.
phate_cord = phate_cord.rename(columns={'Unnamed: 0': 'Cell ID'})

# Merge on 'Cell ID' with `adata_index` on the left, then drop the leading
# key column so only the coordinate columns remain.
phate_ordered = pd.merge(adata_index, phate_cord, on="Cell ID").iloc[:, 1:]
adata.obsm['X_phate'] = phate_ordered.to_numpy()

In [None]:
# Align the cluster labels to the order of cells in `adata` and store them in
# adata.obs['clusters'].
cell_clusters = cell_clusters.rename(columns={'Unnamed: 0': 'Cell ID'})

# Merge on 'Cell ID' with `adata_index` on the left, then drop the leading
# key column so only the label column(s) remain.
cell_clusters_ordered = pd.merge(adata_index, cell_clusters, on="Cell ID").iloc[:, 1:]
adata.obs['clusters'] = cell_clusters_ordered.to_numpy()

In [None]:
# Align the tumour labels to the order of cells in `adata` and store them in
# adata.obs['tumors'].
tumors = tumors.rename(columns={'Unnamed: 0': 'Cell ID'})

# Merge on 'Cell ID' with `adata_index` on the left, then drop the leading
# key column so only the label column(s) remain.
tumors_ordered = pd.merge(adata_index, tumors, on="Cell ID").iloc[:, 1:]
adata.obs['tumors'] = tumors_ordered.to_numpy()

In [None]:
# Display the object summary to check that the embeddings ('X_umap',
# 'X_phate') and the obs columns ('clusters', 'tumors') were attached.
adata

In [None]:
# Preprocessing with scvelo's default settings: gene filtering/normalisation,
# then the moments step.
scv.pp.filter_and_normalize(adata)
scv.pp.moments(adata)

# Velocity estimation with the stochastic model, followed by the velocity
# graph that the embedding plots in later cells are drawn from.
scv.tl.velocity(adata, mode="stochastic")
scv.tl.velocity_graph(adata)

In [None]:
# Global figure defaults for every plot drawn after this cell.
# Note: `plt` is bound to the top-level `matplotlib` package by the import
# cell (not to `matplotlib.pyplot`); `rcParams` is reachable from it.
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'figure.dpi': 200,
})

In [None]:
# Draw the stochastic-model velocities on the UMAP embedding three ways
# (per-cell arrows, streamlines, grid arrows), coloured by cluster.
for plot_embedding in (
    scv.pl.velocity_embedding,
    scv.pl.velocity_embedding_stream,
    scv.pl.velocity_embedding_grid,
):
    plot_embedding(adata, basis='umap', color='clusters')

In [None]:
# Grid plot on UMAP with hand-tuned smoothing, grid density and arrow length.
scv.pl.velocity_embedding_grid(
    adata,
    basis='umap',
    smooth=1,
    density=1.5,
    color='clusters',
    arrow_length=3,
)

In [None]:
# Same grid plot on the PHATE embedding (shorter arrows than the UMAP plot).
scv.pl.velocity_embedding_grid(
    adata,
    basis='phate',
    smooth=1,
    density=1.5,
    color='clusters',
    arrow_length=1,
)

In [None]:
# Inspect the per-cell tumour labels; the following cells subset `adata` on
# the values 'b16' and 'b16il33' of this column.
adata.obs['tumors']

In [None]:
# Velocity analysis restricted to the cells labelled 'b16' in obs['tumors'].
#
# Subsetting an AnnData returns a view; scv.tl.velocity writes results into
# the object, so take an explicit copy rather than depending on anndata's
# implicit view-to-copy conversion (ImplicitModificationWarning).
b16 = adata[adata.obs['tumors'] == 'b16'].copy()

# Re-estimate velocities and the velocity graph on the subset only.
scv.tl.velocity(b16, mode="stochastic")
scv.tl.velocity_graph(b16)

# Same set of UMAP plots as for the full data set.
scv.pl.velocity_embedding(b16, basis='umap', color='clusters')
scv.pl.velocity_embedding_stream(b16, basis='umap', color='clusters')
scv.pl.velocity_embedding_grid(b16, basis='umap', color='clusters')
scv.pl.velocity_embedding_grid(b16, basis='umap', smooth=1, density=1.5, color='clusters', arrow_length=3)

# PAGA graph over the clusters of the subset, drawn on the UMAP embedding.
# NOTE(review): a later cell copies obsp['distances'/'connectivities'] into
# uns['neighbors'] before calling scv.tl.paga on the full object; confirm
# whether the subset needs the same workaround.
scv.tl.paga(b16, groups='clusters')
scv.pl.paga(b16, basis='umap', size=50, alpha=.1,
            min_edge_width=2, node_size_scale=2)

In [None]:
# Velocity analysis restricted to the cells labelled 'b16il33' in
# obs['tumors'].
#
# Subsetting an AnnData returns a view; scv.tl.velocity writes results into
# the object, so take an explicit copy rather than depending on anndata's
# implicit view-to-copy conversion (ImplicitModificationWarning).
b16il33 = adata[adata.obs['tumors'] == 'b16il33'].copy()

# Re-estimate velocities and the velocity graph on the subset only.
scv.tl.velocity(b16il33, mode="stochastic")
scv.tl.velocity_graph(b16il33)

# Same set of UMAP plots as for the full data set.
scv.pl.velocity_embedding(b16il33, basis='umap', color='clusters')
scv.pl.velocity_embedding_stream(b16il33, basis='umap', color='clusters')
scv.pl.velocity_embedding_grid(b16il33, basis='umap', color='clusters')
scv.pl.velocity_embedding_grid(b16il33, basis='umap', smooth=1, density=1.5, color='clusters', arrow_length=3)

# PAGA graph over the clusters of the subset, drawn on the UMAP embedding.
# NOTE(review): a later cell copies obsp['distances'/'connectivities'] into
# uns['neighbors'] before calling scv.tl.paga on the full object; confirm
# whether the subset needs the same workaround.
scv.tl.paga(b16il33, groups='clusters')
scv.pl.paga(b16il33, basis='umap', size=50, alpha=.1,
            min_edge_width=2, node_size_scale=2)

In [None]:
# Rank genes by velocity per cluster; the result is read back from
# adata.uns['rank_velocity_genes'].
scv.tl.rank_velocity_genes(adata, groupby='clusters', min_corr=0.3)

# Tabulate the ranked gene names and preview the first rows.
ranked_names = adata.uns['rank_velocity_genes']['names']
df = scv.DataFrame(ranked_names)
df.head()

In [None]:
# Compute the velocity confidence metrics, then colour the embedding by each
# of the two keys (colour scale clipped to the 5th-95th percentile).
# `keys` is reused by the following cell to select columns of adata.obs.
scv.tl.velocity_confidence(adata)

keys = ('velocity_length', 'velocity_confidence')
scv.pl.scatter(adata, c=keys, cmap='coolwarm', perc=[5, 95])

In [None]:
# Mean velocity length / confidence per cluster, transposed so that the
# clusters become columns, rendered with a colour gradient along each row.
#
# `keys` is a tuple; selecting several groupby columns with a tuple was
# deprecated in pandas 1.x and raises in pandas >= 2.0, so pass a list.
df = adata.obs.groupby('clusters')[list(keys)].mean().T
df.style.background_gradient(cmap='coolwarm', axis=1)

In [None]:
# Workaround kept from the original notebook: copy the neighbour graphs from
# .obsp into .uns['neighbors'] before running PAGA ("needed due to a current
# bug - bugfix is coming soon").
adata.uns['neighbors']['distances'] = adata.obsp['distances']
adata.uns['neighbors']['connectivities'] = adata.obsp['connectivities']

# PAGA graph over the clusters.
scv.tl.paga(adata, groups='clusters')

# Draw the PAGA graph on the UMAP embedding first ...
scv.pl.paga(adata, basis='umap', size=50, alpha=.1,
            min_edge_width=2, node_size_scale=2)

# ... and finish the cell with the styled transition-confidence table.
# A notebook only renders a cell's *last* expression; in the original order
# the Styler was built mid-cell and silently discarded.
df = scv.get_df(adata, 'paga/transitions_confidence', precision=2).T
df.style.background_gradient(cmap='Blues').format('{:.2g}')

In [None]:
# Switch to the dynamical model: fit the per-gene dynamics, then recompute the
# velocities and the velocity graph. This overwrites the stochastic-model
# results stored on `adata` by the earlier cell.
scv.tl.recover_dynamics(adata)

scv.tl.velocity(adata, mode="dynamical")
scv.tl.velocity_graph(adata)

In [None]:
# Draw the dynamical-model velocities on the UMAP embedding three ways
# (per-cell arrows, streamlines, grid arrows), coloured by cluster.
for plot_embedding in (
    scv.pl.velocity_embedding,
    scv.pl.velocity_embedding_stream,
    scv.pl.velocity_embedding_grid,
):
    plot_embedding(adata, basis='umap', color='clusters')

In [None]:
# Grid plot on UMAP for the dynamical model, with hand-tuned smoothing, grid
# density and arrow length.
scv.pl.velocity_embedding_grid(
    adata,
    basis='umap',
    smooth=1,
    density=1.5,
    color='clusters',
    arrow_length=3,
)

In [None]:
# Histograms of the fitted kinetic rate parameters for well-fitted velocity
# genes, followed by a preview of all 'fit*' columns.
df = adata.var

# Keep genes with fit_likelihood > 0.1 that are flagged as velocity genes.
# The original `A & B == True` parses as `(A & B) == True` because `&` binds
# tighter than `==`; it produced the right rows only by accident. Spell the
# two conditions out explicitly instead.
is_well_fitted = df['fit_likelihood'] > .1
is_velocity_gene = df['velocity_genes'].eq(True)
df = df[is_well_fitted & is_velocity_gene]

# Shared histogram options: logarithmic x axis, larger font.
kwargs = dict(xscale='log', fontsize=16)
with scv.GridSpec(ncols=3) as pl:
    pl.hist(df['fit_alpha'], xlabel='transcription rate', **kwargs)
    # The splicing rate is plotted as fit_beta multiplied by fit_scaling.
    pl.hist(df['fit_beta'] * df['fit_scaling'], xlabel='splicing rate', xticks=[.1, .4, 1], **kwargs)
    pl.hist(df['fit_gamma'], xlabel='degradation rate', xticks=[.1, .4, 1], **kwargs)

scv.get_df(adata, 'fit*', dropna=True).head()


In [None]:
# Compute latent time and colour the embedding scatter by it.
scv.tl.latent_time(adata)
scv.pl.scatter(
    adata,
    color='latent_time',
    color_map='gnuplot',
    size=80,
)


In [None]:
# Heatmap of the 300 genes with the highest fit likelihood, with cells sorted
# by latent time and annotated by cluster.
likelihood_ranked = adata.var['fit_likelihood'].sort_values(ascending=False)
top_genes = likelihood_ranked.index[:300]

scv.pl.heatmap(
    adata,
    var_names=top_genes,
    sortby='latent_time',
    col_color='clusters',
    n_convolve=100,
)

In [None]:
# Scatter plots for the 15 genes with the highest fit likelihood, five per
# row. `top_genes` is rebound here to the full ranking (no 300-gene cut).
likelihood_ranked = adata.var['fit_likelihood'].sort_values(ascending=False)
top_genes = likelihood_ranked.index

scv.pl.scatter(adata, basis=top_genes[:15], ncols=5, frameon=False)


In [None]:
# Rank the dynamical genes per cluster and preview the five top-ranked names
# of each cluster. `df` (one column per cluster) feeds the plotting loop in
# the next cell.
scv.tl.rank_dynamical_genes(adata, groupby='clusters')

df = scv.get_df(adata, 'rank_dynamical_genes/names')
df.head(5)

In [None]:
# One scatter figure per cluster showing that cluster's five top-ranked
# dynamical genes; the cluster name is used as the y-axis label.
for cluster, ranked_genes in df.items():
    scv.pl.scatter(adata, ranked_genes[:5], ylabel=cluster, frameon=False)