In [None]:
import scvelo as scv
import numpy as np
import pandas as pd

import scanpy as sc
import matplotlib.pyplot as plt 

from velocity_scripts import local_velocity_smoothness,extract_knn_from_adata

In [None]:
# Load the pancreas example dataset shipped with scVelo; everything below derives from `adata`.
adata = scv.datasets.pancreas()


In [None]:
# Per-cell normalisation followed by a log1p transform. The return values are
# discarded, so both calls are relied on to modify `adata` in place.
# NOTE(review): scanpy documents `normalize_per_cell` as deprecated in favour of
# `normalize_total`; the two are not drop-in identical, so confirm before swapping.
sc.pp.normalize_per_cell(adata)
sc.pp.log1p(adata)


# Foreign HVG

In [None]:
# Read the externally supplied gene list as an array of strings
# (np.loadtxt splits on whitespace by default).
hvg = np.loadtxt("./ZPSGenes.tsv",dtype=str)
# Show how many entries were read.
hvg.shape

In [None]:
# Restrict the data to the externally supplied genes.
# Slicing an AnnData object yields a *view* of `adata`; every later cell writes
# into `filtered` (neighbors, umap, moments, velocity, obs/obsm assignments), so
# take an explicit copy up front instead of relying on AnnData's implicit
# view-to-copy conversion (and its ImplicitModificationWarning) on first write.
filtered = adata[:,hvg].copy()

# Vignette Analysis

In [None]:
# Build the neighbour graph on the gene-filtered data, compute a UMAP embedding
# from it (read back later as obsm["X_umap"]), and plot the embedding.
sc.pp.neighbors(filtered)
sc.tl.umap(filtered)
sc.pl.umap(filtered)

In [None]:
# Compute the scVelo moments on `filtered`.
# NOTE(review): n_neighbors/n_pcs are explicitly None, presumably so that the
# neighbour graph built by scanpy above is reused rather than recomputed; confirm
# against the scVelo documentation for the installed version.
scv.pp.moments(filtered,n_neighbors=None,n_pcs=None)

In [None]:
# Estimate velocities (read back below from layers['velocity']) and then build
# the velocity graph; both results are stored on `filtered`.
scv.tl.velocity(filtered)
scv.tl.velocity_graph(filtered)


In [None]:
# Plot the velocity arrows on the embedding, using a large 30x30 figure.
scv.pl.velocity_embedding(filtered,figsize=(30,30))

In [None]:
# "Raw" velocity: the 'velocity' layer as stored on `filtered`
# (presumably cells x genes, TODO confirm).
scv_raw_v = filtered.layers['velocity']

In [None]:
# L2 norm of each row of the raw velocity matrix (axis=1), stored per observation
# so it can be used as a plot colour.
# NOTE(review): any NaN entries in the velocity layer would make the whole row's
# norm NaN; confirm the layer is NaN-free for this gene set.
raw_velocity_norm = np.linalg.norm(scv_raw_v,axis=1)
filtered.obs['raw_velocity_norm'] = raw_velocity_norm

In [None]:
# UMAP coloured by the per-observation raw velocity norm computed above.
scv.pl.umap(filtered,color="raw_velocity_norm",figsize=(12,8))

# I don't understand how the arrow lengths in the UMAP velocity-embedding plot relate to the raw velocity norms.
# How does scVelo compute the arrow magnitudes shown in that plot?

In [None]:
# Neighbour structure extracted from `filtered` by the project helper; it is
# passed to every local_velocity_smoothness call below.
knn = extract_knn_from_adata(filtered)

In [None]:
# Project the velocities onto the UMAP basis and read the result back.
scv.tl.velocity_embedding(filtered,basis="umap")
scv_umap_v = filtered.obsm['velocity_umap']

# t0 = current UMAP coordinates; t1 = coordinates displaced by one velocity step.
scv_umap_t0 = filtered.obsm["X_umap"]
scv_umap_t1 = scv_umap_t0 + scv_umap_v

In [None]:
# We're going to be slightly cheeky here and get a velocity in graph space like so:
# register the expression matrix itself as an "embedding" called "duplicate" and
# let scVelo project the velocities onto it.

expression = filtered.X
# `toarray()` returns a plain ndarray for sparse matrices (no np.matrix detour via
# `todense()`); fall back to a dense copy when X is already an ndarray, which has
# no `todense`/`toarray` method and would otherwise raise AttributeError.
filtered.obsm["X_duplicate"] = expression.toarray() if hasattr(expression, "toarray") else np.array(expression)
scv.tl.velocity_embedding(filtered,basis="duplicate")

scv_graph_v = filtered.obsm["velocity_duplicate"]

In [None]:
# We really want to avoid having this impact the neighbor graph, maybe I should run it off a copy?
# (PCA is computed in place on `filtered`, after the neighbour graph and `knn` were built.)

sc.pp.pca(filtered,n_comps=50)

# Project the velocities onto the 50-component PCA basis.
scv.tl.velocity_embedding(filtered,basis="pca")
scv_pca_v = filtered.obsm['velocity_pca']

# t0 = current PCA coordinates; t1 = coordinates displaced by one velocity step.
scv_pca_t0 = filtered.obsm["X_pca"]
scv_pca_t1 = scv_pca_t0 + (scv_pca_v)

# Row-wise L2 norm of the PCA-space velocity, stored per observation.
pca_velocity_norm = np.linalg.norm(scv_pca_v,axis=1)
filtered.obs['pca_velocity_norm'] = pca_velocity_norm

# Check smoothness of raw vs inferred embedding

In [None]:
# Smoothness scores (as computed by local_velocity_smoothness with the cosine
# metric) for each velocity representation, all over the same `knn` structure.
pca_smoothness_cosine = local_velocity_smoothness(scv_pca_v, knn, metric='cosine')
raw_smoothness_cosine = local_velocity_smoothness(scv_raw_v, knn, metric='cosine')
graph_smoothness_cosine = local_velocity_smoothness(scv_graph_v, knn, metric='cosine')
umap_smoothness_cosine = local_velocity_smoothness(scv_umap_v, knn, metric='cosine')

# One histogram per representation, drawn in the order raw, graph, PCA, UMAP.
cosine_histograms = (
    ("Raw Smoothness", raw_smoothness_cosine),
    ("Graph V Smoothness", graph_smoothness_cosine),
    ("PCA V Smoothness", pca_smoothness_cosine),
    ("UMAP V Smoothness", umap_smoothness_cosine),
)
for histogram_title, smoothness_values in cosine_histograms:
    plt.figure()
    plt.title(histogram_title)
    plt.hist(smoothness_values)
    plt.show()


In [None]:
# Cosine smoothness: PCA-space velocity (x) against raw velocity (y).
plt.figure()
plt.scatter(pca_smoothness_cosine,raw_smoothness_cosine,s=1)
plt.show()

# Cosine smoothness: PCA-space velocity (x) against UMAP-space velocity (y).
plt.figure()
plt.scatter(pca_smoothness_cosine,umap_smoothness_cosine,s=1)
plt.show()

In [None]:
# Same four smoothness scores as for the cosine metric, but with the euclidean
# metric, all over the same `knn` structure.
pca_smoothness_euclidean = local_velocity_smoothness(scv_pca_v, knn, metric='euclidean')
raw_smoothness_euclidean = local_velocity_smoothness(scv_raw_v, knn, metric='euclidean')
graph_smoothness_euclidean = local_velocity_smoothness(scv_graph_v, knn, metric='euclidean')
umap_smoothness_euclidean = local_velocity_smoothness(scv_umap_v, knn, metric='euclidean')

# One histogram per representation, drawn in the order raw, graph, PCA, UMAP.
euclidean_histograms = (
    ("Raw Smoothness", raw_smoothness_euclidean),
    ("Graph V Smoothness", graph_smoothness_euclidean),
    ("PCA V Smoothness", pca_smoothness_euclidean),
    ("UMAP V Smoothness", umap_smoothness_euclidean),
)
for histogram_title, smoothness_values in euclidean_histograms:
    plt.figure()
    plt.title(histogram_title)
    plt.hist(smoothness_values)
    plt.show()


In [None]:
# Euclidean smoothness: PCA-space velocity (x) against raw velocity (y). No axis labels are set.
plt.figure()
plt.scatter(pca_smoothness_euclidean,raw_smoothness_euclidean,s=1)
plt.show()

# Euclidean smoothness: PCA-space velocity (x) against graph-space velocity (y). No axis labels are set.
plt.figure()
plt.scatter(pca_smoothness_euclidean,graph_smoothness_euclidean,s=1)
plt.show()


# Graph-space (x) against PCA-space (y) smoothness, coloured by raw velocity magnitude.
plt.figure()
plt.xlabel("Smoothness of raw graph velocity \n(mean euclidean distance in a neighborhood)")
plt.ylabel("Smoothness of PCA graph velocity \n(mean euclidean distance in a neighborhood)")
plt.scatter(graph_smoothness_euclidean,pca_smoothness_euclidean,s=1,c=raw_velocity_norm)
plt.colorbar(label="Magnitude of raw velocity (L2 norm)")
plt.show()

# Raw (x) against graph-space (y) smoothness, coloured by raw velocity magnitude.
plt.figure()
plt.xlabel("Smoothness of raw velocity \n(mean euclidean distance in a neighborhood)")
plt.ylabel("Smoothness of graph velocity \n(mean euclidean distance in a neighborhood)")
plt.scatter(raw_smoothness_euclidean,graph_smoothness_euclidean,s=1,c=raw_velocity_norm)
plt.colorbar(label="Magnitude of raw velocity (L2 norm)")
plt.show()



# plt.figure()
# plt.xlabel("Smoothness of raw graph velocity \n(mean cosine distance in a neighborhood)")
# plt.ylabel("Smoothness of PCA graph velocity \n(mean cosine distance in a neighborhood)")
# plt.scatter(graph_smoothness_euclidean,pca_smoothness_cosine,s=1,c=raw_velocity_norm)
# plt.colorbar(label="Magnitude of raw velocity (L2 norm)")
# plt.show()

# plt.figure()
# plt.xlabel("Smoothness of raw velocity \n(mean cosine distance in a neighborhood)")
# plt.ylabel("Smoothness of graph velocity \n(mean cosine distance in a neighborhood)")
# plt.scatter(raw_smoothness_euclidean,graph_smoothness_cosine,s=1,c=raw_velocity_norm)
# plt.colorbar(label="Magnitude of raw velocity (L2 norm)")
# plt.show()

In [None]:
# Conclusion: smoothness of raw velocity seems almost unrelated to smoothness in PCA space
# Smoothness in graph raw velocity space seems relatively well correlated to smoothness in PCA space
# This speaks poorly of the relationship between raw and graph velocity 

In [None]:
# Need to figure out the normalization/scaling factor here. There's clearly a linear relationship but the scales are way off

In [None]:
# Ok, so now I guess let's look at the predicted data smoothness

In [None]:
import os
import sys

# Location of the RNAForecaster paper code that provides `python_interface`.
# Set the RNA_FORECASTER_SRC environment variable to point at another checkout;
# the original hard-coded path is kept as the fallback so existing setups still work.
RNA_FORECASTER_SRC = os.environ.get(
    "RNA_FORECASTER_SRC",
    "/Users/bbrener1/haxx/RNAForecasterPaperCode/src/",
)

# Only extend sys.path once so that re-running this cell does not pile up duplicates.
if RNA_FORECASTER_SRC not in sys.path:
    sys.path.append(RNA_FORECASTER_SRC)
import python_interface as py_n

In [None]:
# Hyper-parameters passed to the forecaster's training call.
params = {
    "hiddenLayerNodes": 6000,
    "batchSize": 200,
    "learningRate":1e-4,
    "nEpochs":100
}

# Train on (current UMAP coordinates -> coordinates displaced by the UMAP velocity),
# then predict forward from the current coordinates.
# NOTE(review): `predict` is given no model handle, so it presumably uses whatever
# model the most recent `train` call left behind inside `py_n`; confirm.
py_n.train(scv_umap_t0,scv_umap_t1,params=params)
umap_futures = py_n.predict(scv_umap_t0,params={"damping":0.7})

In [None]:
# Predicted displacement: first predicted state minus the starting UMAP coordinates.
umap_predicted_delta = umap_futures[0] - scv_umap_t0

# Element-wise comparison of the predicted displacement (x) with the scVelo UMAP velocity (y).
plt.figure()
plt.scatter(umap_predicted_delta.flatten(),scv_umap_v.flatten(),s=1)
plt.show()

In [None]:
from velocity_scripts import trajectory_series

# Hand the first four predicted states to the project's trajectory_series helper.
trajectory_series([umap_futures[0],umap_futures[1],umap_futures[2],umap_futures[3]])

In [None]:
# Euclidean smoothness of the predicted UMAP displacement and of the scVelo UMAP
# velocity, both over the same `knn` structure.
umap_prediction_smoothness = local_velocity_smoothness(umap_predicted_delta,knn,metric='euclidean')
umap_actual_smoothness = local_velocity_smoothness(scv_umap_v,knn,metric='euclidean')

# plt.figure()
# plt.xlabel("Smoothness of raw graph velocity \n(mean euclidean distance in a neighborhood)")
# plt.ylabel("Smoothness of umap graph velocity \n(mean euclidean distance in a neighborhood)")
# plt.scatter(graph_smoothness_euclidean,umap_prediction_smoothness,s=1,c=raw_velocity_norm)
# plt.colorbar(label="Magnitude of raw velocity (L2 norm)")
# plt.show()

# Predicted (x) against actual (y) smoothness, coloured by raw velocity magnitude.
plt.figure()
plt.scatter(umap_prediction_smoothness,umap_actual_smoothness,s=1,c=raw_velocity_norm)
# Dashed y = x reference line; its endpoints (.02 to .1) are hard-coded rather than derived from the data.
plt.plot([.02,.1],[.02,.1],color='red',linestyle="--")
plt.colorbar(label="Magnitude of raw velocity (L2 norm)")
plt.xlabel("Prediction smoothness")
plt.ylabel("Actual smoothness")
plt.show()


In [None]:
# Optional re-basis
# Recomputes the 50-component PCA on `filtered` and rebuilds every PCA-derived
# variable (velocity, t0/t1 coordinates, velocity norm) from it.

sc.pp.pca(filtered,n_comps=50)

scv.tl.velocity_embedding(filtered,basis="pca")
scv_pca_v = filtered.obsm['velocity_pca']

# t0 = current PCA coordinates; t1 = coordinates displaced by one velocity step.
scv_pca_t0 = filtered.obsm["X_pca"]
scv_pca_t1 = scv_pca_t0 + (scv_pca_v)

# Row-wise L2 norm of the PCA-space velocity, stored per observation.
pca_velocity_norm = np.linalg.norm(scv_pca_v,axis=1)
filtered.obs['pca_velocity_norm'] = pca_velocity_norm


In [None]:
# PCA-space velocity norm (x) against raw velocity norm (y), one point per observation.
plt.figure()
plt.scatter(pca_velocity_norm.flatten(),raw_velocity_norm.flatten(),s=1)
plt.show()

In [None]:
# PCA prediction
#
# `py_n.predict` is given no model handle, so it works from whatever the most
# recent `py_n.train` call produced. With the training below commented out, a
# fresh Restart & Run All would predict 50-dimensional PCA input with the model
# fitted on 2-dimensional UMAP coordinates earlier in the notebook. Train on the
# PCA-space (t0 -> t1) pairs here so the prediction matches its input space.
# A separate name is used so the UMAP `params` dict is left untouched.
pca_params = {
    "hiddenLayerNodes": 1000,
    "batchSize": 200,
    "learningRate":1e-4,
    "nEpochs":300
}

py_n.train(scv_pca_t0,scv_pca_t1,params=pca_params)

# Predict 20 steps forward from the current PCA coordinates.
pca_futures = py_n.predict(scv_pca_t0,params={"damping":0.7,"tSteps":20})

pca_futures.shape

In [None]:
# Predicted displacement: first predicted state minus the starting PCA coordinates.
pca_predicted_delta = pca_futures[0] - scv_pca_t0

plt.figure()
# The "k=50 pca, 300 epochs" in the title is hard-coded; keep it in sync with the
# PCA size and the training configuration actually used for the model.
plt.title("predicted vs true k=50 pca, 300 epochs")
plt.scatter(pca_predicted_delta.flatten(),scv_pca_v.flatten(),s=1)
# Endpoints of the dashed y = x reference line: the overall min and max across
# both arrays, each scaled by 0.8.
axlim_min = min(min(pca_predicted_delta.flatten()),min(scv_pca_v.flatten())) * .8
axlim_max = max(max(pca_predicted_delta.flatten()),max(scv_pca_v.flatten())) * .8
plt.plot([axlim_min,axlim_max],[axlim_min,axlim_max],color='red',linestyle="--")
plt.show()

In [None]:
# Euclidean smoothness of the predicted PCA displacement and of the scVelo PCA
# velocity, both over the same `knn` structure.
pca_prediction_smoothness = local_velocity_smoothness(pca_predicted_delta, knn, metric='euclidean')
pca_actual_smoothness = local_velocity_smoothness(scv_pca_v, knn, metric='euclidean')

# The same predicted-vs-actual scatter is drawn twice: coloured first by the raw
# velocity magnitude, then by the PCA velocity magnitude.
for norm_values, norm_label in (
    (raw_velocity_norm, "Magnitude of raw velocity (L2 norm)"),
    (pca_velocity_norm, "Magnitude of pca velocity (L2 norm)"),
):
    plt.figure()
    plt.title("Smoothness of NeuODE Predictions compared to true PCA embedding")
    plt.scatter(pca_prediction_smoothness, pca_actual_smoothness, s=1, c=norm_values)
    # Dashed y = x reference line from .02 up to the largest smoothness value seen.
    axlim = max(max(pca_prediction_smoothness), max(pca_actual_smoothness))
    plt.plot([.02, axlim], [.02, axlim], color='red', linestyle="--")
    plt.colorbar(label=norm_label)
    plt.xlabel("Prediction smoothness")
    plt.ylabel("Actual smoothness")
    plt.show()


In [None]:
# Does the better PCA future embed any more gracefully? 

In [None]:
from velocity_scripts import umap_velocity_via_joint,trajectory_series,umap_trajectory_joint

In [None]:
# Pass the current PCA coordinates and the predicted state at index 5 to the
# project helper, which returns a (t0, t1, velocity) triple.
pca_umap_t0,pca_umap_t1,pca_umap_v = umap_velocity_via_joint(scv_pca_t0,pca_futures[5])

In [None]:
# Hand the two jointly embedded timepoints to the project's trajectory_series helper.
trajectory_series([pca_umap_t0,pca_umap_t1],frequency=3)

In [None]:

def umap_trajectory_joint(timepoints):
    """Embed several timepoints into one shared 2-D UMAP space.

    All timepoints are stacked row-wise, a single UMAP model is fitted on the
    stacked matrix, and the joint embedding is cut back into one array per
    timepoint, in the order given.

    NOTE(review): this definition shadows the `umap_trajectory_joint` imported
    from `velocity_scripts` earlier in the notebook.

    Parameters
    ----------
    timepoints : sequence of 2-D arrays
        One array per timepoint. They are combined with `np.vstack`, so they
        must all have the same number of columns.

    Returns
    -------
    list
        One slice of the joint embedding per input timepoint; slice `i` has as
        many rows as `timepoints[i]`.

    Raises
    ------
    ValueError
        If `timepoints` is itself a single 2-D matrix rather than a list.
    Exception
        If more than 100 timepoints are passed (taken as a sign that a matrix
        was passed instead of a list).
    """
    # A bare 2-D matrix would be treated as one "timepoint" per row and yield
    # nonsense slices; reject it even when it has 100 rows or fewer.
    if getattr(timepoints, "ndim", None) == 2:
        raise ValueError("You want timepoints in a list of 2-D arrays, but a single 2-D matrix was passed")
    if len(timepoints) > 100:
        raise Exception("You want timepoints in a list, I think you might have passed a matrix, primary dimension > 100")

    # Imported locally: in this notebook the top-level `from umap import UMAP`
    # only runs in a cell *after* the first call to this function, so relying on
    # the global name fails with NameError on a fresh Restart & Run All.
    from umap import UMAP

    stacked = np.vstack(timepoints)

    # Fixed random_state keeps the joint embedding reproducible between runs.
    umap_model = UMAP(n_neighbors=15,min_dist=0.5, spread=1.0, n_components=2, negative_sample_rate=5, random_state=0,metric='cosine')
    u_t_joint = umap_model.fit_transform(stacked)

    # Cumulative row counts give the [start, end) row range of each timepoint
    # inside the stacked embedding.
    running_totals = [0] + list(np.cumsum([t.shape[0] for t in timepoints]))
    embedded_timepoints = [
        u_t_joint[beginning:end]
        for beginning, end in zip(running_totals[:-1], running_totals[1:])
    ]

    return embedded_timepoints


In [None]:
# Jointly embed the starting PCA coordinates with the predicted states at indices 10 and 19.
# NOTE(review): index 19 presumably relies on `predict` having been run with
# tSteps=20 when `pca_futures` was created; confirm the first axis indexes steps.
umap_trajectories = umap_trajectory_joint([scv_pca_t0,pca_futures[10],pca_futures[19]])

In [None]:
from umap import UMAP

In [None]:
# Hand the jointly embedded timepoints to the project's trajectory_series helper.
trajectory_series(umap_trajectories)

In [None]:
# Bare expression: displays the repr of the whole list of embedded arrays as the cell output.
umap_trajectories