In [1]:
import sklearn

In [2]:
import scanpy as sc

In [3]:
import scvelo

In [4]:
import json
from typing import Dict
import pandas as pd
import torch
from pathlib import Path
from itertools import repeat
from collections import OrderedDict
from collections.abc import Mapping
from scvelo.core import l2_norm, prod_sum, sum

In [7]:
import numpy as np
from scipy.sparse import csr_matrix

## Load the AnnData objects (one per velocity method)

In [6]:
# Directory holding the saved model runs for the NMP dataset. Every input file
# below lives here, so pointing the notebook at another location only requires
# editing this one constant.
MODEL_RUN_DIR = Path('/home/sarah.ancheta/bruno1/projects/data.science/sarah.ancheta/ZF_atlas/rna_velocity/benchmark/model_runs/nmp_dataset')

# One AnnData object per velocity method.
adata_scvDeter = sc.read_h5ad(MODEL_RUN_DIR / 'scVelo_deterministic_Jun22.h5ad')

adata_scvSto = sc.read_h5ad(MODEL_RUN_DIR / 'scVelo_stochastic_Jun22.h5ad')

adata_scvDyn = sc.read_h5ad(MODEL_RUN_DIR / 'scVelo_dynamical_Jun22.h5ad')

adata_uniTvel = sc.read_h5ad(MODEL_RUN_DIR / 'uniTVelo_Jun22.h5ad')

adata_deepVel = sc.read_h5ad(MODEL_RUN_DIR / 'deepVelo_Jun22.h5ad')





# Compute cell-cell similarity across the different velocity methods

In [8]:
def cell_cell_cosinesim_velocitygraph_v2(adata1, adata2, key1, key2):
    """Per-cell cosine similarity between the transition rows of two velocity graphs.

    The graphs stored in ``adata1.uns[key1]`` and ``adata2.uns[key2]`` are each
    passed to ``scvelo.utils.get_transition_matrix``; row ``i`` of the first
    result is then compared with row ``i`` of the second.

    Rows are matched purely by position. This assumes both objects list the
    same cells in the same order -- that is NOT verified here (TODO confirm
    against the callers' data).

    Parameters
    ----------
    adata1, adata2
        Annotated data objects exposing ``.uns`` and ``.obs``.
    key1, key2
        Keys into ``adata1.uns`` / ``adata2.uns`` holding the velocity graphs.

    Returns
    -------
    pandas.DataFrame
        Columns ``'cosine_similiarities'`` (spelling kept because callers
        reference it) and ``'cell_ID'`` (the obs index of ``adata1``).
        A row is NaN when either transition row contains NaN or has zero norm.

    Raises
    ------
    ValueError
        If the two graphs do not have the same shape.
    """
    graph_a = adata1.uns[key1].copy()
    graph_b = adata2.uns[key2].copy()

    # Validate up front so a mismatch fails before the transition matrices
    # are computed.
    if graph_a.shape != graph_b.shape:
        raise ValueError("Input matrices must have the same shape.")

    cell_ids = adata1.obs.index.tolist()

    trans_a = scvelo.utils.get_transition_matrix(adata=adata1, vgraph=graph_a)
    trans_b = scvelo.utils.get_transition_matrix(adata=adata2, vgraph=graph_b)

    similarities = []
    for i in range(graph_a.shape[0]):
        # Densify one row at a time rather than the whole matrix.
        row_a = trans_a[i, :].toarray()[0]
        row_b = trans_b[i, :].toarray()[0]

        if np.isnan(row_a).any() or np.isnan(row_b).any():
            similarities.append(np.nan)
            continue

        denominator = np.linalg.norm(row_a) * np.linalg.norm(row_b)
        if denominator == 0:
            # Cosine similarity is undefined for an all-zero row; report NaN
            # explicitly instead of relying on a 0/0 division and its warning.
            similarities.append(np.nan)
            continue

        similarities.append(np.inner(row_a, row_b) / denominator)

    return pd.DataFrame({
        'cosine_similiarities': similarities,
        'cell_ID': cell_ids,
    })

In [9]:
def cell_cell_sim_table(adatalist=None, namelist=None):
    """Build a table of per-cell cosine similarities for every pair of methods.

    For each unordered pair ``(i, j)`` with ``i < j`` this calls
    ``cell_cell_cosinesim_velocitygraph_v2`` on ``adatalist[i]`` and
    ``adatalist[j]`` using the ``'velocity_graph'`` key of both, and stores the
    result as one column named ``f"{namelist[i]}_{namelist[j]}"``.

    Parameters
    ----------
    adatalist : sequence, optional
        One annotated data object per method.
    namelist : sequence of str, optional
        Short label for each entry of ``adatalist``, in the same order.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``cell_ID`` with one column per method pair.

    Raises
    ------
    ValueError
        If the two sequences differ in length, or fewer than two methods are
        given (there is then no pair to compare).
    """
    # None stands in for "empty" so no mutable default is shared between calls.
    adatalist = [] if adatalist is None else adatalist
    namelist = [] if namelist is None else namelist

    n_methods = len(namelist)
    if len(adatalist) != n_methods:
        raise ValueError(
            "adatalist and namelist must have the same length "
            "(got {} and {}).".format(len(adatalist), n_methods)
        )
    if n_methods < 2:
        raise ValueError("At least two methods are required to form a pair.")

    pair_columns = []
    for i in range(n_methods):
        for j in range(i + 1, n_methods):
            pair_df = cell_cell_cosinesim_velocitygraph_v2(
                adatalist[i], adatalist[j], 'velocity_graph', 'velocity_graph'
            )
            pair_df = pair_df.set_index('cell_ID').rename(
                columns={"cosine_similiarities": namelist[i] + '_' + namelist[j]}
            )
            pair_columns.append(pair_df)

    # Column-wise concat aligns the per-pair frames on the cell_ID index.
    return pd.concat(pair_columns, axis=1)
    
            

## Run the function here

In [10]:
# Score every pair of velocity methods; each label in `namelist` is matched to
# the AnnData object at the same position in `adatalist`.
ccDF = cell_cell_sim_table(
    adatalist=[adata_deepVel, adata_scvDeter, adata_scvDyn, adata_scvSto, adata_uniTvel],
    namelist=['deepVel', 'scvDet', 'scvDyn', 'scvSto', 'uniTV'],
)

ccDF

Unnamed: 0_level_0,deepVel_scvDet,deepVel_scvDyn,deepVel_scvSto,deepVel_uniTV,scvDet_scvDyn,scvDet_scvSto,scvDet_uniTV,scvDyn_scvSto,scvDyn_uniTV,scvSto_uniTV
cell_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
TDR18_AAACCCAAGGCGAAGG-1,0.721838,0.014837,0.026953,0.636901,0.000088,0.007920,0.318265,0.281565,0.007498,0.002107
TDR18_AAAGGGCAGATGACAT-1,0.789773,0.251344,0.787503,0.157887,0.549336,0.853496,0.132543,0.159120,0.202553,0.035488
TDR18_AAAGGGCAGGCCCAAA-1,0.763506,0.495711,0.640234,0.121795,0.566112,0.775445,0.067465,0.127607,0.064829,0.063840
TDR18_AAAGGTAAGATACGAT-1,0.962322,0.961540,0.584368,0.130873,0.981319,0.692900,0.064544,0.556497,0.065292,0.038766
TDR18_AAAGGTACAGTATTCG-1,0.552265,0.604369,0.495520,0.213461,0.963385,0.902791,0.007519,0.896643,0.007893,0.008271
...,...,...,...,...,...,...,...,...,...,...
TDR42_TTGGGATCATCCGCGA-1,0.700743,0.210684,0.532413,0.158445,0.341114,0.874839,0.111491,0.236607,0.233804,0.098626
TDR42_TTTCACAAGCATGCAG-1,0.989453,0.970859,0.974724,0.960028,0.982760,0.986757,0.968137,0.997438,0.983143,0.979785
TDR42_TTTCACAGTAGTGCGA-1,0.795227,0.111587,0.721613,0.198534,0.539019,0.859015,0.046575,0.205963,0.005343,0.022724
TDR42_TTTCAGTGTACATTGC-1,0.354114,0.125074,0.304697,0.524489,0.070691,0.812190,0.302812,0.273611,0.614235,0.394210


### Sanity check: compare one method's velocity graph to itself; every cell should have a cosine similarity of 1.

In [13]:
# Self-comparison: the same AnnData object and graph key on both sides.
mytest = cell_cell_cosinesim_velocitygraph_v2(
    adata1=adata_scvDeter,
    adata2=adata_scvDeter,
    key1='velocity_graph',
    key2='velocity_graph',
)

In [14]:
# Preview the first rows of the self-comparison result.
mytest.head()

Unnamed: 0,cosine_similiarities,cell_ID
0,1.0,TDR18_AAACCCAAGGCGAAGG-1
1,1.0,TDR18_AAAGGGCAGATGACAT-1
2,1.0,TDR18_AAAGGGCAGGCCCAAA-1
3,1.0,TDR18_AAAGGTAAGATACGAT-1
4,1.0,TDR18_AAAGGTACAGTATTCG-1


In [15]:
# Enforce the sanity check instead of relying on visual inspection: every
# defined self-similarity must equal 1 up to floating-point rounding. NaN rows
# (undefined similarity) are excluded from the comparison.
np.testing.assert_allclose(mytest['cosine_similiarities'].dropna(), 1.0, rtol=1e-5)

# Show the distinct values so the size of the rounding error stays visible.
mytest['cosine_similiarities'].unique()

array([1.        , 0.99999994, 1.0000001 , 0.9999999 ], dtype=float32)

When a velocity graph is compared to itself, every cell's cosine similarity equals 1 up to float32 rounding error, so the function behaves as expected.