In [6]:
import os

# Configure CUDA device selection through environment variables in the first
# cell, ahead of the later TensorFlow import.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",  # Change to -1 if you want to use CPU!
})

import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

In [7]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd
import scanpy as sc
import colorcet
import sklearn.neighbors
import scipy.sparse
import umap.umap_ as umap
from fa2 import ForceAtlas2

In [None]:
from pathlib import Path

# Location of the MERFISH AnnData files. Override it by setting the
# MERFISH_DATA_DIR environment variable; the default is the original path so
# existing runs are unaffected.
DATA_DIR = Path(os.environ.get("MERFISH_DATA_DIR", "/Users/anushka/Desktop/MERFISH_Data"))

sc_data = sc.read_h5ad(DATA_DIR / "sc_data.h5ad")
st_data = sc.read_h5ad(DATA_DIR / "st_data.h5ad")

In [9]:
# Display the summary of the AnnData object read from sc_data.h5ad.
sc_data

AnnData object with n_obs × n_vars = 71183 × 30618
    obs: 'aggr_num', 'umi.counts', 'gene.counts', 'library_id', 'tube_barcode', 'Seq_batch', 'Region', 'Lib_type', 'Gender', 'Donor', 'Amp_Name', 'Amp_Date', 'Amp_PCR_cyles', 'Lib_Name', 'Lib_Date', 'Replicate_Lib', 'Lib_PCR_cycles', 'Lib_PassFail', 'Cell_Capture', 'Lib_Cells', 'Mean_Reads_perCell', 'Median_Genes_perCell', 'Median_UMI_perCell', 'Saturation', 'Live_percent', 'Total_Cells', 'Live_Cells', 'method', 'exp_component_name', 'mapped_reads', 'unmapped_reads', 'nonconf_mapped_reads', 'total.reads', 'doublet.score', 'cluster_label', 'subclass_label', 'class_label', 'cell_type', 'cell_label'

In [None]:
# Display the summary of the AnnData object read from st_data.h5ad.
st_data

AnnData object with n_obs × n_vars = 276556 × 254
    obs: 'fovID', 'fov_x', 'fov_y', 'volume', 'center_x', 'center_y', 'slice_id', 'sample_id', 'label', 'subclass', 'class_label', 'cell_id', 'cell_type', 'batch', 'cell_label', 'Layer_Depth', 'Depth'
    var: 'n_iso', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    obsm: 'spatial'

In [11]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
import numpy as np
from scipy.linalg import sqrtm

def calculate_covet(expression_matrix, spatial_coordinates, k=8):
    """Compute a COVET (shifted niche covariance) matrix for every cell.

    Each cell's niche is its ``k`` spatially nearest cells under Euclidean
    distance. Because the query points are the fitted points, a cell is
    normally a member of its own niche. The niche expression is centred on the
    global mean expression (not the niche mean) and summarised as
    ``shifted.T @ shifted / k``.

    Args:
        expression_matrix: Array-like of shape (n_cells, n_genes). Objects
            exposing ``toarray()`` (e.g. SciPy sparse matrices) are densified.
        spatial_coordinates: Array-like of shape (n_cells, n_dims), one row
            per row of ``expression_matrix``.
        k: Number of spatial neighbours that make up a niche.

    Returns:
        np.ndarray of shape (n_cells, n_genes, n_genes).

    Raises:
        ValueError: If the inputs are not 2-D, their row counts differ, or
            ``k`` is not in ``[1, n_cells]``.
    """
    # Densify sparse containers so the fancy indexing and subtraction below
    # operate on plain ndarrays.
    if hasattr(expression_matrix, "toarray"):
        expression_matrix = expression_matrix.toarray()
    expression = np.asarray(expression_matrix)
    coordinates = np.asarray(spatial_coordinates)

    if expression.ndim != 2 or coordinates.ndim != 2:
        raise ValueError(
            "expression_matrix and spatial_coordinates must both be 2-D, got "
            f"shapes {expression.shape} and {coordinates.shape}"
        )
    if expression.shape[0] != coordinates.shape[0]:
        raise ValueError(
            "expression_matrix and spatial_coordinates must describe the same "
            f"cells, got {expression.shape[0]} and {coordinates.shape[0]} rows"
        )
    n_cells, n_genes = expression.shape
    if not 1 <= k <= n_cells:
        raise ValueError(f"k must be between 1 and n_cells={n_cells}, got {k}")

    # Find spatial nearest neighbors
    nn = NearestNeighbors(n_neighbors=k, metric='euclidean')
    nn.fit(coordinates)
    _, indices = nn.kneighbors(coordinates)

    # The shift by the global mean is the same for every niche, so apply it
    # once instead of once per cell.
    shifted = expression - np.mean(expression, axis=0)

    # Fill a preallocated array rather than building a list and copying it.
    covet_matrices = np.empty((n_cells, n_genes, n_genes), dtype=shifted.dtype)
    for cell, neighbour_idx in enumerate(indices):
        niche = shifted[neighbour_idx]
        covet_matrices[cell] = np.dot(niche.T, niche) / k

    return covet_matrices


def aot_distance(covet1, covet2, epsilon=0.1, max_iter=100):
    """
    Calculate the Approximate Optimal Transport (AOT) distance between two
    COVET matrices.

    The distance is the Frobenius norm of the difference between the matrix
    square roots: ``||covet1^(1/2) - covet2^(1/2)||_F``. Each input is
    symmetrised and its negative eigenvalues are clipped to zero before the
    square root is taken, so the roots are always real.

    Args:
    covet1, covet2: Square matrices of identical shape.
    epsilon: Unused; kept so existing calls keep working.
    max_iter: Unused; kept so existing calls keep working.

    Returns:
    float: AOT distance between covet1 and covet2

    Raises:
    ValueError: If the inputs are not square 2-D matrices of the same shape.
    """
    def _psd_sqrt(matrix):
        # Project onto the PSD cone through the eigendecomposition, then take
        # the square root of the (non-negative) spectrum.
        symmetric = (matrix + matrix.T) / 2.0
        eigenvalues, eigenvectors = np.linalg.eigh(symmetric)
        root_spectrum = np.sqrt(np.clip(eigenvalues, 0.0, None))
        return (eigenvectors * root_spectrum) @ eigenvectors.T

    first = np.asarray(covet1, dtype=float)
    second = np.asarray(covet2, dtype=float)
    if first.ndim != 2 or first.shape[0] != first.shape[1]:
        raise ValueError(f"covet1 must be a square matrix, got shape {first.shape}")
    if second.shape != first.shape:
        raise ValueError(
            f"covet1 and covet2 must have the same shape, got {first.shape} and {second.shape}"
        )

    return float(np.linalg.norm(_psd_sqrt(first) - _psd_sqrt(second), ord='fro'))



def get_covet_knn_matrix(covet_matrices, k=8):
    """Find each cell's ``k`` nearest neighbours in COVET space.

    Neighbours are ranked by the Frobenius norm between matrix square roots of
    the COVET matrices. That norm equals the Euclidean distance between the
    flattened roots, so each root is computed once and handed to a Euclidean
    nearest-neighbour search, with no pairwise distance matrix built.

    Args:
        covet_matrices: Array-like of shape (n_cells, n_genes, n_genes).
        k: Number of neighbours to return per cell. A cell is never listed as
            its own neighbour, so ``k`` must be smaller than ``n_cells``
            (otherwise scikit-learn raises ``ValueError``).

    Returns:
        np.ndarray of shape (n_cells, k) holding neighbour row indices.

    Raises:
        ValueError: If ``covet_matrices`` is not a stack of square matrices.
    """
    covets = np.asarray(covet_matrices)
    if not np.issubdtype(covets.dtype, np.floating):
        covets = covets.astype(float)
    if covets.ndim != 3 or covets.shape[1] != covets.shape[2]:
        raise ValueError(
            "covet_matrices must have shape (n_cells, n_genes, n_genes), "
            f"got {covets.shape}"
        )
    n_cells, n_genes = covets.shape[0], covets.shape[1]

    # One PSD square root per cell: symmetrise, clip negative eigenvalues, and
    # rebuild the matrix from the square-rooted spectrum.
    flat_roots = np.empty((n_cells, n_genes * n_genes), dtype=covets.dtype)
    for cell, covet in enumerate(covets):
        eigenvalues, eigenvectors = np.linalg.eigh((covet + covet.T) / 2.0)
        root_spectrum = np.sqrt(np.clip(eigenvalues, 0.0, None))
        flat_roots[cell] = ((eigenvectors * root_spectrum) @ eigenvectors.T).ravel()

    # Find k nearest neighbors
    nn = NearestNeighbors(n_neighbors=k, metric='euclidean')
    nn.fit(flat_roots)

    # Calling kneighbors() without a query excludes each point from its own
    # neighbour list.
    return nn.kneighbors(return_distance=False)





In [12]:
import tensorflow as tf
def gpu_calculate_covet(expression_matrix, spatial_coordinates, k=8, batch_size=4096):
    """Compute COVET matrices with TensorFlow, on a GPU when one is available.

    The spatial neighbour search runs on the CPU with scikit-learn; the
    mean shift and the ``shifted.T @ shifted / k`` products run as batched
    TensorFlow ops.

    Args:
        expression_matrix: Dense array-like of shape (n_cells, n_genes);
            converted to float32.
        spatial_coordinates: Array-like of shape (n_cells, n_dims), one row
            per row of ``expression_matrix``; converted to float32.
        k: Number of spatial neighbours that make up a niche.
        batch_size: Number of cells processed per batched matmul. Bounds the
            size of the intermediate (batch, k, n_genes) tensor.

    Returns:
        tf.Tensor of shape (n_cells, n_genes, n_genes), dtype float32.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # TensorFlow reports accelerators under the "GPU" device type, including
    # Apple's Metal plugin. "MPS" is not a TensorFlow device type, so probing
    # for it always comes back empty and falls through to the CPU.
    device_name = "/GPU:0" if tf.config.list_physical_devices('GPU') else "/CPU:0"

    # Find spatial nearest neighbors (scikit-learn works on NumPy arrays, so no
    # tensor round trip is needed here).
    coordinates = np.asarray(spatial_coordinates, dtype=np.float32)
    nn = NearestNeighbors(n_neighbors=k, metric='euclidean')
    nn.fit(coordinates)
    _, indices = nn.kneighbors(coordinates)

    with tf.device(device_name):
        expression = tf.convert_to_tensor(expression_matrix, dtype=tf.float32)

        # Shift by the global mean once; every niche uses the same shift.
        shifted = expression - tf.reduce_mean(expression, axis=0)

        # Calculate COVET matrices one batch of cells at a time.
        chunks = []
        for start in range(0, len(indices), batch_size):
            # niches: (batch, k, n_genes)
            niches = tf.gather(shifted, indices[start:start + batch_size])
            # (batch, n_genes, k) @ (batch, k, n_genes) -> (batch, n_genes, n_genes)
            chunks.append(tf.matmul(niches, niches, transpose_a=True) / k)

        return tf.concat(chunks, axis=0)

In [None]:
# COVET needs, for the same set of cells, one expression row and one spatial
# coordinate row per cell. Only st_data carries coordinates (obsm['spatial']),
# and sc_data holds a different set of cells, so both inputs come from st_data.
# The functions expect plain arrays, not AnnData objects.
st_expression = st_data.X
if scipy.sparse.issparse(st_expression):
    st_expression = st_expression.toarray()
expression_matrix = np.asarray(st_expression, dtype=np.float32)
spatial_coordinates = np.asarray(st_data.obsm['spatial'], dtype=np.float32)

# NOTE: the result holds n_cells x n_genes x n_genes values. For the
# 276556 x 254 dataset shown above that is roughly 71 GB in float32, so
# subset the cells or genes first if that does not fit in memory.

# Example usage
covet_matrices = gpu_calculate_covet(expression_matrix, spatial_coordinates)

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x1189c9730>>
Traceback (most recent call last):
  File "/opt/anaconda3/envs/octopus/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


In [None]:
# NumPy/CPU computation of the COVET matrices from the same two inputs.
covet_matrices = calculate_covet(
    expression_matrix=expression_matrix,
    spatial_coordinates=spatial_coordinates,
)

In [None]:
# Nearest neighbours of every cell in COVET space (default k).
knn_matrix = get_covet_knn_matrix(covet_matrices=covet_matrices)