# Distance Covariance Analysis (DCA) for mouse COMMOT results 

- X = SVG or HVG expression matrix
- Y = spatial coords

author: @emilyekstrum
1/26/26

In [5]:
import dca
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
import anndata

In [None]:
# Attach Seurat in the embedded R session, then read the mouse Seurat object
# from disk into the R-side variable `obj` (it lives in R, not in Python).
seurat_rds_path = "/Users/emilyekstrum/repos/zhangLab_Rotation/data/processed/seurat_objs/mousebrain_seurat.rds"
ro.r("library(Seurat)")
ro.r(f'obj <- readRDS("{seurat_rds_path}")')

R callback write-console: Loading required package: SeuratObject
  
R callback write-console: Loading required package: sp
  
R callback write-console: 
Attaching package: ‘SeuratObject’

  
R callback write-console: The following objects are masked from ‘package:base’:

    intersect, t

  



    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    

In [10]:
# Pull the image coordinates out of the Seurat metadata held in the R session.
# The R snippet evaluates to the two metadata columns imagecol and imagerow.
coords_r = ro.r('''
md <- obj@meta.data
md[, c("imagecol","imagerow")]
''')

# Convert the R result to pandas, force string row labels (cell IDs), and
# expose the columns as x (imagecol) and y (imagerow).
coords = pandas2ri.rpy2py(coords_r)
coords.index = coords.index.astype(str)
coords.columns = ["x", "y"]

print(coords.head())  # cell ID by location

                         x       y
AAACAAGTATCTCCCA-1  7410.0  8455.0
AAACAGAGCGACTCCT-1  3097.0  7905.0
AAACAGGGTCTATATT-1  7050.0  2327.0
AAACATTTCCCGGATT-1  8728.0  8111.0
AAACCCGAACGAAATC-1  6811.0  9351.0


In [6]:
# Read the mouse SVG AnnData (.h5ad) from the processed COMMOT data directory.
svg_h5ad_path = "/Users/emilyekstrum/repos/zhangLab_Rotation/data/processed/COMMOT/mouse_svg_lr_sets/mouse_commot_LR_svg.h5ad"
mouse_svgs = anndata.read_h5ad(svg_h5ad_path)

In [11]:
# get expression matrix for svgs as a dense (cell ID x gene) DataFrame
svg_matrix = mouse_svgs.X
if isinstance(svg_matrix, np.ndarray):
    # Dense storage: keep an explicit copy so later edits to the DataFrame
    # cannot reach back into the AnnData's own array.
    svg_matrix = svg_matrix.copy()
else:
    # Non-ndarray storage (e.g. a scipy sparse matrix): toarray() already
    # builds a fresh dense array, so copying the sparse matrix first would
    # only duplicate work and memory.
    svg_matrix = svg_matrix.toarray()

# NOTE(review): the preview output in this notebook shows values such as
# 0.693147 and 1.098612 (ln 2, ln 3), so X looks log1p-transformed rather
# than raw counts -- confirm before standardizing for DCA.
svgs_counts = pd.DataFrame(
    svg_matrix,
    index=mouse_svgs.obs_names.astype(str),  # string cell IDs, matched against coords later
    columns=mouse_svgs.var_names,
)

In [12]:
# Preview the first rows of the SVG expression frame (cell ID by gene expression).
svg_preview = svgs_counts.head()
print(svg_preview)

                    Xkr4  Sox17    Mrpl15  Lypla1     Tcea1     Rgs20  \
AAACAAGTATCTCCCA-1   0.0    0.0  0.000000     0.0  0.000000  0.000000   
AAACAGAGCGACTCCT-1   0.0    0.0  0.693147     0.0  0.693147  0.000000   
AAACAGGGTCTATATT-1   0.0    0.0  0.693147     0.0  0.000000  0.693147   
AAACATTTCCCGGATT-1   0.0    0.0  0.000000     0.0  1.098612  0.000000   
AAACCCGAACGAAATC-1   0.0    0.0  0.000000     0.0  0.000000  0.000000   

                     Atp6v1h  Oprk1  Npbwr1    Rb1cc1  ...    mt-Nd4  \
AAACAAGTATCTCCCA-1  0.000000    0.0     0.0  0.000000  ...  4.553877   
AAACAGAGCGACTCCT-1  0.693147    0.0     0.0  0.693147  ...  5.056246   
AAACAGGGTCTATATT-1  0.693147    0.0     0.0  0.693147  ...  4.787492   
AAACATTTCCCGGATT-1  0.000000    0.0     0.0  0.000000  ...  4.499810   
AAACCCGAACGAAATC-1  0.000000    0.0     0.0  0.693147  ...  4.317488   

                      mt-Nd5    mt-Nd6   mt-Cytb     Vamp7     Spry3  Tmlhe  \
AAACAAGTATCTCCCA-1  2.302585  0.000000  4.852030 

In [19]:
# align coords and counts matrix on the cell IDs present in both
common_cells = svgs_counts.index.intersection(coords.index)

# An empty overlap (most likely a cell-ID format mismatch between the AnnData
# and the Seurat metadata) would otherwise produce two empty frames that still
# pass the index check below and silently feed nothing into DCA.
if common_cells.empty:
    raise ValueError("No shared cell IDs between the SVG expression matrix and coords")

# Subset both frames with the same label list so rows are in the same order.
svgs_counts = svgs_counts.loc[common_cells]
svg_coords = coords.loc[common_cells]

In [20]:
# check that indices match
# Index.equals compares elements and their order and returns a single bool.
# The element-wise form `all(a == b)` raises ValueError (instead of showing the
# assertion message) whenever the two indices differ in length.
assert svgs_counts.index.equals(svg_coords.index), "Indices do not match!"

In [7]:
# Read the mouse HVG AnnData (.h5ad) from the processed COMMOT data directory.
hvg_h5ad_path = "/Users/emilyekstrum/repos/zhangLab_Rotation/data/processed/COMMOT/mouse_hvg_lr_sets/mouse_commot_LR_hvg.h5ad"
mouse_hvgs = anndata.read_h5ad(hvg_h5ad_path)

In [13]:
# get expression matrix for hvgs as a dense (cell ID x gene) DataFrame
hvg_matrix = mouse_hvgs.X
if isinstance(hvg_matrix, np.ndarray):
    # Dense storage: keep an explicit copy so later edits to the DataFrame
    # cannot reach back into the AnnData's own array.
    hvg_matrix = hvg_matrix.copy()
else:
    # Non-ndarray storage (e.g. a scipy sparse matrix): toarray() already
    # builds a fresh dense array, so copying the sparse matrix first would
    # only duplicate work and memory.
    hvg_matrix = hvg_matrix.toarray()

# NOTE(review): the preview output in this notebook shows values such as
# 0.693147 and 1.098612 (ln 2, ln 3), so X looks log1p-transformed rather
# than raw counts -- confirm before standardizing for DCA.
hvgs_counts = pd.DataFrame(
    hvg_matrix,
    index=mouse_hvgs.obs_names.astype(str),  # string cell IDs, matched against coords later
    columns=mouse_hvgs.var_names,
)

In [17]:
# Preview the first rows of the HVG expression frame (cell ID by gene expression).
hvg_preview = hvgs_counts.head()
print(hvg_preview)

                    Xkr4  Sox17    Mrpl15  Lypla1     Tcea1     Rgs20  \
AAACAAGTATCTCCCA-1   0.0    0.0  0.000000     0.0  0.000000  0.000000   
AAACAGAGCGACTCCT-1   0.0    0.0  0.693147     0.0  0.693147  0.000000   
AAACAGGGTCTATATT-1   0.0    0.0  0.693147     0.0  0.000000  0.693147   
AAACATTTCCCGGATT-1   0.0    0.0  0.000000     0.0  1.098612  0.000000   
AAACCCGAACGAAATC-1   0.0    0.0  0.000000     0.0  0.000000  0.000000   

                     Atp6v1h  Oprk1  Npbwr1    Rb1cc1  ...    mt-Nd4  \
AAACAAGTATCTCCCA-1  0.000000    0.0     0.0  0.000000  ...  4.553877   
AAACAGAGCGACTCCT-1  0.693147    0.0     0.0  0.693147  ...  5.056246   
AAACAGGGTCTATATT-1  0.693147    0.0     0.0  0.693147  ...  4.787492   
AAACATTTCCCGGATT-1  0.000000    0.0     0.0  0.000000  ...  4.499810   
AAACCCGAACGAAATC-1  0.000000    0.0     0.0  0.693147  ...  4.317488   

                      mt-Nd5    mt-Nd6   mt-Cytb     Vamp7     Spry3  Tmlhe  \
AAACAAGTATCTCCCA-1  2.302585  0.000000  4.852030 

In [21]:
# align coords and counts matrix on the cell IDs present in both
hvg_common_cells = hvgs_counts.index.intersection(coords.index)

# An empty overlap (most likely a cell-ID format mismatch between the AnnData
# and the Seurat metadata) would otherwise produce two empty frames that still
# pass the index check below and silently feed nothing into DCA.
if hvg_common_cells.empty:
    raise ValueError("No shared cell IDs between the HVG expression matrix and coords")

# Subset both frames with the same label list so rows are in the same order.
hvgs_counts = hvgs_counts.loc[hvg_common_cells]
hvg_coords = coords.loc[hvg_common_cells]

In [22]:
# check that indices match
# Index.equals compares elements and their order and returns a single bool.
# The element-wise form `all(a == b)` raises ValueError (instead of showing the
# assertion message) whenever the two indices differ in length.
assert hvgs_counts.index.equals(hvg_coords.index), "Indices do not match!"

In [None]:
# inputs for DCA 
# need X -> standardized gene x cell matrix