## Applies COMMOT to HVG, SVG, and unfiltered expression datasets for mouse and human brain ST

In [3]:
import pandas as pd
import numpy as np
import scanpy as sc
import commot as ct
import os

In [4]:
# Paths to the mouse LR-filtered gene x cell CSVs (HVG and nnSVG gene sets);
# both files live in the same processed-data directory.
lr_filtering_dir = "/Users/emilyekstrum/repos/zhangLab_Rotation/data/processed/LR_filtering"
hvg_gene_cell = os.path.join(lr_filtering_dir, "mouse_hvg_lr_gene_cell_matrix.csv")
svg_gene_cell = os.path.join(lr_filtering_dir, "mouse_nnsvg_lr_gene_cell_matrix.csv")

In [5]:
# Read each gene x cell CSV (first column supplies the row labels), cast all
# values to float, then transpose so rows are cells and columns are genes.
hvg_data = pd.read_csv(hvg_gene_cell, index_col=0).astype(float).transpose()
svg_data = pd.read_csv(svg_gene_cell, index_col=0).astype(float).transpose()

# Sanity-check the orientation: dimensions first, then gene (column) labels,
# then cell (row) labels, HVG before SVG in each pair.
_orientation_report = [
    ("HVG data shape:", hvg_data.shape),
    ("SVG data shape:", svg_data.shape),
    ("HVG data columns (genes):", hvg_data.columns),
    ("SVG data columns (genes):", svg_data.columns),
    ("HVG data index (cells):", hvg_data.index),
    ("SVG data index (cells):", svg_data.index),
]
for _caption, _value in _orientation_report:
    print(_caption, _value)

HVG data shape: (1640, 267)
SVG data shape: (1640, 244)
HVG data columns (genes): Index(['Wnt5a', 'Tgfa', 'Nrg1', 'Nrg2', 'Fgf1', 'Fgf7', 'Fgf9', 'Pdgfa',
       'Pdgfb', 'Pdgfc',
       ...
       'Cadm4', 'Epb41l1', 'P4hb', 'Dscam', 'Cd38', 'Lrp8', 'Vldlr', 'Stra6',
       'Robo1', 'Robo2'],
      dtype='object', length=267)
SVG data columns (genes): Index(['Wnt5a', 'Tgfa', 'Hbegf', 'Nrg3', 'Fgf1', 'Fgf9', 'Pdgfa', 'Pdgfb',
       'Vegfa', 'Vegfb',
       ...
       'Cadm4', 'Epb41l1', 'Tmem219', 'P4hb', 'Dscam', 'Cd38', 'Lrp8', 'Vldlr',
       'Robo1', 'Robo2'],
      dtype='object', length=244)
HVG data index (cells): Index(['AAACAAGTATCTCCCA.1', 'AAACAGAGCGACTCCT.1', 'AAACAGGGTCTATATT.1',
       'AAACATTTCCCGGATT.1', 'AAACCCGAACGAAATC.1', 'AAACCGGAAATGTTAA.1',
       'AAACCGGGTAGGTACC.1', 'AAACGAGACGGTTGAT.1', 'AAACTGCTGGCTCCAA.1',
       'AAACTTGCAAACGTAT.1',
       ...
       'TTGTGAGGCATGACGC.1', 'TTGTGCAGCCACGTCA.1', 'TTGTGGCCCTGACAGT.1',
       'TTGTGTATGCCACCAA.1', 'TTGTGTTT