In [1]:
import numpy as np
import geopandas as gpd
import pandas as pd

import scanpy as sc
import squidpy as sq
import voyagerpy as vp
import seaborn as sns
import os
import pickle
from matplotlib.pyplot import imread
from collections import OrderedDict
import json

from matplotlib import pyplot as plt

In [2]:
from cellphonedb.src.core.methods import cpdb_analysis_method

In [3]:
plt.rcParams['figure.dpi'] = 150
plt.rcParams['font.family'] = ['serif']
plt.rcParams['font.size'] = 12
plt.rcParams['axes.labelsize'] = 12
plt.rcParams['axes.titlesize'] = 12
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

In [5]:
path_016 = "/data/kanferg/Sptial_Omics/playGround/Data/Visium_HD_Mouse_Brain_square_example/square_016um"
andata016_ = sc.read_visium(path=path_016)
andata016_
andata016_.var_names_make_unique()
andata016_.obsm['spatial'] = np.array(andata016_.obsm['spatial'], dtype=np.float64)


sc.pp.filter_cells(andata016_, min_counts=1000)

sc.pp.filter_cells(andata016_, min_genes=1000)

sc.pp.filter_genes(andata016_, min_counts=1000)

sc.pp.filter_genes(andata016_, max_counts=6281)

andata016_.obsm['spatial'] = np.array(andata016_.obsm['spatial'], dtype=np.float64)
andata016_.uns['spatial']['img'] = andata016_.uns['spatial']['Visium_HD_Mouse_Brain'].pop("images")
andata016_.uns['spatial']['scale'] = andata016_.uns['spatial']['Visium_HD_Mouse_Brain'].pop("scalefactors")
andata016_.uns['spatial']['metadata'] = andata016_.uns['spatial']['Visium_HD_Mouse_Brain'].pop("metadata")
andata016_.uns['spatial'].pop("Visium_HD_Mouse_Brain")

# change order of images
images = andata016_.uns['spatial'].pop('img')
images_hires = {'lowres':images['lowres'],'hires':images['hires']}
andata016_.uns['spatial']['img'] = images_hires


is_mt = andata016_.var_names.str.startswith('mt')
vp.utils.add_per_cell_qcmetrics(andata016_, subsets={'mito': is_mt})


spot_diameter_fullres = andata016_.uns['spatial']['scale'].pop('spot_diameter_fullres')
andata016_.uns['spatial']['scale']['spot_diameter_fullres'] = {'pxl_col_in_fullres':spot_diameter_fullres,'pxl_row_in_fullres':spot_diameter_fullres}
# insted of vp.spatial.get_visium_spots(andata016_, with_radius=False) I have done:
#scale = andata016_.uns['spatial']['scale']['tissue_lowres_scalef']
scale = 1
scale_dict = andata016_.uns["spatial"].get("scale", {})
spot_diam = scale_dict.get("spot_diameter_fullres")
visium_spots = gpd.GeoSeries.from_xy(andata016_.obsm['spatial'][:,0], andata016_.obsm['spatial'][:,1]).scale(scale, scale, origin=(0, 0))
_ = vp.spatial.set_geometry(andata016_, geom="spot_poly", values=visium_spots)
andata016_.uns['config'] = OrderedDict()
andata016_.uns["config"]["secondary_var_names"] = andata016_.var_names
pathout = "/data/kanferg/Sptial_Omics/VoyagerPy_fork/voyagerpy/out"

qc_features = ["sum", "detected", "subsets_mito_percent"]
andata016_.uns['config'] = OrderedDict()
andata016_.uns["config"]["secondary_var_names"] = andata016_.var_names

  positions = pd.read_csv(files["tissue_positions_file"], header=None)


In [6]:
# The original count data
andata016_.layers['counts'] = andata016_.X.copy()
# Log-normalize the adata.X matrix
vp.utils.log_norm_counts(andata016_, inplace=True)
andata016_.layers['logcounts'] = andata016_.X.copy()


gene_var = vp.utils.model_gene_var(andata016_.layers['logcounts'], gene_names=andata016_.var_names)
hvgs = vp.utils.get_top_hvgs(gene_var)

andata016_.var['highly_variable'] = False
andata016_.var.loc[hvgs, 'highly_variable'] = True

andata016_.X = vp.utils.scale(andata016_.X, center=True)
sc.tl.pca(andata016_, use_highly_variable=True, n_comps=30, random_state=1337)
andata016_.X = andata016_.layers['logcounts'].copy()

from leidenalg import ModularityVertexPartition
sc.pp.neighbors(
    andata016_,
    n_pcs=9,
    use_rep='X_pca',
    method='gauss',
    n_neighbors=80
)
sc.tl.leiden(
    andata016_,
    random_state=29,
    resolution=None,
    key_added='cluster',
    partition_type=ModularityVertexPartition
)

In [7]:
andata016_

AnnData object with n_obs × n_vars = 21445 × 6350
    obs: 'in_tissue', 'array_row', 'array_col', 'n_counts', 'n_genes', 'sum', 'detected', 'subsets_mito_sum', 'subsets_mito_detected', 'subsets_mito_percent', 'cluster'
    var: 'gene_ids', 'feature_types', 'genome', 'n_counts', 'highly_variable'
    uns: 'spatial', 'config', 'pca', 'neighbors', 'leiden'
    obsm: 'spatial', 'geometry', 'X_pca'
    varm: 'PCs'
    layers: 'counts', 'logcounts'
    obsp: 'distances', 'connectivities'

In [8]:
metadata = andata016_.obs[['cluster']].copy()
metadata['Cell'] = metadata.index
metadata.rename(columns={'cluster': 'cell_type'}, inplace=True)
metadata = metadata[['Cell', 'cell_type']]
counts = pd.DataFrame(andata016_.X.T.todense(), index=andata016_.var_names, columns=andata016_.obs_names)

In [9]:
counts

Unnamed: 0,s_016um_00342_00082-1,s_016um_00156_00322-1,s_016um_00342_00054-1,s_016um_00036_00081-1,s_016um_00225_00065-1,s_016um_00052_00149-1,s_016um_00156_00102-1,s_016um_00109_00312-1,s_016um_00284_00131-1,s_016um_00204_00145-1,...,s_016um_00124_00306-1,s_016um_00308_00250-1,s_016um_00335_00304-1,s_016um_00046_00224-1,s_016um_00302_00195-1,s_016um_00128_00159-1,s_016um_00288_00288-1,s_016um_00039_00175-1,s_016um_00037_00193-1,s_016um_00144_00329-1
Xkr4,0.000000,0.000000,0.000000,1.257069,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.994712,0.0,0.000000,0.000000,0.000000
Lypla1,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,1.044829,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000
Rgs20,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,1.044829,0.000000,0.000000,...,0.000000,0.000000,0.699779,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000
Rb1cc1,0.000000,1.853601,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,1.120497,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000
Pcmtd1,0.000000,0.000000,0.541553,0.000000,0.0,0.0,1.258512,0.000000,0.812963,1.127325,...,0.000000,0.000000,0.000000,1.309783,0.0,0.000000,0.0,0.000000,1.806128,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Kdm5d,0.000000,0.000000,0.541553,0.000000,0.0,0.0,1.258512,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.994712,0.0,0.000000,0.000000,1.374395
Eif2s3y,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000
Uty,0.000000,0.000000,0.541553,0.000000,0.0,0.0,0.000000,1.044829,0.000000,0.000000,...,0.000000,1.288141,0.000000,1.309783,0.0,0.000000,0.0,1.423303,0.000000,0.000000
Ddx3y,1.297322,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.699779,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000


In [10]:
metadata

Unnamed: 0,Cell,cell_type
s_016um_00342_00082-1,s_016um_00342_00082-1,11
s_016um_00156_00322-1,s_016um_00156_00322-1,3
s_016um_00342_00054-1,s_016um_00342_00054-1,11
s_016um_00036_00081-1,s_016um_00036_00081-1,1
s_016um_00225_00065-1,s_016um_00225_00065-1,10
...,...,...
s_016um_00128_00159-1,s_016um_00128_00159-1,7
s_016um_00288_00288-1,s_016um_00288_00288-1,4
s_016um_00039_00175-1,s_016um_00039_00175-1,3
s_016um_00037_00193-1,s_016um_00037_00193-1,1


In [11]:
import pickle
import os

In [9]:
pathout = '/data/kanferg/Sptial_Omics/SpatialOmicsToolkit/out_2'

with open(os.path.join(pathout,'test_meta.pkl'), 'wb') as f:
    pickle.dump(metadata, f)
with open(os.path.join(pathout,'test_counts.pkl'), 'wb') as f:
    pickle.dump(counts, f)

In [16]:
cpdb_file_path = "/data/kanferg/cellphonedb/NatureProtocols2024_case_studies/v5.0.0/cellphonedb.zip"
meta_file_path = "/data/kanferg/Sptial_Omics/SpatialOmicsToolkit/out_2/test_meta.pkl"
counts_file_path = "/data/kanferg/Sptial_Omics/SpatialOmicsToolkit/out_2/test_counts.pkl"
out_path = "/data/kanferg/Sptial_Omics/SpatialOmicsToolkit/cpdb_out"

In [17]:
from cellphonedb.src.core.methods import cpdb_statistical_analysis_method

In [18]:
cpdb_results = cpdb_statistical_analysis_method.call(
         cpdb_file_path = cpdb_file_path,
         meta_file_path = meta_file_path,
         counts_file_path = counts_file_path,
         counts_data = 'gene_name',
         output_path = out_path)

Reading user files...


UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte