In [18]:
import os
import scanpy as sc
import numpy  as np
import util
import pandas as pd
from tqdm import tqdm

# Load cell dataframe

In [2]:
in_dir = r'Y:\coskun-lab\Zhou\4_HCR\20220211_MSC_hydrogel\00_analysis\spagnn\cell_df'
fn_l = os.listdir(in_dir)
fn_l.sort()

In [19]:
cell_df_l = []
for fn in tqdm(fn_l):
    if fn.endswith('.csv'):
        cell = pd.read_csv(os.path.join(in_dir,fn),index_col=0)
        cell_id = fn.split('.')[0]
        cell['patch'] = cell_id + '_' + cell['patch'].astype('str')
        cell_df_l.append(cell)

100%|████████████████████████████████████████████████████████████████████████████████| 554/554 [00:25<00:00, 21.85it/s]


In [20]:
cell_dfs = pd.concat(cell_df_l)

In [26]:
patch_count = util.counting(cell_dfs, 'patch', 'gene')

In [27]:
patch_count

Unnamed: 0,Count_id,ACAN,CD105,CD19,CD34,CD90,COL1A1,COL2A1,COMP,EEF2,...,COL5A2,CXCR4,IL6,PTH1R,SOX9,SPP1,CD79B,CCL11,CD45,ACTb
0,bm_001_01_0,3.0,8.0,9.0,7.0,1.0,3.0,4.0,17.0,18.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,bm_001_01_1,0.0,2.0,0.0,1.0,0.0,9.0,6.0,5.0,4.0,...,23.0,1.0,1.0,22.0,3.0,1.0,0.0,0.0,0.0,0.0
2,bm_001_01_10,1.0,8.0,4.0,2.0,1.0,0.0,2.0,15.0,10.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,bm_001_01_11,0.0,1.0,0.0,0.0,1.0,0.0,0.0,7.0,5.0,...,16.0,0.0,1.0,7.0,0.0,0.0,1.0,0.0,0.0,0.0
4,bm_001_01_12,0.0,0.0,6.0,1.0,3.0,3.0,0.0,11.0,5.0,...,16.0,2.0,8.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25436,uc_047_05_50,1.0,2.0,9.0,1.0,2.0,34.0,3.0,6.0,11.0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,1.0,34.0
25437,uc_047_05_6,22.0,12.0,14.0,18.0,5.0,191.0,8.0,4.0,55.0,...,42.0,5.0,18.0,2.0,5.0,9.0,5.0,6.0,17.0,81.0
25438,uc_047_05_7,3.0,7.0,4.0,4.0,9.0,158.0,16.0,5.0,29.0,...,32.0,1.0,12.0,3.0,6.0,17.0,35.0,7.0,35.0,67.0
25439,uc_047_05_8,9.0,18.0,10.0,11.0,10.0,140.0,12.0,6.0,62.0,...,22.0,2.0,14.0,18.0,4.0,11.0,6.0,8.0,9.0,74.0


In [28]:
sc_adata = sc.AnnData(patch_count.values[:,1:].astype('float'))

In [30]:
sc_adata.var_name = patch_count.columns.tolist()[1:]

In [31]:
sc_adata.write_h5ad(r'Y:\coskun-lab\Zhou\4_HCR\20220211_MSC_hydrogel\00_analysis\envi\single-patch_data.h5ad')

# Load and prepare spatial_adata

In [33]:
# spatial_adata.X, var_names
sp_adata = sc.AnnData(patch_count.values[:,1:].astype('float'))
sp_adata.var_names = patch_count.columns.tolist()[1:]

In [36]:
# sp_adata.obs
obs = {'row':[],'col':[],'z':[],'cell_type':[],'fov':[],'cell_id':[]}
patches = patch_count['Count_id']
df_by_patch = cell_dfs.groupby('patch')

for item in tqdm(patches):
    sub_group = df_by_patch.get_group(item)
    obs['row'].append(sub_group['row'].mean())
    obs['col'].append(sub_group['col'].mean())
    obs['z'].append(sub_group['z'].mean())
    temp = item.split('_')
    obs['cell_type'].append(temp[0])
    obs['fov'].append(temp[1])
    obs['cell_id'].append(temp[0]+'_'+temp[1]+'_'+temp[2])

100%|██████████████████████████████████████████████████████████████████████████| 25441/25441 [00:10<00:00, 2313.04it/s]


In [37]:
sp_adata.obs = pd.DataFrame(obs)

In [47]:
# sp_adata.obsm
sp_adata.obsm['spatial'] = sp_adata.obs.values[:,:3]

In [52]:
# sp_adata.obsm['COVET']
proxi_df = pd.read_csv(r'Y:\coskun-lab\Zhou\4_HCR\3d_spagnn\spagnn\results\proxi_df.csv',index_col=0)

In [61]:
genes = patch_count.columns.tolist()[1:]

In [67]:
COVET = np.zeros((len(patches),31,31))
for i,item in tqdm(enumerate(proxi_df.index.tolist())):
    for pair in proxi_df.columns.tolist():
        if not '_' in pair:
            continue
        gene1,gene2 = pair.split('_')
        idx1 = genes.index(gene1)
        idx2 = genes.index(gene2)
        COVET[i,idx1,idx2] = proxi_df.loc[item,pair]

25441it [01:35, 266.05it/s]


In [68]:
sp_adata.obsm['COVET'] = COVET

In [72]:
sp_adata.obsm['spatial'] = sp_adata.obsm['spatial'].astype('float')

In [73]:
sp_adata.write_h5ad(r'Y:\coskun-lab\Zhou\4_HCR\20220211_MSC_hydrogel\00_analysis\envi\spatial_data.h5ad')