In [None]:
import os 
from pathlib import Path

from tqdm import tqdm
import numpy as np
import pandas as pd
import anndata as ad

import matplotlib.pyplot as plt
import seaborn as sns
# Figure defaults for the whole notebook: on-screen resolution, resolution of
# saved files, and a white axes background.
plt.rcParams.update({
    'figure.dpi': 200,
    'savefig.dpi': 300,
    'axes.facecolor': 'white',
})

In [None]:
# Input locations (absolute filesystem paths) read by the cells below.
# CSV table, loaded below as `meth_annot`.
annots_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/methylation/annot_with_scores.csv"
# CSV table, loaded below as `impute_df`; its `merfish_cell` column drives the grouping.
imputation_path = "/anvil/projects/x-mcb130189/qzeng/analysis/251105_merfish_methylation_2/Imputation.Subclass_Restricted.mC_MERFISH.meta.csv"
# Alternative imputation tables from the `251009_merfish_methylation` directory
# (Subclass_Restricted and Group_Restricted variants), kept for reference:
# imputation_path = "/anvil/projects/x-mcb130189/qzeng/analysis/251009_merfish_methylation/Imputation.Subclass_Restricted.mC_MERFISH.meta.csv"
# imputation_path = "/anvil/projects/x-mcb130189/qzeng/analysis/251009_merfish_methylation/Imputation.Group_Restricted.mC_MERFISH.meta.csv"
# Tab-separated table, loaded below as `snm3c_annot`.
snm3c_annot_path = "/anvil/projects/x-mcb130189/Wubin/BG/clustering/100kb/annotations.tsv"
# AnnData (.h5ad) file, loaded below as `mch_adata`.
snm3c_path = "/anvil/projects/x-mcb130189/Wubin/BG/adata/BG.gene-CHN.h5ad"
# Tab-separated table, loaded below as `spatial_annot`.
spatial_annot_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_CPS.tsv"

In [None]:
# Read the four input tables; in each file the first column becomes the index.
meth_annot = pd.read_csv(annots_path, index_col=0)
impute_df = pd.read_csv(imputation_path, index_col=0)
# The two annotation files below are tab-delimited.
snm3c_annot = pd.read_csv(snm3c_annot_path, sep="\t", index_col=0)
spatial_annot = pd.read_csv(spatial_annot_path, sep="\t", index_col=0)

# Preview the first rows of every table, in load order.
display(
    meth_annot.head(),
    impute_df.head(),
    snm3c_annot.head(),
    spatial_annot.head(),
)

In [None]:
# Load the AnnData stored at `snm3c_path`, then echo its summary repr as the
# cell output.
mch_adata = ad.read_h5ad(snm3c_path)
mch_adata

In [None]:
# IDs that appear both in the imputation table's index and in the AnnData's
# obs names.
common_cells = impute_df.index.intersection(mch_adata.obs_names)
# How many IDs are shared.
len(common_cells)

In [None]:
# impute_df.shape[0]

In [None]:
# Show the obs rows of `mch_adata` for the IDs shared with the imputation table.
mch_adata.obs.loc[common_cells]

In [None]:
# Average the rows of `mch_adata.X` over the cells listed for each
# `merfish_cell` group of the imputation table. Exactly one row is appended
# per group, in groupby iteration order, so row i of the stacked result
# belongs to the i-th group key.
X_list = []
for cell, _df in tqdm(impute_df.groupby('merfish_cell')):
    # Only IDs that actually exist in the AnnData can be averaged.
    use_cells = _df.index.intersection(mch_adata.obs_names)
    # A view is sufficient for reading .X; copying the whole AnnData
    # (obs, var and X) on every iteration was pure overhead.
    mini_mch = mch_adata[use_cells]
    if len(use_cells) == 0:
        # No matching cells: still emit a row so the rows stay aligned with
        # the group keys, and make the missing value explicit instead of
        # relying on the mean of an empty slice.
        X = np.full(mch_adata.n_vars, np.nan)
    else:
        # Depending on how X is stored, mean(axis=0) may come back as a
        # (1, n_vars) matrix or as a 1-D array; normalise to a flat ndarray
        # so every appended row has the same shape and type.
        X = np.asarray(mini_mch.X.mean(axis=0)).ravel()
    X_list.append(X)


In [None]:
# Group keys in groupby iteration order, i.e. the same order in which the
# per-group rows were appended to X_list.
cell_order = [cell for cell, _ in tqdm(impute_df.groupby('merfish_cell'))]

In [None]:
# Spot-check using the variables left over from the last loop iteration:
# the final group's rows of the imputation table and the obs of its matched cells.
display(_df, mini_mch.obs)

In [None]:
# Assemble the new AnnData: one stacked row per group key, an obs table that
# carries only the group keys as its index, and a copy of the source var table.
stacked_rows = np.vstack(X_list)
obs_frame = pd.DataFrame(index=cell_order)
var_frame = mch_adata.var.copy()
spatial_mch = ad.AnnData(X=stacked_rows, obs=obs_frame, var=var_frame)

In [None]:
# Checkpoint the freshly built matrix before any obs columns are attached;
# the same file is written again further down once the annotations are in.
spatial_mch_path = Path("/home/x-aklein2/projects/aklein/BICAN/BG/data/methylation_2/BG_mCH_Imp_SubR.h5ad")
# Make sure the output directory exists so the write cannot fail on a
# missing folder after the long averaging loop has already run.
spatial_mch_path.parent.mkdir(parents=True, exist_ok=True)
spatial_mch.write_h5ad(spatial_mch_path)

In [None]:
# Pull the annotation rows for every cell once, then copy each column over
# positionally (`.values` drops the index, so no re-alignment takes place).
annot_rows = spatial_annot.loc[spatial_mch.obs_names]
for col in annot_rows.columns:
    spatial_mch.obs[col] = annot_rows[col].values

In [None]:
# Inspect the obs table after the annotation columns have been attached.
spatial_mch.obs

In [None]:
# Load the spatial AnnData; its obs table supplies the CENTER_X / CENTER_Y
# columns used in the next step.
spatial_adata_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_CPS.h5ad"
spatial_adata = ad.read_h5ad(spatial_adata_path)

In [None]:
# Copy both centroid columns from the spatial AnnData's obs, looked up by obs
# name and assigned positionally.
for coord in ('CENTER_X', 'CENTER_Y'):
    spatial_mch.obs[coord] = spatial_adata.obs.loc[spatial_mch.obs_names, coord].values

In [None]:
# Final write: overwrites the earlier checkpoint at the same location, now
# including the annotation and centroid columns in obs.
spatial_mch_path = Path("/home/x-aklein2/projects/aklein/BICAN/BG/data/methylation_2/BG_mCH_Imp_SubR.h5ad")
# Create the output directory if it is missing so the write cannot fail on a
# non-existent folder.
spatial_mch_path.parent.mkdir(parents=True, exist_ok=True)
spatial_mch.write_h5ad(spatial_mch_path)

In [None]:
from spida.P.setup_adata import multi_round_clustering

In [None]:
# Settings for the clustering call, gathered in one place so they are easy to
# tune. NOTE(review): run_harmony is False, so batch_key and the harmony_*
# values are presumably ignored — confirm against multi_round_clustering.
clustering_params = dict(
    layer=None,
    key_added="base_",
    num_rounds=1,
    leiden_res=1,
    min_dist=0.25,
    p_cutoff=0.05,
    knn=50,
    min_group_size=50,
    run_harmony=False,
    batch_key=["replicate", "donor"],
    harmony_nclust=3,
    max_iter_harmony=10,
)
multi_round_clustering(spatial_mch, **clustering_params)