In [None]:
import itertools
import os

import anndata as ad
import cell2cell as c2c
import h5py
import pandas as pd
import scanpy as sc

In [None]:
# --- Run configuration -------------------------------------------------------
# Dataset identifier; used to name the output directory.
data_name = 'mouse_old_sub'
# Column of `rnaseq.obs` that holds the cell-type label used to group cells.
celltype = 'MajorType'
# Output directory, derived from the two settings above so they cannot drift apart.
save_path = f'./results/{data_name}_{celltype}/'
# Later cells write CSV files into this directory; create it up front so those
# writes do not fail on a fresh checkout.
os.makedirs(save_path, exist_ok=True)

# --- Load data ---------------------------------------------------------------
rnaseq = sc.read('../scripts/data/public_mouse_aging/scRNA/mouse_old_sub.h5ad')
# Disambiguate duplicated gene names so they can serve as unique row labels.
rnaseq.var_names_make_unique()

In [None]:
# Keep only cell types represented by more than 20 cells.
value_counts = rnaseq.obs[celltype].value_counts()
valid_subclasses = value_counts[value_counts > 20].index.tolist()

# Subsetting an AnnData object returns a view; `.copy()` materialises it so the
# in-place scanpy filters below operate on a real object instead of forcing an
# implicit copy of the view.
rnaseq = rnaseq[rnaseq.obs[celltype].isin(valid_subclasses)].copy()

# Basic quality filters: drop cells with fewer than 200 detected genes, then
# genes detected in fewer than 3 cells.
sc.pp.filter_cells(rnaseq, min_genes=200)
sc.pp.filter_genes(rnaseq, min_cells=3)

In [None]:
# Ligand-receptor pair table; every column is cast to string in the same step.
lr_pairs = pd.read_csv('./Mouse-2020-Jin-LR-pairs.csv').astype(str)

# Independent copy of the per-cell annotations, passed to cell2cell as metadata.
meta = rnaseq.obs.copy()

In [None]:
# Preview the metadata columns: the `barcode_col` / `celltype_col` names given
# to cell2cell in the next cell have to match what is shown here.
meta.head()

In [None]:
# Expression table handed to cell2cell: the transpose of the AnnData data frame.
expression_table = rnaseq.to_df().T

# Set up the single-cell interaction pipeline on the LR pairs and metadata.
interactions = c2c.analysis.SingleCellInteractions(
    rnaseq_data=expression_table,
    ppi_data=lr_pairs,
    metadata=meta,
    interaction_columns=('ligand_symbol', 'receptor_symbol'),
    communication_score='expression_thresholding',
    expression_threshold=0.1,  # values after aggregation
    cci_score='bray_curtis',
    cci_type='directed',
    aggregation_method='nn_cell_fraction',
    barcode_col='NAME',  # adjust to match the barcode column name in `meta`
    celltype_col=celltype,
    complex_sep='&',
    verbose=False,
)

In [None]:
# Run cell2cell's pairwise communication scoring step on the pipeline object;
# the return value is not captured here.
interactions.compute_pairwise_communication_scores()

In [None]:
# Run cell2cell's pairwise cell-cell interaction (CCI) scoring step; the
# 'cci_matrix' entry read afterwards comes from this pipeline object.
interactions.compute_pairwise_cci_scores()

In [None]:
# Fetch the cell-cell interaction (CCI) score matrix.
# NOTE(review): cell2cell documents `interaction_elements` on the InteractionSpace
# stored in `interactions.interaction_space`, not on the pipeline object itself.
# The lookup below works with either layout; confirm against the installed version.
score_holder = getattr(interactions, 'interaction_space', interactions)
cci_raw = score_holder.interaction_elements['cci_matrix']

min_val = cci_raw.min().min()  # global minimum
max_val = cci_raw.max().max()  # global maximum
value_range = max_val - min_val

# Min-max scaling of the whole matrix to [0, 1].
if value_range == 0:
    # Every score is identical: dividing by the zero range would fill the
    # matrix with NaN, so map the constant matrix to 0 instead.
    df = cci_raw * 0.0
else:
    df = (cci_raw - min_val) / value_range

In [None]:
# Write the current `df` (the scaled CCI matrix when cells run in order)
# into the run's output directory.
file_name = 'CCI.csv'
final_df = df
final_df.to_csv(f'{save_path}{file_name}')

In [None]:
# Permutation test of the communication scores by shuffling cell-type labels.
# A fixed seed makes the resulting p-values reproducible across kernel restarts.
PERMUTATION_SEED = 0
ccc_pvals = interactions.permute_cell_labels(evaluation='communication',
                                             permutations=100,
                                             fdr_correction=True,
                                             random_state=PERMUTATION_SEED,
                                             verbose=True)

In [None]:
# Cutoff applied to the long table below (strict `<`).
# NOTE(review): the per-cell-pair summary built with `matrix_mult` uses `<=` with
# the same 0.99 value, so entries exactly at the cutoff are treated differently
# in the two outputs - confirm which is intended.
PVAL_CUTOFF = 0.99

# Flatten the p-value table into one record per (LR pair, cell pair).
# Each row label of `ccc_pvals` unpacks to (ligand, receptor); each column label
# is a cell-pair string whose ";" separator is rewritten to "|".
rows = [
    [cell_pair.replace(";", "|"), ligand, receptor, f"{ligand} - {receptor}", pval]
    for (ligand, receptor), row in ccc_pvals.iterrows()
    for cell_pair, pval in row.items()
]

# Convert to a DataFrame
df_long = pd.DataFrame(rows, columns=['cell_pair', 'ligand', 'receptor', 'LR_pair', 'pval'])

# Keep only records below the cutoff (NaN p-values are dropped by the comparison).
df = df_long[df_long['pval'] < PVAL_CUTOFF]

# Save into the shared output directory instead of re-spelling its path.
df.to_csv(f"{save_path}lr_value_cell2cell.csv", index=False)

In [None]:
def matrix_mult(df, cell_type_pairs, pval_threshold=0.9):
    """Summarise, per cell pair, which ligand-receptor pairs pass a p-value cutoff.

    Despite the name, no matrix multiplication is performed.

    Parameters
    ----------
    df : pandas.DataFrame
        P-value table. Each index entry must unpack to ``(ligand, receptor)``;
        each column is one cell-pair label.
    cell_type_pairs : iterable of str
        Column labels to summarise. Labels missing from ``df.columns`` are
        reported on stdout and skipped.
    pval_threshold : float, default 0.9
        An LR pair is kept for a cell pair when its value is
        ``<= pval_threshold`` (NaN never passes).

    Returns
    -------
    pandas.DataFrame
        One row per processed cell pair with columns ``cell_pair`` (label with
        ``;`` replaced by ``|``), ``lr_pairs`` (list of ``"ligand - receptor"``
        strings in index order, duplicates removed) and ``lr_count``, sorted by
        ``lr_count`` in descending order. When no cell pair is processed, an
        empty frame with these three columns is returned.
    """
    columns = ['cell_pair', 'lr_pairs', 'lr_count']
    results = []

    for ctp in cell_type_pairs:
        if ctp not in df.columns:
            # Say which label was skipped instead of printing a blank line.
            print(f"Skipping cell pair not found in p-value table: {ctp}")
            continue

        # Index entries (ligand, receptor) whose p-value passes the cutoff.
        lr_index = df.index[df[ctp] <= pval_threshold]

        # dict.fromkeys removes duplicates while keeping index order, so the
        # output is reproducible between runs (iteration order of a set is not).
        significant_lrs = list(dict.fromkeys(
            f"{ligand} - {receptor}" for ligand, receptor in lr_index
        ))

        results.append({
            'cell_pair': ctp.replace(";", "|"),  # normalise the separator to "|"
            'lr_pairs': significant_lrs,
            'lr_count': len(significant_lrs)
        })

    # Explicit columns keep `sort_values` valid even when `results` is empty.
    return pd.DataFrame(results, columns=columns).sort_values('lr_count', ascending=False)

In [None]:
# Summarise every cell-pair column of the permutation p-value table, then order
# the summary by lr_count, largest first.
cell_type_pairs = list(ccc_pvals.columns)
result_df = (
    matrix_mult(ccc_pvals, cell_type_pairs, pval_threshold=0.99)
    .sort_values(by='lr_count', ascending=False)
)

In [None]:
# Sum of lr_count over all cell pairs in the summary table.
total_lr_count = result_df['lr_count'].sum()
print("所有细胞对的 lr_count 总和:", total_lr_count)

# Save into the shared output directory instead of re-spelling its path.
result_df.to_csv(f'{save_path}lr_cell2cell.csv')