In [6]:
import networkx as nx
from scipy.sparse import csr_matrix
import scanpy as sc
import pandas as pd
import numpy as np
import pickle

In [7]:
# Pre-allocate the feature matrix: one row per row of expr_df and 1600 feature
# columns (adjust as needed). Columns that are never filled remain zero.
feature_matrix = np.zeros((expr_df.shape[0], 1600))
current_feature_index = 0  # next free column of feature_matrix

# Build one undirected, weighted co-expression graph per edge-list file.
required_columns = ['from', 'to', 'weight']
coexp_networks = []
for file_path in network_files:
    edges_df = pd.read_csv(file_path)

    # Every edge file must provide the 'from', 'to' and 'weight' columns.
    if not all(col in edges_df.columns for col in required_columns):
        raise ValueError(f"Edge file {file_path} does not contain required columns ('from', 'to', 'weight')")

    # Add all edges in one call instead of iterating the frame row by row.
    # Edge weights are scaled by a factor of 10, and a repeated (from, to)
    # pair keeps the weight of its last occurrence, exactly as a sequence of
    # add_edge calls would.
    G = nx.Graph()
    G.add_weighted_edges_from(
        zip(edges_df['from'], edges_df['to'], edges_df['weight'] * 10)
    )

    coexp_networks.append(G)

# Extract features: every Louvain community of every network contributes one
# column of feature_matrix, holding for each row of expr_df the weighted mean
# expression of the community's genes. A gene's weight is the sum of the
# weights of its edges in the network.
n_feature_slots = feature_matrix.shape[1]
for net in coexp_networks:
    if current_feature_index >= n_feature_slots:
        break  # all feature columns are filled; skip the remaining networks

    # Louvain is randomised; a fixed seed makes the detected communities (and
    # therefore the feature columns) reproducible across runs.
    communities = nx.algorithms.community.louvain_communities(net, seed=42)

    for community in communities:
        if current_feature_index >= n_feature_slots:
            break  # stop adding features once the matrix is full

        # Drop null node labels (e.g. NaN read from an incomplete edge row).
        genes = [gene for gene in community if not pd.isnull(gene)]
        if not genes:
            continue  # nothing left to average; do not consume a column

        # Per-gene weight: sum of the weights of all edges incident to the gene.
        weights = np.array(
            [sum(attrs['weight'] for attrs in net[gene].values()) for gene in genes],
            dtype=float,
        )
        total_weight = weights.sum()
        if total_weight == 0:
            continue  # weighted mean is undefined; avoid dividing by zero

        # Weighted mean expression for all rows at once. Rows are addressed by
        # position, so this works for any expr_df index (not only 0..n-1).
        # Raises KeyError if a community gene has no column in expr_df.
        module_expr = expr_df.loc[:, genes].to_numpy(dtype=float)
        feature_matrix[:, current_feature_index] = module_expr @ weights / total_weight
        current_feature_index += 1

In [12]:

# Keep at most the first 1600 feature columns and report the resulting shape.
feature_matrix = feature_matrix[:, 0:1600]
print(feature_matrix.shape)

# Destination of the exported feature table.
output_file_path = "./data/HC-brain/NoFusion_features.csv"

# Wrap the matrix in a DataFrame that reuses expr_df's row index, then write
# it to CSV with the index included.
feature_df = pd.DataFrame(data=feature_matrix, index=expr_df.index)
feature_df.to_csv(output_file_path, index=True)


(6050, 1600)
