In [None]:
import pandas as pd
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
import numpy as np

# Load the gene (protein) interaction network from a tab-separated file.
network_data = pd.read_csv(
    '../.././dataSource/39947.protein.links.v12.0.output.txt',
    sep='\t',
)

# Collect every identifier that appears at either end of an edge and assign
# each one a consecutive integer index (in the set's iteration order).
nodes = set(network_data['protein1']) | set(network_data['protein2'])
node_mapping = dict(zip(nodes, range(len(nodes))))

# Edge index: row 0 holds the index of 'protein1', row 1 the index of
# 'protein2', one column per row of the input table.
edge_index = torch.tensor(
    [
        list(map(node_mapping.__getitem__, network_data[column]))
        for column in ('protein1', 'protein2')
    ],
    dtype=torch.long,
)

# Edge weights: the 'combined_score' column, one value per edge.
edge_weight = torch.tensor(
    network_data['combined_score'].values,
    dtype=torch.float,
)

# Node features: every node gets the same single feature with value 1.
num_nodes = len(node_mapping)
x = torch.ones(num_nodes, 1, dtype=torch.float)

# Bundle features, connectivity and edge weights into one graph object.
data = Data(x=x, edge_index=edge_index, edge_attr=edge_weight)

# Graph convolutional network definition
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network that maps node features to embeddings.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Output size of the first convolution.
        out_channels: Output size of the second convolution, i.e. the
            size of the returned node embeddings.
        dropout: Dropout probability applied between the two convolutions.
            Dropout is only active while the module is in training mode.

    The defaults reproduce the originally hard-coded configuration
    (1 -> 16 -> 8 channels, dropout probability 0.5), so ``GCN()`` keeps
    working unchanged.
    """

    def __init__(self, in_channels: int = 1, hidden_channels: int = 16,
                 out_channels: int = 8, dropout: float = 0.5):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        self.dropout = dropout

    def forward(self, data):
        """Return one embedding per node.

        ``data`` must expose ``x`` (node features), ``edge_index`` and
        ``edge_attr``; ``edge_attr`` is passed to both convolutions as the
        edge weight.
        """
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_attr
        x = self.conv1(x, edge_index, edge_weight)
        x = F.relu(x)
        # Follows the module's train/eval state: a no-op after model.eval().
        x = F.dropout(x, p=self.dropout, training=self.training)
        return self.conv2(x, edge_index, edge_weight)

# Instantiate the model with its default configuration.
model = GCN()

# Forward pass to extract node embeddings.
# NOTE(review): the model is never trained and is still in training mode
# here, so the dropout inside GCN.forward is active; no random seed is set in
# the visible code either, so the embeddings (and the exported scores) change
# from run to run. Because every node has the same constant input feature,
# dropout looks like the main source of variation between node embeddings --
# confirm the intended behaviour before adding model.eval() or a fixed seed,
# since either would change the exported scores substantially.
with torch.no_grad():  # no autograd graph is needed for feature extraction
    node_features = model(data).numpy()

# Average cosine similarity of each node to all nodes (itself included).
# mean_j cos(i, j) == u_i . (sum_j u_j) / N for unit vectors u, so the N x N
# similarity matrix never has to be materialised.
feature_norms = np.linalg.norm(node_features, axis=1, keepdims=True)
# An all-zero embedding would give 0 / 0 = NaN and poison every row mean;
# dividing by 1 instead leaves it as a zero vector (similarity 0).
safe_norms = np.where(feature_norms > 0, feature_norms, 1.0)
unit_features = node_features / safe_norms
average_similarity = unit_features @ unit_features.sum(axis=0) / len(unit_features)

# Rank nodes by average cosine similarity, highest first.
sorted_indices = np.argsort(average_similarity)[::-1]
# Materialise the node list once; iterating `nodes` yields the same order
# that was used to build node_mapping, so position == node index.
node_names = list(nodes)
sorted_nodes = [node_names[idx] for idx in sorted_indices]
sorted_scores = average_similarity[sorted_indices]

# Linear (min-max) rescaling of the scores to the interval [c, d] = [0, 100].
a = sorted_scores.min()
b = sorted_scores.max()
c = 0
d = 100
score_range = b - a
if score_range > 0:
    scaled_scores_linear = c + (d - c) * (sorted_scores - a) / score_range
else:
    # All scores are identical: min-max scaling is undefined (0 / 0), so
    # every node is placed at the lower bound instead of producing NaN.
    scaled_scores_linear = np.full(sorted_scores.shape, float(c))

# Non-linear (logarithmic) rescaling. Not exported below; kept for inspection.
# A small positive constant keeps the logarithm's input away from 0, and
# negative average similarities are clipped to 0 first because the logarithm
# is undefined for them.
epsilon = 1e-6
scaled_scores_log = np.log(np.clip(sorted_scores, 0, None) + epsilon)

result_df = pd.DataFrame({
    'gene_id': sorted_nodes,
    'score': scaled_scores_linear
})

# Export to a CSV file.
result_df.to_csv('../.././dataProcessedResult/geneWeight/GCN_scores.csv', index=False)

print("结果已导出到 GCN_scores.csv")
    

结果已导出到 GCN_scores.csv
