In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import os

In [3]:
txt_files = [file for file in os.listdir() if file.endswith('.txt')]
for edges_file_path in txt_files:
    print(f"Processing file: {edges_file_path}")
    edges_df = pd.read_csv(edges_file_path, sep=" ", header=None, names=["from", "to"])

    # 创建一个无向图
    G = nx.from_pandas_edgelist(edges_df, source='from', target='to')
    # Calculate basic graph metrics
    # 检查图的基本信息
    print(f"节点数量: {G.number_of_nodes()}")
    print(f"边数量: {G.number_of_edges()}")
    print(f"平均聚类系数: {nx.average_clustering(G)}")
    print(f"连通分量数量: {nx.number_connected_components(G)}")
    print(f"是否是连通图: {nx.is_connected(G)}")

    # Calculate degree statistics
    degrees = [d for n, d in G.degree()]
    avg_degree = sum(degrees) / len(degrees)
    print(f"平均度: {avg_degree:.4f}")
    print(f"最大度: {max(degrees)}")
    print(f"最小度: {min(degrees)}")

    # Calculate shortest path statistics
    if nx.is_connected(G):
        avg_shortest_path = nx.average_shortest_path_length(G)
        diameter = nx.diameter(G)
        print(f"平均最短路径长度: {avg_shortest_path:.4f}")
        print(f"网络直径: {diameter}")
    else:
        largest_cc = max(nx.connected_components(G), key=len)
        largest_subgraph = G.subgraph(largest_cc)
        print(f"最大连通分量的节点数: {largest_subgraph.number_of_nodes()}")
        print(f"最大连通分量的边数: {largest_subgraph.number_of_edges()}")
        avg_shortest_path = nx.average_shortest_path_length(largest_subgraph)
        diameter = nx.diameter(largest_subgraph)
        print(f"最大连通分量的平均最短路径长度: {avg_shortest_path:.4f}")
        print(f"最大连通分量的网络直径: {diameter}")

Processing file: cs_edges.txt
节点数量: 18333
边数量: 81894
平均聚类系数: 0.34252338196986143
连通分量数量: 1
是否是连通图: True
平均度: 8.9341
最大度: 136
最小度: 1
平均最短路径长度: 5.4277
网络直径: 24
Processing file: Twitter_edges.txt
节点数量: 256491
边数量: 327374
平均聚类系数: 0.015610363167864613
连通分量数量: 13199
是否是连通图: False
平均度: 2.5527
最大度: 14061
最小度: 1
最大连通分量的节点数: 223833
最大连通分量的边数: 307884


KeyboardInterrupt: 

In [23]:
import pandas as pd
import networkx as nx
import os

# Specify the single file you want to process
edges_file_path = "yeast_edges.txt"

# Check if the file exists before processing
if not os.path.exists(edges_file_path):
    print(f"Error: The file '{edges_file_path}' was not found.")
else:
    print(f"Processing file: {edges_file_path}")

    # Read the file and create the graph
    edges_df = pd.read_csv(edges_file_path, sep=" ", header=None, names=["from", "to"])
    G = nx.from_pandas_edgelist(edges_df, source='from', target='to')

    # Calculate basic graph metrics
    print(f"节点数量: {G.number_of_nodes()}")
    print(f"边数量: {G.number_of_edges()}")
    print(f"平均聚类系数: {nx.average_clustering(G)}")
    print(f"连通分量数量: {nx.number_connected_components(G)}")
    print(f"是否是连通图: {nx.is_connected(G)}")

    # Calculate degree statistics
    degrees = [d for n, d in G.degree()]
    avg_degree = sum(degrees) / len(degrees)
    print(f"平均度: {avg_degree:.4f}")
    print(f"最大度: {max(degrees)}")
    print(f"最小度: {min(degrees)}")

    # Calculate shortest path statistics
    if nx.is_connected(G):
        avg_shortest_path = nx.average_shortest_path_length(G)
        diameter = nx.diameter(G)
        print(f"平均最短路径长度: {avg_shortest_path:.4f}")
        print(f"网络直径: {diameter}")
    else:
        largest_cc = max(nx.connected_components(G), key=len)
        largest_subgraph = G.subgraph(largest_cc)
        print(f"最大连通分量的节点数: {largest_subgraph.number_of_nodes()}")
        print(f"最大连通分量的边数: {largest_subgraph.number_of_edges()}")
        avg_shortest_path = nx.average_shortest_path_length(largest_subgraph)
        diameter = nx.diameter(largest_subgraph)
        print(f"最大连通分量的平均最短路径长度: {avg_shortest_path:.4f}")
        print(f"最大连通分量的网络直径: {diameter}")

Processing file: yeast_edges.txt
节点数量: 2361
边数量: 7182
平均聚类系数: 0.13011713635603464
连通分量数量: 101
是否是连通图: False
平均度: 6.0839
最大度: 66
最小度: 1
最大连通分量的节点数: 2224
最大连通分量的边数: 7049
最大连通分量的平均最短路径长度: 4.3763
最大连通分量的网络直径: 11


In [11]:
import networkx as nx
import numpy as np

# 读取边列表文件（假设空格分隔，节点用整数表示）
G = nx.read_edgelist("photo_edges.txt", nodetype=int)

print("节点数量:", G.number_of_nodes())
print("边数量:", G.number_of_edges())

# 1. 计算全局聚类系数 (Global Clustering Coefficient)
gcc = nx.transitivity(G)
print("全局聚类系数 (GCC):", gcc)

# 2. 计算度的变异系数 (Coefficient of Variation, CV)
degrees = [d for _, d in G.degree()]
mu = np.mean(degrees)
sigma = np.std(degrees)
cv = sigma / mu if mu > 0 else 0
print("度的变异系数 (CV):", cv)

# 如果需要更详细的度信息
print("平均度:", mu)
print("最大度:", np.max(degrees))
print("最小度:", np.min(degrees))


节点数量: 7535
边数量: 119081
全局聚类系数 (GCC): 0.17733148009494923
度的变异系数 (CV): 1.5020513763007808
平均度: 31.60743198407432
最大度: 1434
最小度: 1


In [2]:
# 先在终端中输入python或python3进入Python环境
import torch_geometric
# 进入Python环境后（会看到>>>提示符），再执行你的代码
from torch_geometric.datasets import Amazon
dataset = Amazon(root='./data/Amazon', name='Photo')
dataset = Amazon(root='./data/Amazon', name='Computers')

ModuleNotFoundError: No module named 'torch'