In [7]:
import pandas as pd

# Load the pedigree table (tab-separated). The code below reads the columns
# 'LineName', 'MaleParent' and 'FemaleParent'.
df = pd.read_csv('PedInf.txt', delimiter='\t')

# Build the Parent -> Child edge list without a Python-level row loop.
# For each parent column, keep only the rows where that parent is present and
# relabel the pair of columns as (Parent, Child).
parent_links = [
    df.loc[df[parent_col].notna(), [parent_col, 'LineName']]
      .rename(columns={parent_col: 'Parent', 'LineName': 'Child'})
    for parent_col in ('MaleParent', 'FemaleParent')
]

# Concatenating puts every male-parent edge before every female-parent edge.
# A stable sort on the original row label restores the row-by-row order
# (male edge first, then female edge, for each line).
edges_df = (
    pd.concat(parent_links)
      .sort_index(kind='mergesort')
      .reset_index(drop=True)
)

# Save edges to CSV for loading into cuGraph
edges_df.to_csv("edges.csv", index=False)


In [19]:
import cudf
import cugraph

# Read the saved Parent/Child edge list into a cuDF DataFrame.
edges_df = cudf.read_csv("edges.csv")

# Build an undirected cuGraph graph, using 'Parent' as the source column and
# 'Child' as the destination column.
G = cugraph.Graph(directed=False)
G.from_cudf_edgelist(edges_df, source='Parent', destination='Child')

# Run Louvain community detection. The call returns the per-vertex partition
# table together with a modularity value; the table is then converted to pandas.
partition_gdf, modularity = cugraph.louvain(G)
louvain_parts = partition_gdf.to_pandas()

# Print a header followed by the first ten partition assignments.
print("Louvain Community Detection")
print(louvain_parts.head(10))




Louvain Community Detection
   partition      vertex
0         27   CP95-1987
1          4   CP06-2707
2          3   CP69-0391
3          6   CP13-1137
4          6   CP10-1246
5         25   CP17-1729
6         26   CP07-1183
7         17   CP91-1034
8          3  LCP85-0322
9          9   CP12-1176


In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Convert cuGraph edges to a pandas DataFrame
edges_df = G.view_edge_list().to_pandas()

# Create a NetworkX graph from the edge list
G_nx = nx.from_pandas_edgelist(edges_df, source='Parent', target='Child')

# Add Louvain community information to the NetworkX graph
louvain_communities = louvain_parts.set_index('vertex')['partition'].to_dict()
nx.set_node_attributes(G_nx, louvain_communities, 'louvain')

# Calculate various centrality metrics for each vertex.
# NOTE(review): nx.eigenvector_centrality uses power iteration with a default
# iteration cap and raises if it does not converge; raise `max_iter` if that
# happens on this graph.
degree_centrality = nx.degree_centrality(G_nx)
betweenness_centrality = nx.betweenness_centrality(G_nx)
closeness_centrality = nx.closeness_centrality(G_nx)
eigenvector_centrality = nx.eigenvector_centrality(G_nx)
pagerank = nx.pagerank(G_nx)
clustering_coefficient = nx.clustering(G_nx)

# Column name -> {vertex: score}. Insertion order fixes the column order below.
metrics = {
    'degree_centrality': degree_centrality,
    'betweenness_centrality': betweenness_centrality,
    'closeness_centrality': closeness_centrality,
    'eigenvector_centrality': eigenvector_centrality,
    'pagerank': pagerank,
    'clustering_coefficient': clustering_coefficient,
}

# Create a DataFrame to hold vertex, partition, and centrality information.
# Each metric is looked up by vertex label, so the rows stay aligned even if
# the metric dicts were to iterate in different orders.
centrality_df = pd.DataFrame({'vertex': list(G_nx.nodes)})
for metric_name, scores in metrics.items():
    centrality_df[metric_name] = centrality_df['vertex'].map(scores)

# Add partition information to the DataFrame
centrality_df['partition'] = centrality_df['vertex'].map(louvain_communities)

# Find the vertex with the greatest value in each partition for each metric.
# idxmax is taken one numeric metric column at a time: calling it on the whole
# grouped frame would include the string 'vertex' column, and its DataFrame
# result cannot be used as a row indexer for .loc.
by_partition = centrality_df.groupby('partition')
winners = []
for metric_name in metrics:
    top_row_labels = by_partition[metric_name].idxmax()
    top = centrality_df.loc[top_row_labels, ['partition', 'vertex', metric_name]]
    top = top.rename(columns={metric_name: 'value'})
    top.insert(1, 'metric', metric_name)
    winners.append(top)

# One row per (partition, metric): columns partition, metric, vertex, value.
# The stable sort keeps metrics in the order listed above within a partition.
max_metrics_per_partition = (
    pd.concat(winners, ignore_index=True)
      .sort_values('partition', kind='mergesort')
      .reset_index(drop=True)
)

# Print the results
print("Vertex with Greatest Contribution in Each Partition for Each Metric")
print(max_metrics_per_partition)

# Export to CSV
max_metrics_per_partition.to_csv("max_metrics_per_partition.csv", index=False)


In [21]:
# Fetch the graph's edge list and convert it to a pandas DataFrame.
gpu_edge_list = G.view_edge_list()
edges_df = gpu_edge_list.to_pandas()

# Show the column labels of the converted edge list.
print(edges_df.columns)

Index(['Parent', 'Child'], dtype='object')
