In [8]:
import networkx as nx
import pickle
import os

pkl_directory = '../data/trimmed_networks_pkl'

graphs = {}

for filename in os.listdir(pkl_directory):
    if filename.endswith('.pkl'):
        filepath = os.path.join(pkl_directory, filename)
        with open(filepath, 'rb') as f:
            G = pickle.load(f)
            graphs[filename] = G  # Store the graph in the dictionary
            print(f"Loaded graph from {filename}:")
            print(f"Number of nodes: {G.number_of_nodes()}, Number of edges: {G.number_of_edges()}")

for graph_name, graph in graphs.items():
    print(f"Graph: {graph_name}, Nodes: {graph.number_of_nodes()}, Edges: {graph.number_of_edges()}")





Loaded graph from 2_resource_allocation_naive.pkl:
Number of nodes: 18643, Number of edges: 42691
Loaded graph from 2_simple_disparity_filter.pkl:
Number of nodes: 18643, Number of edges: 336278
Loaded graph from 1_simple_disparity_filter.pkl:
Number of nodes: 18643, Number of edges: 443701
Loaded graph from 1_resource_allocation_naive.pkl:
Number of nodes: 18643, Number of edges: 15588
Graph: 2_resource_allocation_naive.pkl, Nodes: 18643, Edges: 42691
Graph: 2_simple_disparity_filter.pkl, Nodes: 18643, Edges: 336278
Graph: 1_simple_disparity_filter.pkl, Nodes: 18643, Edges: 443701
Graph: 1_resource_allocation_naive.pkl, Nodes: 18643, Edges: 15588


In [None]:
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import label_propagation_communities
from networkx.algorithms.community import modularity
from networkx.algorithms.assortativity import attribute_assortativity_coefficient

graph_sd_1 = graphs['1_simple_disparity_filter.pkl']
graph_sd_2 = graphs['2_simple_disparity_filter.pkl']
graph_rn_1 = graphs['1_resource_allocation_naive.pkl']
graph_rn_2 = graphs['2_resource_allocation_naive.pkl']

isolated_nodes_1 = list(nx.isolates(graph_sd_1))
graph_sd_1.remove_nodes_from(isolated_nodes_1)
print(len(graph_sd_1.nodes))

isolated_nodes_2 = list(nx.isolates(graph_sd_2))
graph_sd_2.remove_nodes_from(isolated_nodes_2)
print(len(graph_sd_2.nodes))

isolated_nodes_3 = list(nx.isolates(graph_rn_1))
graph_rn_1.remove_nodes_from(isolated_nodes_3)
print(len(graph_rn_1.nodes))

isolated_nodes_4 = list(nx.isolates(graph_rn_2))
graph_rn_2.remove_nodes_from(isolated_nodes_4)
print(len(graph_rn_2.nodes))

# pikle the graphs
with open('../data/trimmed_networks_pkl/1_simple_disparity_filter.pkl', 'wb') as f:
    pickle.dump(graph_sd_1, f)
with open('../data/trimmed_networks_pkl/2_simple_disparity_filter.pkl', 'wb') as f:
    pickle.dump(graph_sd_2, f)
with open('../data/trimmed_networks_pkl/1_resource_allocation_naive.pkl', 'wb') as f:
    pickle.dump(graph_rn_1, f)
with open('../data/trimmed_networks_pkl/2_resource_allocation_naive.pkl', 'wb') as f:
    pickle.dump(graph_rn_2, f)






# Define a function to compute and print assortativity for a given attribute
def compute_assortativity(graph, attribute):
    try:
        assortativity = nx.attribute_assortativity_coefficient(graph, attribute)
        print(f"Assortativity for '{attribute}': {assortativity}")
        return assortativity
    except Exception as e:
        print(f"Error computing assortativity for '{attribute}': {e}")
        return None

# Example for Graph 1
print("Graph simple count and disparity filter with threshold 0.5 Assortativities:")
compute_assortativity(graph_sd_1, "most_frequent_locality")
compute_assortativity(graph_sd_1, "most_frequent_parasite_group")
compute_assortativity(graph_sd_1, "animals_group")

# Example for Graph 2
print("\nGraph simple count and disparity filter with threshold 0.57 Assortativities:")
compute_assortativity(graph_sd_2, "most_frequent_locality")
compute_assortativity(graph_sd_2, "most_frequent_parasite_group")
compute_assortativity(graph_sd_2, "animals_group")

# Example for Graph 3
print("\nGraph resource allocation and naive with threshold 0.0349 on score (not nij) Assortativities:")
compute_assortativity(graph_rn_1, "most_frequent_locality")
compute_assortativity(graph_rn_1, "most_frequent_parasite_group")
compute_assortativity(graph_rn_1, "animals_group")

print("\nGraph resource allocation and naive with threshold 0.01745 on score (not nij) Assortativities:")
compute_assortativity(graph_rn_2, "most_frequent_locality")
compute_assortativity(graph_rn_2, "most_frequent_parasite_group")
compute_assortativity(graph_rn_2, "animals_group")


16075
14911
9213
12770
Graph simple count and disparity filter with threshold 0.5 Assortativities:
Assortativity for 'most_frequent_locality': 0.4712269206805252
Assortativity for 'most_frequent_parasite_group': 0.14224257376327284
Assortativity for 'animals_group': 0.740386063473734

Graph simple count and disparity filter with threshold 0.57 Assortativities:
Assortativity for 'most_frequent_locality': 0.49258958275632997
Assortativity for 'most_frequent_parasite_group': 0.11295808672649627
Assortativity for 'animals_group': 0.7539672961799521

Graph resource allocation and naive with threshold 0.0349 on score (not nij) Assortativities:
Assortativity for 'most_frequent_locality': 0.8046498397680208
Assortativity for 'most_frequent_parasite_group': 0.48336212859350697
Assortativity for 'animals_group': 0.7655905567624477

Graph resource allocation and naive with threshold 0.01745 on score (not nij) Assortativities:
Assortativity for 'most_frequent_locality': 0.7593732620754623
Assortat

0.7692179565334559

In [None]:
def compute_communities_and_modularity(graph):
    results = {}
    
    # Label Propagation
    label_communities = list(nx.community.label_propagation_communities(graph))
    label_modularity = nx.community.modularity(graph, label_communities)
    results['label_propagation'] = {
        'communities': label_communities,
        'modularity': label_modularity
    }
    print("Label Propagation - Modularity:", label_modularity)
    
    # Greedy Modularity Maximization
    greedy_communities = list(nx.community.greedy_modularity_communities(graph))
    greedy_modularity = nx.community.modularity(graph, greedy_communities)
    results['greedy_modularity'] = {
        'communities': greedy_communities,
        'modularity': greedy_modularity
    }
    print("Greedy Modularity Maximization - Modularity:", greedy_modularity)
    
    return results

#Compute communities and modularity for each graph
print("Graph simple count and disparity filter with threshold 0.5:")
graph_sd_1_results = compute_communities_and_modularity(graph_sd_1)

print("\nGraph simple count and disparity filter with threshold 0.57:")
graph_sd_2_results = compute_communities_and_modularity(graph_sd_2)

print("\nGraph resource allocation and naive with threshold 0.0349 on score (not nij):")
graph_rn_1_results = compute_communities_and_modularity(graph_rn_1)

print("\nGraph resource allocation and naive with threshold 0.01745 on score (not nij):")
graph_rn_2_results = compute_communities_and_modularity(graph_rn_2)


# pikle the graphs
with open('../data/communities_and_modularity/1_simple_disparity_filter.pkl', 'wb') as f:
    pickle.dump(graph_sd_1_results, f)
with open('../data/communities_and_modularity/2_simple_disparity_filter.pkl', 'wb') as f:
    pickle.dump(graph_sd_2_results, f)
with open('../data/communities_and_modularity/1_resource_allocation_naive.pkl', 'wb') as f:
    pickle.dump(graph_rn_1_results, f)
with open('../data/communities_and_modularity/2_resource_allocation_naive.pkl', 'wb') as f:
    pickle.dump(graph_rn_2_results, f)

Graph simple count and disparity filter with threshold 0.5:
Label Propagation - Modularity: 0.6628264757191553
Greedy Modularity Maximization - Modularity: 0.6006807751603583

Graph simple count and disparity filter with threshold 0.57:
Label Propagation - Modularity: 0.6747890075721703
Greedy Modularity Maximization - Modularity: 0.6279917740621505

Graph resource allocation and naive with threshold 0.0349 on score (not nij):
Label Propagation - Modularity: 0.8737346500459714
Greedy Modularity Maximization - Modularity: 0.9756102337427665

Graph resource allocation and naive with threshold 0.01745 on score (not nij):
Label Propagation - Modularity: 0.7847556856862089
Greedy Modularity Maximization - Modularity: 0.8715154202893041


In [31]:
len(graph_sd_1_results['label_propagation']['communities'])
len(graph_sd_1_results['greedy_modularity']['communities'])

len(graph_sd_2_results['label_propagation']['communities'])
len(graph_sd_2_results['greedy_modularity']['communities'])

len(graph_rn_1_results['label_propagation']['communities'])
len(graph_rn_1_results['greedy_modularity']['communities'])

len(graph_rn_2_results['label_propagation']['communities'])
len(graph_rn_2_results['greedy_modularity']['communities'])


760

In [29]:
from sklearn.metrics import adjusted_mutual_info_score

# Function to compute Adjusted Mutual Information (AMI)
def compute_ami(graph, communities, attribute):
    # Map nodes to their community labels
    node_to_community = {}
    for idx, community in enumerate(communities):
        for node in community:
            node_to_community[node] = idx

    # Extract the ground-truth labels based on the given attribute
    node_labels = []
    community_labels = []
    for node in graph.nodes():
        if attribute in graph.nodes[node]:
            node_labels.append(graph.nodes[node][attribute])
            community_labels.append(node_to_community[node])

    # Compute AMI
    ami_score = adjusted_mutual_info_score(node_labels, community_labels)
    return ami_score

# Function to compute AMI for all attributes and both community detection methods
def compute_ami_for_graph(graph, results, attributes):
    ami_results = {}
    
    for method in ['label_propagation', 'greedy_modularity']:
        print(f"\n{method.capitalize()} Communities:")
        communities = results[method]['communities']
        ami_results[method] = {}
        
        for attribute in attributes:
            ami = compute_ami(graph, communities, attribute)
            ami_results[method][attribute] = ami
            print(f"Adjusted Mutual Information (AMI) for {attribute}: {ami}")
    
    return ami_results

# List of attributes
attributes = ["most_frequent_locality", "most_frequent_parasite_group", "animals_group"]

#Compute AMI for each graph and attribute
print("Graph simple count and disparity filter with threshold 0.5:")
ami_results_sd_1 = compute_ami_for_graph(graph_sd_1, graph_sd_1_results, attributes)

print("\nGraph simple count and disparity filter with threshold 0.57:")
ami_results_sd_2 = compute_ami_for_graph(graph_sd_2, graph_sd_2_results, attributes)

print("\nGraph resource allocation and naive with threshold 0.0349 on score (not nij):")
ami_results_rn_1 = compute_ami_for_graph(graph_rn_1, graph_rn_1_results, attributes)

print("\nGraph resource allocation and naive with threshold 0.01745 on score (not nij):")
ami_results_rn_1 = compute_ami_for_graph(graph_rn_2, graph_rn_2_results, attributes)



Graph simple count and disparity filter with threshold 0.5:

Label_propagation Communities:
16075
16075
Adjusted Mutual Information (AMI) for most_frequent_locality: 0.3360720338999654
16075
16075
Adjusted Mutual Information (AMI) for most_frequent_parasite_group: 0.09234706909901179
16075
16075
Adjusted Mutual Information (AMI) for animals_group: 0.3824973865605928

Greedy_modularity Communities:
16075
16075
Adjusted Mutual Information (AMI) for most_frequent_locality: 0.23732538395752842
16075
16075
Adjusted Mutual Information (AMI) for most_frequent_parasite_group: 0.07487810533955537
16075
16075
Adjusted Mutual Information (AMI) for animals_group: 0.38677856056691945

Graph simple count and disparity filter with threshold 0.57:

Label_propagation Communities:
14911
14911
Adjusted Mutual Information (AMI) for most_frequent_locality: 0.337506515431428
14911
14911
Adjusted Mutual Information (AMI) for most_frequent_parasite_group: 0.0930606450009562
14911
14911
Adjusted Mutual Informa