In [1]:
import networkx as nx
import numpy as np
import pymetis
import copy


def read_graphml_file(filename):
    """Load a GraphML file into a NetworkX graph.

    Args:
        filename: Path passed straight to ``nx.read_graphml``.

    Returns:
        The graph returned by ``nx.read_graphml``, or ``None`` if reading
        raised any ``Exception``. In the failure case the exception message
        is printed to stdout as ``Error: <message>``.
    """
    try:
        loaded = nx.read_graphml(filename)
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None.
        print(f"Error: {e}")
        return None
    else:
        return loaded
    
def get_partition(net):
    """Split the network's giant component into two parts with pymetis.

    Args:
        net: Object exposing ``get_adjacency_dict()``, which returns a mapping
            of node -> list of neighbour nodes. The adjacency arrays are
            handed to pymetis in the mapping's iteration order.

    Returns:
        Tuple ``(n_cuts, membership)`` where ``n_cuts`` is the first value
        returned by ``pymetis.part_graph`` and ``membership`` maps each node
        key of the adjacency dict to its part label.
    """
    adjacency_by_node = net.get_adjacency_dict()

    neighbour_arrays = []
    for neighbours in adjacency_by_node.values():
        neighbour_arrays.append(np.asarray(neighbours))

    # Fixed METIS settings used for every bisection in this notebook.
    metis_options = pymetis.Options(ufactor=400, niter=100, contig=True)
    n_cuts, part_labels = pymetis.part_graph(
        nparts=2,
        adjacency=neighbour_arrays,
        options=metis_options,
    )

    membership = dict(zip(adjacency_by_node.keys(), part_labels))
    return n_cuts, membership

In [2]:
class SocialNetwork():
    """A network loaded from GraphML together with its largest connected component.

    Attributes:
        name: Free-form label given by the caller.
        directed_graph: Graph as returned by ``read_graphml_file``.
        undirected_graph: ``directed_graph.to_undirected()``.
        giant_component: Copy of the largest connected component of
            ``undirected_graph``.
        giant_component_int: ``giant_component`` relabelled with consecutive
            integers starting at 0; the original label of each node is kept
            in its ``user_id`` attribute.
    """

    def __init__(self, name, filename):
        """Load ``filename`` and derive the undirected giant component.

        Raises:
            ValueError: If the file could not be loaded (the loader returned
                ``None``) or the loaded graph has no nodes.
        """
        self.name = name
        self.directed_graph = read_graphml_file(filename)
        if self.directed_graph is None:
            # The loader reports failures by returning None; fail here with a
            # clear message instead of an AttributeError on the next line.
            raise ValueError(f"Could not load a graph from {filename!r}")
        self.undirected_graph = self.directed_graph.to_undirected()

        if len(self.undirected_graph) == 0:
            # max() over zero components would raise an uninformative error.
            raise ValueError(f"Graph loaded from {filename!r} has no nodes")

        largest_cc = max(nx.connected_components(self.undirected_graph), key=len)
        self.giant_component = self.undirected_graph.subgraph(largest_cc).copy()

        self.giant_component_int = nx.convert_node_labels_to_integers(
            self.giant_component,
            first_label=0,
            ordering='default',
            label_attribute='user_id',
        )

    def get_giant_component_fraction(self):
        """Return the share of undirected-graph nodes that lie in the giant component."""
        return len(self.giant_component)/len(self.undirected_graph)

    def get_adjacency_dict(self):
        """Return ``{node: [neighbours]}`` for the integer-labelled giant component."""
        graph = self.giant_component_int
        return {node: list(graph.neighbors(node)) for node in graph.nodes}

In [3]:
# Load the network from GraphML and wrap it in a SocialNetwork (which also
# extracts the integer-labelled giant component).
filename = "./keywords_non_universal_stream_processed/CLIMATE_2023_net.graphml"
# NOTE(review): the name is "PARTIES2023" although the file loaded is the
# CLIMATE_2023 network -- confirm whether this label is intentional.
net = SocialNetwork(name = "PARTIES2023", filename = filename)

# Two-way split of the giant component; membership maps node -> part label.
n_cuts, membership = get_partition(net)

# ATTRIBUTE 1: Original partition
nx.set_node_attributes(net.giant_component_int, membership, name="cluster")


In [4]:
def finetune_partition(net, membership):
    """Greedily relabel "loner" nodes of a two-cluster partition when that raises modularity.

    A loner is a node none of whose neighbours carries the node's own label
    in ``membership``. Loners are identified once, up front, and then visited
    in node order: each one is moved to the other cluster, and the move is
    kept only if the modularity of the whole partition strictly increases.

    Args:
        net: Object whose ``giant_component_int`` attribute is the graph to
            evaluate (must provide ``nodes`` and ``neighbors(node)``).
        membership: Mapping node -> cluster label. Only the labels 0 and 1
            are supported. The mapping is not modified.

    Returns:
        A new dict with the fine-tuned labels.

    Side effects:
        Prints the modularity before and after, plus one line per loner node.
    """
    graph = net.giant_component_int

    def _modularity(labels):
        # Modularity of the 0/1 partition described by ``labels``.
        c0 = {k for k, v in labels.items() if v == 0}
        c1 = {k for k, v in labels.items() if v == 1}
        return nx.community.modularity(graph, [c0, c1])

    # Use the membership argument for the node AND its neighbours so the
    # result does not depend on what happens to be stored on the graph.
    loner_nodes = [
        node
        for node in graph.nodes
        if all(membership[n] != membership[node] for n in graph.neighbors(node))
    ]

    # Work on a copy so the caller's dict is left untouched (labels are plain
    # values, so a shallow copy is enough).
    membership_finetuned = dict(membership)

    q_best = _modularity(membership_finetuned)
    print(f"Before finetuning modularity is {q_best}")

    for node in loner_nodes:
        old_label = membership_finetuned[node]
        membership_finetuned[node] = 1 - old_label

        new_q = _modularity(membership_finetuned)

        if new_q > q_best:
            print(f"Improvement {new_q - q_best} by swapping node {node}")
            q_best = new_q
        else:
            print(f"No improvement by swapping node {node}")
            # Undo the trial move.
            membership_finetuned[node] = old_label

    print(f"After finetuning modularity is {q_best}")
    return membership_finetuned

In [5]:
# Keep an independent copy of the initial partition before `membership` is
# rebound to the fine-tuned result.
membership_original = copy.deepcopy(membership)
membership = finetune_partition(net, membership)

# ATTRIBUTE 2: Finetuned partition
nx.set_node_attributes(net.giant_component_int, membership, name="finetuned_cluster")
#nx.write_graphml_lxml(net.giant_component_int, "simulation-PARTIES-4.graphml")


Before finetuning modularity is 0.4514598003065425
Improvement 0.00013978125784591677 by swapping node 331
Improvement 0.00020961512664796622 by swapping node 677
Improvement 0.00013970557768455683 by swapping node 1758
Improvement 0.0001396753056202571 by swapping node 2657
Improvement 6.982630078578023e-05 by swapping node 2878
Improvement 0.00013962989752347443 by swapping node 3095
Improvement 6.980359673747216e-05 by swapping node 3149
Improvement 0.00020936538211580036 by swapping node 3290
Improvement 6.977332467295039e-05 by swapping node 3470
Improvement 0.00013952394529770373 by swapping node 3475
Improvement 6.97506206245313e-05 by swapping node 3625
Improvement 0.0002092064537770888 by swapping node 4099
Improvement 6.972034856012055e-05 by swapping node 4142
Improvement 6.971278054396235e-05 by swapping node 4711
After finetuning modularity is 0.45320489022498006


In [115]:
# ATTRIBUTE 3: Candidate information
# Builds lookup tables keyed by Twitter id (as a string) from two CSV files.
import pandas as pd

# `candidates` supplies the id <-> screen_name pairing; `candidates_full`
# supplies the per-candidate details, keyed by screen_name.
candidates = pd.read_csv("candidates-2023.csv")
candidates_full = pd.read_csv("candidates2023-complete.csv")

# Ids are cast to str before being used as keys.
# NOTE(review): presumably so they compare equal to the string `user_id`
# stored on graph nodes -- confirm.
id_2_candidate = dict(zip(candidates.id.astype(str), candidates.screen_name))
candidate_2_id = dict(zip(candidates.screen_name, candidates.id.astype(str)))

# Attach the Twitter id to the detailed table via screen_name. Screen names
# that are not keys of candidate_2_id get a missing value (NaN) here.
candidates_full['twitter_id'] = candidates_full['screen_name'].map(candidate_2_id)

# One dict per attribute, keyed by twitter_id. The CSV column names are
# Finnish; the dict names give the intended meaning (puolue -> party,
# ikä -> age, sukupuoli -> sex, kotikunta -> hometown, kieli -> language).
id_2_party = dict(zip(candidates_full.twitter_id, candidates_full.puolue))
id_2_age = dict(zip(candidates_full.twitter_id, candidates_full.ikä))
id_2_sex = dict(zip(candidates_full.twitter_id, candidates_full.sukupuoli))
id_2_hometown = dict(zip(candidates_full.twitter_id, candidates_full.kotikunta))
id_2_lang = dict(zip(candidates_full.twitter_id, candidates_full.kieli))

In [132]:
# Per-node attribute tables (node -> value). Every node gets an entry; nodes
# that are not candidates, or whose candidate record is incomplete, get "NA".
screen_name_attributes = dict()
party_attributes = dict()
sex_attributes = dict()
language_attributes = dict()

for node in net.giant_component_int.nodes():
    node_user_id = net.giant_component_int.nodes[node]["user_id"]

    # Defaults, replaced only if the complete candidate record resolves.
    screen_name = party = sex = language = "NA"

    if node_user_id in id_2_candidate:
        try:
            candidate_values = (
                id_2_candidate[node_user_id].rstrip(),
                id_2_party[node_user_id].rstrip(),
                id_2_sex[node_user_id],
                id_2_lang[node_user_id],
            )
        except (KeyError, AttributeError):
            # KeyError: the id is missing from one of the detail tables.
            # AttributeError: a name/party value is not a string (e.g. a
            # missing value read from the CSV), so .rstrip() is unavailable.
            # Anything else (including KeyboardInterrupt) now propagates.
            print(f"Error with node {node_user_id}")
        else:
            screen_name, party, sex, language = candidate_values

    screen_name_attributes[node] = screen_name
    party_attributes[node] = party
    sex_attributes[node] = sex
    language_attributes[node] = language

nx.set_node_attributes(net.giant_component_int, screen_name_attributes, "screen_name")
nx.set_node_attributes(net.giant_component_int, party_attributes, "party")
nx.set_node_attributes(net.giant_component_int, sex_attributes, "sex")
nx.set_node_attributes(net.giant_component_int, language_attributes, "language")

Error with node 705853130915889154
Error with node 2311345552
Error with node 833352107096997890


In [133]:
# Spot-check: display the attribute dict stored on node 25.
net.giant_component_int.nodes(data=True)[25]

{'user_id': '798095178',
 'cluster': 1,
 'finetuned_cluster': 1,
 'screen_name': 'MariaOhisalo',
 'party': 'vihr',
 'sex': 2,
 'language': 'fi'}

In [134]:
# Write the giant component, with the node attributes set above, to GraphML.
nx.write_graphml_lxml(net.giant_component_int, "./networks/RICH_CLIMATE_2023_NET.graphml")

In [None]:
import random

def swap_node(c0, c1, bridge_nodes):
    """Move one randomly chosen bridge node to the opposite cluster, in place.

    Args:
        c0: List of nodes in the first cluster (mutated).
        c1: List of nodes in the second cluster (mutated).
        bridge_nodes: Non-empty indexable sequence of candidate nodes; one is
            drawn uniformly at random.

    Returns:
        The same two list objects ``(c0, c1)`` after the move.
    """
    picked = bridge_nodes[random.randint(0, len(bridge_nodes) - 1)]

    # The node leaves whichever list currently holds it and joins the other.
    source, target = (c0, c1) if picked in c0 else (c1, c0)
    source.remove(picked)
    target.append(picked)

    return c0, c1

# Snapshot of the two clusters before any bridge-node move.
cluster0_before_finetuning = list({k for k, v in membership.items() if v == 0})
cluster1_before_finetuning = list({k for k, v in membership.items() if v == 1})

# Independent copies: the "best" and "candidate" lists must not share storage
# with the snapshot (or with each other), otherwise every trial move would
# silently change all of them at once.
cluster0_best = list(cluster0_before_finetuning)
cluster1_best = list(cluster1_before_finetuning)
cluster0_candidate = list(cluster0_best)
cluster1_candidate = list(cluster1_best)

q_before_finetuning = q_best = nx.community.modularity(net.giant_component_int, [cluster0_best, cluster1_best])

# NOTE(review): potential_bridge_nodes is not assigned at notebook level in
# any visible cell; it has to be defined before this loop can run.
for node in potential_bridge_nodes:
    # Try the move on fresh copies so a rejected move leaves the best
    # partition untouched.
    cluster0_candidate = list(cluster0_best)
    cluster1_candidate = list(cluster1_best)
    if node in cluster0_candidate:
        cluster0_candidate.remove(node)
        cluster1_candidate.append(node)
    else:
        cluster1_candidate.remove(node)
        cluster0_candidate.append(node)

    new_q = nx.community.modularity(net.giant_component_int, [cluster0_candidate, cluster1_candidate])

    # Keep the move only when it strictly improves modularity.
    if new_q > q_best:
        print(f"Improvement {new_q - q_best}")
        cluster0_best = cluster0_candidate
        cluster1_best = cluster1_candidate

        q_best = new_q


In [None]:
# Display the best modularity found next to the value before fine-tuning.
q_best, q_before_finetuning

In [None]:

# NOTE(review): this cell cannot run as written and looks like an abandoned draft:
#   * swap_node is defined with three parameters (c0, c1, bridge_nodes) but is
#     called here with two;
#   * new_q is never computed inside the loop, so the comparison below would
#     use a stale value or fail;
#   * set1_best, set2_best and best_q are only assigned in later cells.
# Confirm whether it should be deleted or completed.
for _ in range(500):
    set1, set2 = swap_node(set1_best, set2_best)
    

    if new_q > best_q:
        set1_best = set1
        set2_best = set2
        best_q = new_q
        print("bingo")


In [None]:


# Baseline: modularity of the two clusters before random-swap fine-tuning.
# best_q starts at the same value and is raised by the search loop below.
sets = [cluster0_before_finetuning, cluster1_before_finetuning]
original_q = best_q = nx.community.modularity(net.giant_component_int, sets)

import random

def swap_sets(set1, set2):
    """Return copies of two collections with one random element exchanged.

    One element is drawn uniformly from each collection and the two are
    swapped. The inputs themselves are not modified.

    Args:
        set1: Iterable of hashable items (first group).
        set2: Iterable of hashable items (second group).

    Returns:
        Tuple ``(new_set1, new_set2)`` of sets. If either input is empty
        there is nothing to exchange and plain set copies are returned.
    """
    set1_list = list(set1)
    set2_list = list(set2)

    if not set1_list or not set2_list:
        return set(set1_list), set(set2_list)

    # Draw a separate index for each side: the groups may differ in size, so
    # a single shared index could fall outside the shorter list.
    i = random.randint(0, len(set1_list) - 1)
    j = random.randint(0, len(set2_list) - 1)
    set1_list[i], set2_list[j] = set2_list[j], set1_list[i]

    return set(set1_list), set(set2_list)


# Start from the unmodified partition so set1_best / set2_best are always
# defined, even when no random swap improves on the baseline modularity.
set1_best = set(cluster0_before_finetuning)
set2_best = set(cluster1_before_finetuning)

# Random search: every trial exchanges one random pair of nodes starting from
# the ORIGINAL partition (not from the best found so far) and is kept only if
# it beats the best modularity seen.
for _ in range(5000):
    set1, set2 = swap_sets(cluster0_before_finetuning, cluster1_before_finetuning)
    sets = [set1, set2]
    new_q = nx.community.modularity(net.giant_component_int, sets)

    if new_q > best_q:
        set1_best = set1
        set2_best = set2
        best_q = new_q
        print("bingo")


print(f"The best modularity was before the finetuning {original_q}, now it is {best_q}")

In [None]:
# Turn the best pair of sets back into a node -> label mapping.
finetuned_membership = dict()

for node in net.giant_component_int.nodes:
    # set1_best is derived from the nodes labelled 0 (cluster 0), so its
    # members keep label 0; this keeps labels comparable with the "cluster"
    # attribute.
    finetuned_membership[node] = 0 if node in set1_best else 1

# Store the mapping that was just built (not the pre-search `membership`).
nx.set_node_attributes(net.giant_component_int, finetuned_membership, name="finetuned_cluster")
nx.write_graphml_lxml(net.giant_component_int, "simulation-climate-3.graphml")

In [None]:
# Short sim: modularity of a two-way split of a 7-node chain.

import networkx as nx

# create an empty graph
G = nx.Graph()

# add nodes 1..7 to the graph (insertion order matches the colour list below)
G.add_nodes_from(range(1, 8))

# add edges to the graph to create a chain
G.add_edges_from([(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)])

# draw the graph using matplotlib: nodes 1-3 blue, nodes 4-7 green
colors = ["b", "b", "b", "g", "g", "g", "g"]
nx.draw(G, with_labels=True, node_color=colors)

# The communities must form a partition (each node in exactly one set), so
# node 4 belongs to the second community only, matching the colours above.
nx.community.modularity(G, [{1, 2, 3}, {4, 5, 6, 7}])
