### Network Fusion
- Might consider temporal indicators (epoch)
- Might also add linguistic features like formality and toxicity, if time allows

In [1]:
import pickle
import networkx as nx


In [2]:
# Load the four English layers.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load .gpickle files that were produced by this project.
# The context managers make sure each file handle is closed after loading.
with open('en_text_graph.gpickle', 'rb') as graph_file:
    en_text_graph = pickle.load(graph_file)
with open('en_co_url_graph.gpickle', 'rb') as graph_file:
    en_co_url_graph = pickle.load(graph_file)

# NOTE(review): read_gexf is called without node_type, so node ids presumably
# come back as strings; confirm the pickled graphs use the same id type,
# otherwise the node union / edge fusion further down will not line up.
en_co_hashtag_graph = nx.read_gexf("en_co_hashtag_graph.gexf")
en_co_mentioned_graph = nx.read_gexf("en_co_mentioned_graph.gexf")


In [3]:
# Load the four Spanish layers.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load .gpickle files that were produced by this project.
# The context managers make sure each file handle is closed after loading.
with open('es_text_graph.gpickle', 'rb') as graph_file:
    es_text_graph = pickle.load(graph_file)
with open('es_co_url_graph.gpickle', 'rb') as graph_file:
    es_co_url_graph = pickle.load(graph_file)

# NOTE(review): read_gexf is called without node_type, so node ids presumably
# come back as strings; confirm the pickled graphs use the same id type,
# otherwise the node union / edge fusion further down will not line up.
es_co_hashtag_graph = nx.read_gexf("es_co_hashtag_graph.gexf")
es_co_mentioned_graph = nx.read_gexf("es_co_mentioned_graph.gexf")


### 1. English network fusion

In [4]:
# Union of the node ids that appear in any of the four English layers
en_all_nodes = (
    set(en_text_graph.nodes())
    | set(en_co_url_graph.nodes())
    | set(en_co_hashtag_graph.nodes())
    | set(en_co_mentioned_graph.nodes())
)

# Pad each layer in place so that all four share the same node set.
# add_nodes_from only creates the nodes a layer does not have yet;
# nodes that are already present are left as they are.
for layer in (en_text_graph, en_co_url_graph, en_co_hashtag_graph, en_co_mentioned_graph):
    layer.add_nodes_from(en_all_nodes)


In [5]:
# Fused graph starts empty and undirected.
# Nodes enter it only through add_edge, so a node without any edge
# in the layers does not appear in the fused graph.
en_fused_graph = nx.Graph()

# Pour the edges of every layer into the fused graph, summing weights
# when the same pair of nodes is connected in more than one layer.
for layer in (en_text_graph, en_co_url_graph, en_co_hashtag_graph, en_co_mentioned_graph):
    # An edge without a 'weight' attribute contributes a weight of 1
    for src, dst, layer_weight in layer.edges(data='weight', default=1):
        if not en_fused_graph.has_edge(src, dst):
            # First layer in which this pair is connected
            en_fused_graph.add_edge(src, dst, weight=layer_weight)
        else:
            # Pair already fused from an earlier layer: accumulate
            en_fused_graph[src][dst]['weight'] += layer_weight


In [6]:
# size() is called without a weight argument, so this displays the edge count of the fused graph
en_fused_graph.size()

495841

In [11]:
# Display the number of edges in the fused English graph
en_fused_graph.number_of_edges()

495841

#### Edge filtering

In [20]:
import numpy as np
def filter_edges_by_weight(graph, weight_threshold):
    """
    Build a new undirected graph holding only the sufficiently heavy edges.

    Parameters
    ----------
    graph : graph-like
        Any object whose ``edges(data=True)`` yields ``(u, v, attr_dict)``.
    weight_threshold : number
        An edge is kept when its weight is greater than or equal to this value.

    Returns
    -------
    nx.Graph
        A fresh graph containing the kept edges, each carrying only a
        ``weight`` attribute. Nodes enter the result solely through their
        kept edges, so a node with no kept edge is absent from it.

    Notes
    -----
    An edge without a ``weight`` attribute is treated as weight 0 both for
    the comparison and for the stored value (previously the stored value was
    read with ``data['weight']`` and raised ``KeyError`` in that case).
    """
    filtered_graph = nx.Graph()
    for u, v, data in graph.edges(data=True):
        # Read the weight once so the test and the copied value always agree
        weight = data.get('weight', 0)
        if weight >= weight_threshold:
            filtered_graph.add_edge(u, v, weight=weight)
    return filtered_graph

# Gather the weight of every fused edge; the cut-off is chosen from this distribution
en_edge_weights = [edge_weight for _, _, edge_weight in en_fused_graph.edges(data='weight')]

# Cut-off: 50th percentile of the fused edge weights
en_percentile_50 = np.percentile(en_edge_weights, 50)

# Keep the edges whose weight is at or above the cut-off
en_filtered_graph = filter_edges_by_weight(
    en_fused_graph,
    weight_threshold=en_percentile_50,
)

print(f"Number of nodes: {en_filtered_graph.number_of_nodes()}")
print(f"Number of edges: {en_filtered_graph.number_of_edges()}")


Number of nodes: 3618
Number of edges: 247921


#### Node Pruning

In [21]:
# Score every node of the fused (unfiltered) English graph, allowing up to 1000 iterations.
# NOTE(review): no weight= argument is passed, so presumably the fused edge
# weights do not influence these scores; confirm this is intended.
en_eigen_centrality = nx.eigenvector_centrality(en_fused_graph, max_iter=1000)

# Store each score on its node under the "eigenvector_centrality" attribute
nx.set_node_attributes(
    en_fused_graph,
    values=en_eigen_centrality,
    name="eigenvector_centrality",
)

def prune_nodes_by_centrality(graph, centrality, centrality_threshold):
    """
    Return a copy of ``graph`` restricted to the sufficiently central nodes.

    Parameters
    ----------
    graph : graph-like
        Object providing ``subgraph(nodes)``; the result of that call is copied.
    centrality : dict
        Maps node -> centrality score.
    centrality_threshold : number
        A node is selected when its score is greater than or equal to this value.

    Returns
    -------
    The copied subgraph of ``graph`` induced by the selected nodes.
    """
    central_nodes = []
    for candidate, score in centrality.items():
        if score >= centrality_threshold:
            central_nodes.append(candidate)
    return graph.subgraph(central_nodes).copy()

# Distribution of the centrality scores, used to choose the pruning cut-off
en_centrality_values = [score for score in en_eigen_centrality.values()]

# Cut-off: 50th percentile of the scores.
# NOTE: this rebinds en_percentile_50, which the edge-filtering step used
# for the edge-weight cut-off; from here on it holds a centrality value.
en_percentile_50 = np.percentile(en_centrality_values, 50)

# Prune the edge-filtered graph; the scores themselves were computed on
# en_fused_graph (the unfiltered graph).
en_pruned_graph = prune_nodes_by_centrality(
    en_filtered_graph,
    en_eigen_centrality,
    centrality_threshold=en_percentile_50,
)

print(f"Number of nodes: {en_pruned_graph.number_of_nodes()}")
print(f"Number of edges: {en_pruned_graph.number_of_edges()}")


Number of nodes: 1807
Number of edges: 106400


### 2. Spanish network fusion

In [12]:
# Union of the node ids that appear in any of the four Spanish layers
es_all_nodes = (
    set(es_text_graph.nodes())
    | set(es_co_url_graph.nodes())
    | set(es_co_hashtag_graph.nodes())
    | set(es_co_mentioned_graph.nodes())
)

# Pad each layer in place so that all four share the same node set.
# add_nodes_from only creates the nodes a layer does not have yet;
# nodes that are already present are left as they are.
for layer in (es_text_graph, es_co_url_graph, es_co_hashtag_graph, es_co_mentioned_graph):
    layer.add_nodes_from(es_all_nodes)

# Fused graph starts empty and undirected.
# Nodes enter it only through add_edge, so a node without any edge
# in the layers does not appear in the fused graph.
es_fused_graph = nx.Graph()

# Pour the edges of every layer into the fused graph, summing weights
# when the same pair of nodes is connected in more than one layer.
for layer in (es_text_graph, es_co_url_graph, es_co_hashtag_graph, es_co_mentioned_graph):
    # An edge without a 'weight' attribute contributes a weight of 1
    for src, dst, layer_weight in layer.edges(data='weight', default=1):
        if not es_fused_graph.has_edge(src, dst):
            # First layer in which this pair is connected
            es_fused_graph.add_edge(src, dst, weight=layer_weight)
        else:
            # Pair already fused from an earlier layer: accumulate
            es_fused_graph[src][dst]['weight'] += layer_weight


In [15]:
# Gather the weight of every fused edge; the cut-off is chosen from this distribution
es_edge_weights = [edge_weight for _, _, edge_weight in es_fused_graph.edges(data='weight')]

# Cut-off: 70th percentile of the fused edge weights
es_percentile_70 = np.percentile(es_edge_weights, 70)

# Keep the edges whose weight is at or above the cut-off
es_filtered_graph = filter_edges_by_weight(
    es_fused_graph,
    weight_threshold=es_percentile_70,
)

print(f"Number of nodes: {es_filtered_graph.number_of_nodes()}")
print(f"Number of edges: {es_filtered_graph.number_of_edges()}")


Number of nodes: 5225
Number of edges: 278236


In [16]:
# Score every node of the fused (unfiltered) Spanish graph, allowing up to 1000 iterations.
# NOTE(review): no weight= argument is passed, so presumably the fused edge
# weights do not influence these scores; confirm this is intended.
es_eigen_centrality = nx.eigenvector_centrality(es_fused_graph, max_iter=1000)

# Store each score on its node under the "eigenvector_centrality" attribute
nx.set_node_attributes(
    es_fused_graph,
    values=es_eigen_centrality,
    name="eigenvector_centrality",
)

# Distribution of the centrality scores, used to choose the pruning cut-off
es_centrality_values = [score for score in es_eigen_centrality.values()]

# Cut-off: 70th percentile of the scores.
# NOTE: this rebinds es_percentile_70, which the edge-filtering step used
# for the edge-weight cut-off; from here on it holds a centrality value.
es_percentile_70 = np.percentile(es_centrality_values, 70)

# Prune the edge-filtered graph; the scores themselves were computed on
# es_fused_graph (the unfiltered graph).
es_pruned_graph = prune_nodes_by_centrality(
    es_filtered_graph,
    es_eigen_centrality,
    centrality_threshold=es_percentile_70,
)

print(f"Number of nodes: {es_pruned_graph.number_of_nodes()}")
print(f"Number of edges: {es_pruned_graph.number_of_edges()}")

Number of nodes: 1890
Number of edges: 154079
