In [1]:
# Import des différents packages

import networkx as nx
import random
from collections import defaultdict
from typing import Dict, List
import matplotlib
import json
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Persist the computed statistics in stats_twitter.json

def save_stats(stats):
    """Write ``stats`` to ``stats_twitter.json`` as key-sorted, indented JSON.

    The payload is serialised in memory *before* the file is opened for
    writing. Opening with mode ``"w"`` truncates the file immediately, so
    serialising afterwards would wipe the previously saved statistics whenever
    encoding fails part-way through.

    Args:
        stats: JSON-serialisable mapping of statistic name to value.

    Raises:
        TypeError, ValueError: propagated from ``json.dumps`` when ``stats``
            cannot be encoded; the existing file is left untouched.
    """
    payload = json.dumps(stats, sort_keys=True, indent=4)
    with open("stats_twitter.json", "w") as f:
        f.write(payload)

def load_stats() -> Dict:
    """Load the statistics previously written to ``stats_twitter.json``.

    Returns:
        The decoded JSON object, or an empty dict when the file does not exist
        yet (e.g. the very first run, before anything has been saved).

    Note:
        JSON object keys are always strings, so dictionaries that were keyed by
        integer node ids when saved come back keyed by ``str``.
    """
    try:
        with open("stats_twitter.json", "r") as f:
            return json.load(f)
    except FileNotFoundError:
        # Nothing has been cached yet: start from an empty statistics dict
        # instead of aborting the notebook on a fresh checkout.
        return {}

def print_stats(stats):
    """Pretty-print ``stats`` on stdout as key-sorted JSON with 4-space indent."""
    rendered = json.dumps(stats, sort_keys=True, indent=4)
    print(rendered)

In [4]:
# Helpers that change the visual styling of the graph

def highlight_nodes(graph: nx.Graph, nodes: List = []):
    """Reset every node to the default colour, then paint ``nodes`` opaque red.

    Args:
        graph: graph whose per-node ``"viz"`` attribute is updated in place.
        nodes: node ids to highlight; each must exist in ``graph``.
    """
    reset_colors(graph)
    node_attrs = graph.nodes
    for target in nodes:
        # A fresh dict per node so the colour entries are never shared.
        node_attrs[target]["viz"]["color"] = dict(r=255, g=0, b=0, a=1)

def highlight_nodes_importance(graph: nx.Graph, node_importance : Dict = {}):
    """Colour nodes on a red-to-green scale according to an importance score.

    All nodes are first reset to the default colour. The scores are then
    min-max normalised: the lowest score maps to pure red, the highest to pure
    green, and values in between are interpolated linearly.

    Args:
        graph: graph whose per-node ``"viz"`` attribute is updated in place.
        node_importance: mapping ``node id -> score``. Keys are used directly
            to index ``graph.nodes``, so they must have the same type as the
            graph's node ids. With an empty mapping only the colour reset is
            performed.
    """
    reset_colors(graph)
    if not node_importance:
        # np.min / np.max raise ValueError on an empty sequence, which made the
        # default argument unusable.
        return

    scores = list(node_importance.values())
    # Local names avoid shadowing the ``min`` / ``max`` builtins.
    lowest, highest = np.min(scores), np.max(scores)
    norm = matplotlib.colors.Normalize(vmin=lowest, vmax=highest)
    for node, importance in node_importance.items():
        g = int(255 * norm(importance))
        r = 255 - g
        graph.nodes[node]["viz"]["color"] = {'r': r, 'g': g, 'b': 0, 'a': 1}

def highlight_nodes_communities(graph: nx.Graph, communities: List[List] = []):
    """Give each community its own colour taken from the ``hsv`` colormap.

    All nodes are reset to the default colour first. Community ``i`` out of
    ``n`` is assigned the colormap value at ``i / n``.

    Args:
        graph: graph whose per-node ``"viz"`` attribute is updated in place.
        communities: list of communities, each a list of node ids.
    """
    reset_colors(graph)
    cmap: LinearSegmentedColormap = plt.get_cmap('hsv')
    total = len(communities)
    for index, members in enumerate(communities):
        red, green, blue, alpha = cmap(index / total)
        # The colormap yields floats in [0, 1]; scale RGB to 0-255 integers
        # and keep alpha as returned.
        red, green, blue = (int(channel * 255) for channel in (red, green, blue))
        for member in members:
            graph.nodes[member]["viz"]["color"] = {'r': red, 'g': green, 'b': blue, 'a': alpha}


def reset_colors(graph: nx.Graph):
    """Set every node's ``viz.color`` to the default RGBA (173, 216, 230, 1).

    A ``"viz"`` attribute dict is created for nodes that do not have one yet.
    """
    for node in graph:
        attributes = graph.nodes[node]
        viz = attributes.setdefault("viz", {})
        viz["color"] = {'r': 173, 'g': 216, 'b': 230, 'a': 1}


def circular_layout(graph: nx.Graph):
    """Store a circular layout in each node's ``viz.position`` attribute.

    Positions come from ``nx.circular_layout`` with a scale of four times the
    number of nodes; ``z`` is always ``0.0``. A ``"viz"`` attribute dict is
    created for nodes that do not have one yet.
    """
    layout_scale = 4 * graph.number_of_nodes()
    coordinates = nx.circular_layout(graph, scale=layout_scale)
    for node, point in coordinates.items():
        viz = graph.nodes[node].setdefault("viz", {})
        viz["position"] = {"x": point[0], "y": point[1], "z": 0.0}

In [6]:
# Edge-list file that is loaded into the graph below.
DATASET_PATH = 'datasets/twitter/twitter_combined.txt'
# Statistics previously stored in stats_twitter.json; the cells below add
# entries to this dict and save it again.
STATS = load_stats()

# Build a directed graph from the edge list, parsing node ids as integers.
G = nx.read_edgelist(path=DATASET_PATH, create_using=nx.DiGraph(), nodetype=int)

# Attach a viz position and the default viz colour to every node.
circular_layout(G)
reset_colors(G)

# Export the styled graph in GEXF format.
nx.write_gexf(G, "graphs/twitter/graph.gexf")

In [9]:
# Record the size of the graph. Both values are stored before saving so the
# whole statistics file is rewritten once instead of twice in a row.
STATS["number_of_nodes"] = G.number_of_nodes()
STATS["number_of_edges"] = G.number_of_edges()
save_stats(STATS)

## Étude de la centralité

In [11]:
# Compute the degree centrality of the nodes of G (the graph is passed as a
# whole; no sampling step is applied here).

STATS["degree_centrality"] = nx.degree_centrality(G)
save_stats(STATS)

# Colour the nodes by their degree centrality and export the result as GEXF.
highlight_nodes_importance(G, STATS["degree_centrality"])
nx.write_gexf(G, "graphs/twitter/degree_centrality.gexf")

In [14]:
# Compute the eigenvector centrality (at most 500 iterations, tolerance 1e-06)

STATS["eigenvector_centrality"] = nx.eigenvector_centrality(G, max_iter=500, tol=1e-06)
save_stats(STATS)

# Colour the nodes by their eigenvector centrality and export the result as GEXF.
highlight_nodes_importance(G, STATS["eigenvector_centrality"])
nx.write_gexf(G, "graphs/twitter/eigenvector_centrality.gexf")

In [17]:
# Compute the PageRank score of every node (library default parameters)

STATS["pagerank"] = nx.pagerank(G)
save_stats(STATS)

# Colour the nodes by their PageRank score and export the result as GEXF.
highlight_nodes_importance(G, STATS["pagerank"])
nx.write_gexf(G, "graphs/twitter/pagerank.gexf")

In [18]:
# Compute the closeness centrality of every node

STATS["closeness_centrality"] = nx.closeness_centrality(G)
save_stats(STATS)
# Colour the nodes by their closeness centrality and export the result as GEXF.
highlight_nodes_importance(G, STATS["closeness_centrality"])
nx.write_gexf(G, "graphs/twitter/closeness_centrality.gexf")

In [None]:
# Compute the betweenness centrality of every node
# NOTE(review): this cell carries no execution count, so it may not have been
# run in this session; later cells read STATS["betweenness_centrality"] and
# then rely on whatever was loaded from stats_twitter.json — confirm.

STATS["betweenness_centrality"] = nx.betweenness_centrality(G)
save_stats(STATS)
# Colour the nodes by their betweenness centrality and export the result as GEXF.
highlight_nodes_importance(G, STATS["betweenness_centrality"])
nx.write_gexf(G, "graphs/twitter/betweenness_centrality.gexf")

## Linear Threshold

In [None]:
# Linear Threshold model for information diffusion

def linear_threshold_simulation(G, seed_users, threshold):
    """Simulate a Linear Threshold cascade with a uniform threshold.

    Starting from ``seed_users``, the cascade proceeds in rounds. Every user
    activated in the previous round exposes the users it points to
    (``G.neighbors``). An exposed inactive user becomes active when the
    fraction of its *influencers* that are already active reaches
    ``threshold``.

    The influencers of a user are the users that can reach it in one step:

    * directed graph: its predecessors, measured against its in-degree;
    * undirected graph: its neighbours, measured against its degree.

    Counting successors over the total degree on a directed graph would leave
    out the very user that exposed the candidate, so the seed's influence
    would never be taken into account.

    Args:
        G: networkx graph (directed or undirected).
        seed_users: initially active node ids; not modified.
        threshold: activation threshold, compared with ``>=`` against the
            active fraction (a value in ``[0, 1]``).

    Returns:
        List of active node ids: the seeds first, followed by the other users
        in activation order.
    """
    if G.is_directed():
        influencers_of, exposure_degree = G.predecessors, G.in_degree
    else:
        influencers_of, exposure_degree = G.neighbors, G.degree

    active_users = list(seed_users)
    # Set mirror of ``active_users`` for O(1) membership tests; the list keeps
    # the activation order that is returned to the caller.
    active_lookup = set(active_users)
    new_active_users = list(active_users)

    while new_active_users:
        new_active_users_temp = []
        for user in new_active_users:
            for neighbor in G.neighbors(user):
                if neighbor in active_lookup:
                    continue
                active_count = sum(1 for n in influencers_of(neighbor) if n in active_lookup)
                # ``neighbor`` was reached through an edge from ``user``, so
                # its exposure degree is at least 1 and the division is safe.
                if active_count / exposure_degree(neighbor) >= threshold:
                    new_active_users_temp.append(neighbor)
                    active_users.append(neighbor)
                    active_lookup.add(neighbor)
        new_active_users = new_active_users_temp

    return active_users

In [None]:
# Greedy influence maximization under the Linear Threshold model

def influence_maximization_threshold(G, k, threshold, num_simulations=100):
    """Greedily pick up to ``k`` seed users maximising the Linear Threshold spread.

    In each of ``k`` rounds every node that is not yet a seed is tried as an
    additional seed; the candidate with the largest average number of active
    users over ``num_simulations`` runs of ``linear_threshold_simulation`` is
    kept (the first such candidate on ties).

    Args:
        G: networkx graph.
        k: number of seed users to select.
        threshold: activation threshold forwarded to the simulation.
        num_simulations: number of simulation runs averaged per candidate.

    Returns:
        List of selected seed node ids in selection order. It is shorter than
        ``k`` when the graph has fewer than ``k`` nodes.
    """
    seed_users = []

    for _ in range(k):
        max_influence = -1
        best_candidate = None

        for candidate in G.nodes():
            if candidate in seed_users:
                continue
            trial_seeds = seed_users + [candidate]
            total_active_users = sum(
                len(linear_threshold_simulation(G, trial_seeds, threshold))
                for _run in range(num_simulations)
            )
            average_active_users = total_active_users / num_simulations

            if average_active_users > max_influence:
                max_influence = average_active_users
                best_candidate = candidate

        if best_candidate is None:
            # Every node is already a seed: stop instead of appending None,
            # which is not a node of the graph.
            break
        seed_users.append(best_candidate)

    return seed_users

In [None]:
# Choose the top K seed users based on each centrality measure

K = 10

# STATS entries that were reloaded from stats_twitter.json are keyed by strings
# (JSON object keys are always strings) while G was read with nodetype=int.
# Casting each selected seed to int makes the seeds valid graph nodes whether
# the centrality was computed in this session or loaded from the file.
top_degree_users = [int(node) for node in sorted(STATS["degree_centrality"], key=STATS["degree_centrality"].get, reverse=True)[:K]]
top_closeness_users = [int(node) for node in sorted(STATS["closeness_centrality"], key=STATS["closeness_centrality"].get, reverse=True)[:K]]
top_betweenness_users = [int(node) for node in sorted(STATS["betweenness_centrality"], key=STATS["betweenness_centrality"].get, reverse=True)[:K]]
top_eigenvector_users = [int(node) for node in sorted(STATS["eigenvector_centrality"], key=STATS["eigenvector_centrality"].get, reverse=True)[:K]]
top_pagerank_users = [int(node) for node in sorted(STATS["pagerank"], key=STATS["pagerank"].get, reverse=True)[:K]]


seed_user_sets = {
    'degree': top_degree_users,
    'closeness': top_closeness_users,
    'betweenness': top_betweenness_users,
    'eigenvector': top_eigenvector_users,
    'pagerank': top_pagerank_users
}

# Test different threshold values
thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]

results = {}
for centrality, seed_users in seed_user_sets.items():
    results[centrality] = {}
    for threshold in thresholds:
        # The Linear Threshold simulation involves no randomness: for a given
        # graph, seed set and threshold it always returns the same users, so a
        # single run equals the average of any number of repeated runs.
        active_users = linear_threshold_simulation(G, seed_users, threshold)
        # Stored as a float, matching the type an averaged value would have.
        results[centrality][threshold] = float(len(active_users))

print(results)

## Independent Cascade

In [None]:
# Independent Cascade model for information diffusion

def independent_cascade_simulation(G, seed_users, p):
    """Simulate one Independent Cascade run with a uniform probability.

    Starting from ``seed_users``, the cascade proceeds in rounds: every user
    activated in the previous round gets a single attempt to activate each of
    its still-inactive neighbours (``G.neighbors``), succeeding with
    probability ``p``. The run stops when a round activates nobody.

    Randomness comes from the module-level ``random`` generator; seed it
    beforehand for reproducible runs.

    Args:
        G: networkx graph.
        seed_users: initially active node ids; not modified.
        p: activation probability of each attempt.

    Returns:
        List of active node ids: the seeds first, followed by the other users
        in activation order.
    """
    active_users = list(seed_users)
    # Set mirror of ``active_users`` for O(1) membership tests; the list keeps
    # the activation order that is returned to the caller.
    active_lookup = set(active_users)
    new_active_users = list(active_users)

    while new_active_users:
        new_active_users_temp = []
        for user in new_active_users:
            for neighbor in G.neighbors(user):
                # The random draw only happens for inactive neighbours.
                if neighbor not in active_lookup and random.random() < p:
                    new_active_users_temp.append(neighbor)
                    active_users.append(neighbor)
                    active_lookup.add(neighbor)
        new_active_users = new_active_users_temp

    return active_users

In [None]:
# Greedy influence maximization under the Independent Cascade model

def influence_maximization_cascade(G, k, p, num_simulations=100):
    """Greedily pick up to ``k`` seed users maximising the Independent Cascade spread.

    In each of ``k`` rounds every node that is not yet a seed is tried as an
    additional seed; the candidate with the largest average number of active
    users over ``num_simulations`` runs of ``independent_cascade_simulation``
    is kept (the first such candidate on ties).

    Args:
        G: networkx graph.
        k: number of seed users to select.
        p: activation probability forwarded to the simulation.
        num_simulations: number of simulation runs averaged per candidate.

    Returns:
        List of selected seed node ids in selection order. It is shorter than
        ``k`` when the graph has fewer than ``k`` nodes.
    """
    seed_users = []

    for _ in range(k):
        max_influence = -1
        best_candidate = None

        for candidate in G.nodes():
            if candidate in seed_users:
                continue
            trial_seeds = seed_users + [candidate]
            total_active_users = sum(
                len(independent_cascade_simulation(G, trial_seeds, p))
                for _run in range(num_simulations)
            )
            average_active_users = total_active_users / num_simulations

            if average_active_users > max_influence:
                max_influence = average_active_users
                best_candidate = candidate

        if best_candidate is None:
            # Every node is already a seed: stop instead of appending None,
            # which is not a node of the graph.
            break
        seed_users.append(best_candidate)

    return seed_users

In [None]:
# Choose the top K seed users based on each centrality measure

K = 10

# STATS entries that were reloaded from stats_twitter.json are keyed by strings
# (JSON object keys are always strings) while G was read with nodetype=int.
# Casting each selected seed to int makes the seeds valid graph nodes whether
# the centrality was computed in this session or loaded from the file.
top_degree_users = [int(node) for node in sorted(STATS["degree_centrality"], key=STATS["degree_centrality"].get, reverse=True)[:K]]
top_closeness_users = [int(node) for node in sorted(STATS["closeness_centrality"], key=STATS["closeness_centrality"].get, reverse=True)[:K]]
top_betweenness_users = [int(node) for node in sorted(STATS["betweenness_centrality"], key=STATS["betweenness_centrality"].get, reverse=True)[:K]]
top_eigenvector_users = [int(node) for node in sorted(STATS["eigenvector_centrality"], key=STATS["eigenvector_centrality"].get, reverse=True)[:K]]
top_pagerank_users = [int(node) for node in sorted(STATS["pagerank"], key=STATS["pagerank"].get, reverse=True)[:K]]


seed_user_sets = {
    'degree': top_degree_users,
    'closeness': top_closeness_users,
    'betweenness': top_betweenness_users,
    'eigenvector': top_eigenvector_users,
    'pagerank': top_pagerank_users
}

# Test different probabilities for the Independent Cascade Model
probabilities = [0.1, 0.2, 0.3, 0.4, 0.5]

# The cascade draws from the global ``random`` generator; seeding it here makes
# re-running this cell reproduce the same averages.
IC_RANDOM_SEED = 42
random.seed(IC_RANDOM_SEED)

# Number of stochastic runs averaged for each (centrality, probability) pair
num_simulations = 10

results = {}
for centrality, seed_users in seed_user_sets.items():
    results[centrality] = {}
    for p in probabilities:
        # Run multiple simulations and take the average
        total_active_users = 0
        for _ in range(num_simulations):
            active_users = independent_cascade_simulation(G, seed_users, p)
            total_active_users += len(active_users)

        average_active_users = total_active_users / num_simulations
        results[centrality][p] = average_active_users

print(results)