In [None]:
!pip install torch_geometric
import torch
import networkx as nx
import numpy as np
from torch_geometric.datasets import Planetoid,WebKB
import torch_geometric.transforms as T
from tqdm import tqdm

def load_cora_network():
    """
    Load the configured graph dataset as an undirected NetworkX graph.

    Despite the function's name, the dataset currently configured here is
    WebKB "Wisconsin" (stored under ``data/WebKB``).  To analyse a different
    network, change the dataset class, ``root`` and ``name`` below.

    Returns:
    - G: undirected NetworkX graph with nodes 0 .. data.num_nodes - 1 and the
      edges listed in ``data.edge_index``
    """
    dataset = WebKB(root='data/WebKB', name='Wisconsin', transform=T.NormalizeFeatures())
    data = dataset[0]

    G = nx.Graph()

    # Register every node before adding edges.  Building the graph from the
    # edge list alone would drop any node that appears in no edge and would
    # order nodes by first appearance in the edge list, which breaks code that
    # indexes arrays or lists by node id.
    G.add_nodes_from(range(data.num_nodes))

    # edge_index holds one row of source ids and one row of target ids;
    # tolist() converts them to plain Python ints usable as node labels.
    sources, targets = data.edge_index.tolist()
    G.add_edges_from(zip(sources, targets))
    return G

def sir_simulation(G, seed_node, beta, gamma=0.2, max_time=100):
    """
    Run a single discrete-time SIR simulation starting from a seed node.

    In every step each currently infected node tries to infect each of its
    never-infected neighbors with probability ``beta`` and then recovers with
    probability ``gamma``.  Nodes infected during a step start spreading in
    the following step.  Randomness comes from ``np.random.random``, so runs
    can be made reproducible with ``np.random.seed``.

    Parameters:
    - G: NetworkX graph (only ``G.neighbors(node)`` is used); node labels may
      be any hashable values, they need not be the integers 0..N-1
    - seed_node: Starting node for infection
    - beta: Infection rate (per-contact infection probability per step)
    - gamma: Recovery rate (per-step recovery probability)
    - max_time: Maximum simulation steps

    Returns:
    - final_infected_count: Number of nodes that were infected during
      simulation, including the seed node
    """
    # Every node that has left the susceptible state (seed included).  Using
    # set membership instead of an array indexed by node label keeps the
    # simulation correct for graphs whose labels are not exactly 0..N-1.
    ever_infected = {seed_node}

    infected = {seed_node}
    recovered = set()

    for _ in range(max_time):
        # The epidemic has died out: nobody is left to spread it.
        if not infected:
            break

        new_infected = set()
        for node in infected:
            # Try to infect neighbors; a random number is drawn only for
            # neighbors that are still susceptible.
            for neighbor in G.neighbors(node):
                if neighbor not in ever_infected and np.random.random() < beta:
                    new_infected.add(neighbor)
                    # Mark immediately so no other node re-infects it this step.
                    ever_infected.add(neighbor)

            # Recovery is decided after the node had its chance to spread.
            if np.random.random() < gamma:
                recovered.add(node)

        # Newly infected nodes join the spreaders; recovered ones drop out.
        infected = infected.union(new_infected) - recovered

    return len(ever_infected)

def calculate_discrimination(G, beta, num_simulations=100, top_fraction=0.1):
    """
    Calculate discrimination metric D for a given infection rate beta.

    Each node's influence capacity is the mean outbreak size over
    ``num_simulations`` SIR runs seeded at that node.  With the capacities
    sorted in descending order, the metric is

        D = (CH - CL) / (n_top * (H - L))

    where CH is the capacity sum of the top ``n_top`` nodes, CL the capacity
    sum of all remaining nodes, and H / L the largest / smallest capacity.
    NOTE(review): CL covers every node outside the top group, not a bottom
    group of equal size -- confirm this matches the intended definition.

    Parameters:
    - G: NetworkX graph
    - beta: Infection rate to test
    - num_simulations: Number of SIR simulations per node
    - top_fraction: Fraction of nodes to consider as high influence group

    Returns:
    - D: Discrimination metric value; 0.0 when the graph is empty or when all
      nodes have the same influence capacity (nothing to discriminate)
    - influence_capacities: List of influence capacities for each node, in
      ``G.nodes()`` order
    """
    nodes = list(G.nodes())
    N = len(nodes)
    if N == 0:
        # No nodes: there is no capacity to rank.
        return 0.0, []

    # Calculate influence capacity for each node
    influence_capacities = []
    for node in tqdm(nodes, desc=f"Testing beta={beta:.2f}"):
        total_infected = sum(
            sir_simulation(G, node, beta) for _ in range(num_simulations)
        )
        influence_capacities.append(total_infected / num_simulations)

    # Sort nodes by influence capacity
    sorted_capacities = sorted(influence_capacities, reverse=True)

    # Keep at least one node in the top group: int(N * top_fraction) is 0 for
    # small graphs, which would make the denominator below zero.
    n_top = max(1, int(N * top_fraction))
    CH = sum(sorted_capacities[:n_top])
    CL = sum(sorted_capacities[n_top:])
    H = sorted_capacities[0]
    L = sorted_capacities[-1]

    # If every node has the same capacity the metric is undefined (0 / 0);
    # report zero discrimination instead of raising ZeroDivisionError.
    spread = H - L
    if spread == 0:
        return 0.0, influence_capacities

    # Calculate discrimination metric D
    D = (CH - CL) / (n_top * spread)

    return D, influence_capacities

def find_optimal_beta(G, beta_range=None):
    """
    Scan candidate infection rates and pick the one with the highest
    discrimination value.

    Parameters:
    - G: NetworkX graph
    - beta_range: Iterable of beta values to test; when None the values
      np.arange(0.1, 1.0, 0.2) are used

    Returns:
    - optimal_beta: Beta value that maximizes discrimination (the first one
      tested in case of a tie)
    - max_D: Maximum discrimination value achieved
    - best_influences: Influence capacities for each node at optimal beta
    """
    candidates = np.arange(0.1, 1.0,0.2) if beta_range is None else beta_range

    # One (beta, D, capacities) record per tested rate, in testing order.
    scored = []
    for rate in candidates:
        score, capacities = calculate_discrimination(G, rate)
        scored.append((rate, score, capacities))
        print(f"Beta: {rate:.1f}, Discrimination: {score:.4f}")

    # max() keeps the earliest record among equal scores.
    winner = max(scored, key=lambda record: record[1])
    optimal_beta, max_D, best_influences = winner
    return optimal_beta, max_D, best_influences

def main():
    """
    Load the network, search for the infection rate with the highest
    discrimination value, and print that rate together with the ten nodes
    of largest influence capacity.
    """
    # Load the network (the dataset is chosen inside load_cora_network)
    print("Loading network dataset...")
    G = load_cora_network()
    print(f"Loaded network with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

    # Find optimal infection rate
    print("\nFinding optimal infection rate...")
    optimal_beta, max_D, best_influences = find_optimal_beta(G)

    print("\nResults:")
    print(f"Optimal infection rate (beta): {optimal_beta:.2f}")
    print(f"Maximum discrimination value: {max_D:.4f}")

    # Get top influential nodes.  The capacities are listed in G.nodes()
    # order, so pair them with the real node ids; enumerate() would report
    # list positions, which differ from the ids whenever the graph's node
    # order is not 0, 1, 2, ...
    node_influences = list(zip(G.nodes(), best_influences))
    top_nodes = sorted(node_influences, key=lambda x: x[1], reverse=True)[:10]

    print("\nTop 10 most influential nodes:")
    for node_id, influence in top_nodes:
        print(f"Node {node_id}: Influence capacity = {influence:.2f}")

# Run the analysis only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()