In [2]:
import networkx as nx
from concurrent.futures import ThreadPoolExecutor
from collections import deque
import time

def load_directed_graph(filename):
    """
    Loads a directed graph from a whitespace-separated edge-list file.

    Every data line must hold exactly two integer node ids, ``from_node`` and
    ``to_node``. Blank lines and lines whose first non-whitespace character is
    ``#`` are skipped.

    Args:
        filename: Path of the edge-list file to read.

    Returns:
        nx.DiGraph: Graph with an edge added for every data line.

    Raises:
        ValueError: If a data line does not consist of exactly two integers;
            the message names the file and the 1-based line number.
    """
    G = nx.DiGraph()
    with open(filename, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            # Strip once so indented comments and whitespace-only lines are
            # skipped instead of reaching the integer parser.
            record = line.strip()
            if not record or record.startswith("#"):
                continue
            try:
                from_node, to_node = map(int, record.split())
            except ValueError as exc:
                # Same exception type as before, but now it says where the
                # bad record is.
                raise ValueError(
                    f"{filename}:{line_number}: expected two integer node ids, got {record!r}"
                ) from exc
            G.add_edge(from_node, to_node)
    return G

def forward_reach(G, start_node):
    """
    Collects every node reachable from ``start_node`` by following edges in
    their own direction, using a breadth-first traversal.

    Args:
        G: Graph object exposing ``successors(node)``.
        start_node: Node the traversal starts from.

    Returns:
        set: ``start_node`` together with all nodes reachable from it.
    """
    seen = set()
    frontier = deque((start_node,))
    while frontier:
        current = frontier.popleft()
        if current in seen:
            # Already expanded through another path.
            continue
        seen.add(current)
        for neighbour in G.successors(current):
            frontier.append(neighbour)
    return seen

def backward_reach(G, start_node):
    """
    Collects every node from which ``start_node`` can be reached, i.e. a
    breadth-first traversal that follows edges against their direction.

    Args:
        G: Graph object exposing ``predecessors(node)``.
        start_node: Node the traversal starts from.

    Returns:
        set: ``start_node`` together with all nodes that can reach it.
    """
    seen = set()
    frontier = deque((start_node,))
    while frontier:
        current = frontier.popleft()
        if current in seen:
            # Already expanded through another path.
            continue
        seen.add(current)
        for neighbour in G.predecessors(current):
            frontier.append(neighbour)
    return seen

def find_scc_parallel(G):
    """
    Identifies SCCs using a simulated parallel forward-backward reach algorithm.

    A pivot is taken from the nodes not yet assigned to a component. The set
    of nodes reachable from it (forward) and the set of nodes that reach it
    (backward) are computed as two tasks on a two-worker thread pool; their
    intersection is the pivot's strongly connected component. That component
    is removed from the graph and the process repeats until no nodes remain.

    Note:
        ``G`` is modified in place: all of its nodes have been removed by the
        time the function returns. Pass a copy to keep the original graph.

    Args:
        G: Directed graph providing ``nodes()``, ``successors()``,
            ``predecessors()`` and ``remove_nodes_from()``.

    Returns:
        list[set]: One set of nodes per strongly connected component, in the
        order in which the components were found.
    """
    remaining_nodes = set(G.nodes())
    sccs = []

    # One pool serves every pivot. Building it inside the loop would start and
    # join a fresh pair of worker threads for each component found.
    with ThreadPoolExecutor(max_workers=2) as executor:
        while remaining_nodes:
            pivot = next(iter(remaining_nodes))

            future_fwd = executor.submit(forward_reach, G, pivot)
            future_bwd = executor.submit(backward_reach, G, pivot)
            # Both searches must have finished before G is mutated below.
            forward_set = future_fwd.result()
            backward_set = future_bwd.result()

            scc = forward_set & backward_set
            sccs.append(scc)

            remaining_nodes -= scc
            G.remove_nodes_from(scc)

    return sccs

# ---------------------- Main Execution ----------------------

if __name__ == "__main__":
    filename = "snap.txt"

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed intervals; time.time() follows the system clock and
    # can jump if that clock is adjusted.
    start_load = time.perf_counter()
    graph = load_directed_graph(filename)
    end_load = time.perf_counter()
    print(f"Graph loaded in {end_load - start_load:.4f} seconds.")
    print("Number of nodes:", graph.number_of_nodes())
    print("Number of edges:", graph.number_of_edges())

    # Time the SCC detection
    print("\nFinding SCCs using simulated parallel algorithm...")
    start_scc = time.perf_counter()
    # find_scc_parallel removes nodes from the graph it is given, so hand it a
    # copy to preserve the original.
    sccs = find_scc_parallel(graph.copy())
    end_scc = time.perf_counter()

    print(f"\nTotal SCCs found: {len(sccs)}")
    # default=set() keeps the report working when the graph is empty and no
    # component was found (max() on an empty list would raise ValueError).
    largest_scc = max(sccs, key=len, default=set())
    print("Size of largest SCC:", len(largest_scc))
    print(f"Time taken for SCC detection: {end_scc - start_scc:.4f} seconds.")


Graph loaded in 2.0072 seconds.
Number of nodes: 77360
Number of edges: 905468

Finding SCCs using simulated parallel algorithm...

Total SCCs found: 6724
Size of largest SCC: 70355
Time taken for SCC detection: 2.4270 seconds.


In [3]:
import networkx as nx
from concurrent.futures import ThreadPoolExecutor
from collections import deque
import time

def load_directed_graph(filename):
    """
    Loads a directed graph from a whitespace-separated edge-list file.

    Every data line must hold exactly two integer node ids, ``from_node`` and
    ``to_node``. Blank lines and lines whose first non-whitespace character is
    ``#`` are skipped.

    Args:
        filename: Path of the edge-list file to read.

    Returns:
        nx.DiGraph: Graph with an edge added for every data line.

    Raises:
        ValueError: If a data line does not consist of exactly two integers;
            the message names the file and the 1-based line number.
    """
    G = nx.DiGraph()
    with open(filename, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            # Strip once so indented comments and whitespace-only lines are
            # skipped instead of reaching the integer parser.
            record = line.strip()
            if not record or record.startswith("#"):
                continue
            try:
                from_node, to_node = map(int, record.split())
            except ValueError as exc:
                # Same exception type as before, but now it says where the
                # bad record is.
                raise ValueError(
                    f"{filename}:{line_number}: expected two integer node ids, got {record!r}"
                ) from exc
            G.add_edge(from_node, to_node)
    return G

def forward_reach(G, start_node):
    """
    Breadth-first search along outgoing edges.

    Args:
        G: Graph object exposing ``successors(node)``.
        start_node: Node the search starts from.

    Returns:
        set: ``start_node`` plus every node reachable from it.
    """
    reached = set()
    pending = deque()
    pending.append(start_node)
    while len(pending) > 0:
        current = pending.popleft()
        if current in reached:
            # Duplicates can be queued; expand each node only once.
            continue
        reached.add(current)
        pending += G.successors(current)
    return reached

def backward_reach(G, start_node):
    """
    Breadth-first search along incoming edges.

    Args:
        G: Graph object exposing ``predecessors(node)``.
        start_node: Node the search starts from.

    Returns:
        set: ``start_node`` plus every node that can reach it.
    """
    reached = set()
    pending = deque()
    pending.append(start_node)
    while len(pending) > 0:
        current = pending.popleft()
        if current in reached:
            # Duplicates can be queued; expand each node only once.
            continue
        reached.add(current)
        pending += G.predecessors(current)
    return reached

def find_scc_parallel(G):
    """
    Identifies strongly connected components with a forward-backward reach
    algorithm whose two searches are submitted to a thread pool.

    A pivot is taken from the nodes not yet assigned to a component. The set
    of nodes reachable from it (forward) and the set of nodes that reach it
    (backward) are computed as two tasks on a two-worker thread pool; their
    intersection is the pivot's strongly connected component. That component
    is removed from the graph and the process repeats until no nodes remain.

    Note:
        ``G`` is modified in place: all of its nodes have been removed by the
        time the function returns. Pass a copy to keep the original graph.

    Args:
        G: Directed graph providing ``nodes()``, ``successors()``,
            ``predecessors()`` and ``remove_nodes_from()``.

    Returns:
        list[set]: One set of nodes per strongly connected component, in the
        order in which the components were found.
    """
    remaining_nodes = set(G.nodes())
    sccs = []

    # One pool serves every pivot. Building it inside the loop would start and
    # join a fresh pair of worker threads for each component found.
    with ThreadPoolExecutor(max_workers=2) as executor:
        while remaining_nodes:
            pivot = next(iter(remaining_nodes))

            future_fwd = executor.submit(forward_reach, G, pivot)
            future_bwd = executor.submit(backward_reach, G, pivot)
            # Both searches must have finished before G is mutated below.
            forward_set = future_fwd.result()
            backward_set = future_bwd.result()

            scc = forward_set & backward_set
            sccs.append(scc)

            remaining_nodes -= scc
            G.remove_nodes_from(scc)

    return sccs

# ---------------------- Main Execution ----------------------

if __name__ == "__main__":
    filename = "snap.txt"
    # Number of timed repetitions; used for the loop, the average and the
    # messages so the three cannot drift apart.
    num_runs = 10

    # Load the graph once.
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed intervals; time.time() follows the system clock and
    # can jump if that clock is adjusted.
    start_load = time.perf_counter()
    original_graph = load_directed_graph(filename)
    end_load = time.perf_counter()
    print(f"Graph loaded in {end_load - start_load:.4f} seconds.")
    print("Number of nodes:", original_graph.number_of_nodes())
    print("Number of edges:", original_graph.number_of_edges())

    total_time = 0
    final_sccs = None

    print(f"\nRunning SCC detection {num_runs} times...")
    for i in range(num_runs):
        # find_scc_parallel removes nodes from the graph it is given, so each
        # run gets a fresh copy; the copy is made outside the timed region.
        graph_copy = original_graph.copy()
        start = time.perf_counter()
        sccs = find_scc_parallel(graph_copy)
        end = time.perf_counter()
        run_time = end - start
        total_time += run_time
        print(f"Run {i + 1}: {run_time:.4f} seconds")
        if i == 0:
            final_sccs = sccs  # Save for reporting

    avg_time = total_time / num_runs
    print(f"\nAverage time over {num_runs} runs: {avg_time:.4f} seconds")
    print(f"Total SCCs found in first run: {len(final_sccs)}")
    # default=() keeps the report working when the graph is empty and no
    # component was found (max() on an empty list would raise ValueError).
    print(f"Size of largest SCC in first run: {len(max(final_sccs, key=len, default=()))}")


Graph loaded in 1.5218 seconds.
Number of nodes: 77360
Number of edges: 905468

Running SCC detection 10 times...
Run 1: 0.9425 seconds
Run 2: 0.9388 seconds
Run 3: 0.9421 seconds
Run 4: 0.9464 seconds
Run 5: 0.9451 seconds
Run 6: 0.9557 seconds
Run 7: 0.9485 seconds
Run 8: 0.9392 seconds
Run 9: 0.9457 seconds
Run 10: 0.9474 seconds

Average time over 10 runs: 0.9452 seconds
Total SCCs found in first run: 6724
Size of largest SCC in first run: 70355
