# Displaying the number of nodes and edges of a real-world graph

In [2]:
import networkx as nx

def load_directed_graph(filename):
    """Build a directed graph from a whitespace-separated edge-list file.

    Every data line must contain exactly two integer node ids,
    ``from_node to_node``. Blank lines and comment lines (first non-blank
    character is ``#``) are skipped, including comments that are indented.

    Args:
        filename: Path of the edge-list text file.

    Returns:
        An ``nx.DiGraph`` holding one ``from_node -> to_node`` edge per data
        line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a data line does not hold exactly two integers; the
            message names the file and line number.
    """
    G = nx.DiGraph()

    with open(filename, 'r') as file:
        for line_number, raw_line in enumerate(file, start=1):
            # Strip once, *then* test for comments, so that indented
            # "#" lines are skipped instead of reaching int().
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            fields = line.split()
            if len(fields) != 2:
                raise ValueError(
                    f"{filename}:{line_number}: expected 2 node ids, "
                    f"got {len(fields)}: {line!r}"
                )
            try:
                from_node, to_node = map(int, fields)
            except ValueError as err:
                raise ValueError(
                    f"{filename}:{line_number}: non-integer node id in {line!r}"
                ) from err
            G.add_edge(from_node, to_node)

    return G

# Load the edge list from disk into a directed graph.
graph = load_directed_graph('snap.txt')

# Report the size of the loaded graph.
print(f"Number of nodes: {graph.number_of_nodes()}")
print(f"Number of edges: {graph.number_of_edges()}")

Number of nodes: 77360
Number of edges: 905468


# Iterative approach - Real world

In [7]:
import networkx as nx
import time

# -----------------------
# Load directed graph
# -----------------------
def load_directed_graph(filename):
    """Build a directed graph from a whitespace-separated edge-list file.

    Every data line must contain exactly two integer node ids,
    ``from_node to_node``. Blank lines and comment lines (first non-blank
    character is ``#``) are skipped, including comments that are indented.

    Args:
        filename: Path of the edge-list text file.

    Returns:
        An ``nx.DiGraph`` holding one ``from_node -> to_node`` edge per data
        line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a data line does not hold exactly two integers; the
            message names the file and line number.
    """
    G = nx.DiGraph()
    with open(filename, 'r') as file:
        for line_number, raw_line in enumerate(file, start=1):
            # Strip once, *then* test for comments, so that indented
            # "#" lines are skipped instead of reaching int().
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            fields = line.split()
            if len(fields) != 2:
                raise ValueError(
                    f"{filename}:{line_number}: expected 2 node ids, "
                    f"got {len(fields)}: {line!r}"
                )
            try:
                from_node, to_node = map(int, fields)
            except ValueError as err:
                raise ValueError(
                    f"{filename}:{line_number}: non-integer node id in {line!r}"
                ) from err
            G.add_edge(from_node, to_node)
    return G

# -----------------------
# Iterative Kosaraju's Algorithm
# -----------------------
def kosaraju_scc_iterative(graph):
    """Find strongly connected components with Kosaraju's two-pass method.

    Both passes use explicit stacks, so no Python recursion is involved.

    Args:
        graph: Directed graph offering ``nodes()``, ``neighbors(node)`` and
            ``reverse(copy=True)``.

    Returns:
        A list of components, each a list of nodes in the order the second
        pass reached them.
    """
    # ---- Pass 1: record nodes in DFS finishing (post) order. ----
    seen = set()
    postorder = []
    for root in graph.nodes():
        if root in seen:
            continue
        # Entries are (node, is_exit). The exit entry sits beneath the
        # node's pushed neighbours, so it is popped only after they are done.
        pending = [(root, False)]
        while pending:
            vertex, is_exit = pending.pop()
            if is_exit:
                postorder.append(vertex)
                continue
            if vertex in seen:
                # Pushed more than once before its first visit; skip the rest.
                continue
            seen.add(vertex)
            pending.append((vertex, True))
            pending.extend(
                (nxt, False)
                for nxt in graph.neighbors(vertex)
                if nxt not in seen
            )

    # ---- Pass 2: sweep the transposed graph, latest finisher first. ----
    reversed_graph = graph.reverse(copy=True)
    assigned = set()
    components = []
    for root in reversed(postorder):
        if root in assigned:
            continue
        members = []
        frontier = [root]
        while frontier:
            vertex = frontier.pop()
            if vertex not in assigned:
                assigned.add(vertex)
                members.append(vertex)
                frontier.extend(
                    nxt
                    for nxt in reversed_graph.neighbors(vertex)
                    if nxt not in assigned
                )
        components.append(members)
    return components

# -----------------------
# Iterative Tarjan’s Algorithm
# -----------------------
def tarjans_scc_iterative(graph):
    """Find strongly connected components with an iterative Tarjan search.

    An explicit agenda of ``(node, phase)`` entries replaces recursion:
    ``'visit'`` discovers a node and ``'postvisit'`` runs once everything
    pushed above it has been processed.

    Args:
        graph: Directed graph offering ``nodes()`` and ``successors(node)``.

    Returns:
        A list of components in completion order; each component lists its
        nodes in the order they were popped off the component stack.
    """
    next_index = 0
    discovery = {}        # node -> discovery index; doubles as "visited"
    low = {}              # node -> smallest index known to be reachable
    component_stack = []  # discovered nodes not yet assigned to a component
    stacked = set()       # membership view of component_stack
    components = []
    agenda = []

    for root in graph.nodes():
        if root in discovery:
            continue

        agenda.append((root, 'visit'))

        while agenda:
            vertex, phase = agenda.pop()

            if phase == 'visit':
                if vertex in discovery:
                    # Queued by several predecessors; only the first counts.
                    continue

                discovery[vertex] = low[vertex] = next_index
                next_index += 1
                component_stack.append(vertex)
                stacked.add(vertex)

                agenda.append((vertex, 'postvisit'))
                # Push in reverse so successors are popped in forward order.
                for succ in reversed(list(graph.successors(vertex))):
                    if succ not in discovery:
                        agenda.append((succ, 'visit'))
                    elif succ in stacked:
                        low[vertex] = min(low[vertex], discovery[succ])

            elif phase == 'postvisit':
                # Fold in the low values of successors still on the stack.
                best = low[vertex]
                for succ in graph.successors(vertex):
                    if succ in stacked and low[succ] < best:
                        best = low[succ]
                low[vertex] = best

                if best == discovery[vertex]:
                    # vertex is a component root: unwind the stack down to it.
                    members = []
                    while True:
                        top = component_stack.pop()
                        stacked.discard(top)
                        members.append(top)
                        if top == vertex:
                            break
                    components.append(members)

    return components

# -----------------------
# Benchmark Helper
# -----------------------
def benchmark_algorithm(name, func, graph, runs=10):
    """Time ``func(graph)`` over several runs and return the mean duration.

    Args:
        name: Label for the algorithm. Not used by this function; kept so
            existing call sites keep working.
        func: Callable taking ``graph`` and returning the SCC list.
        graph: Graph object passed unchanged to ``func`` on every run.
        runs: Number of timed repetitions; must be at least 1.

    Returns:
        A ``(avg_time, result_sccs)`` tuple: the mean elapsed seconds per run
        and the value ``func`` returned on the first run.

    Raises:
        ValueError: If ``runs`` is less than 1.
    """
    if runs < 1:
        raise ValueError(f"runs must be at least 1, got {runs!r}")

    total_time = 0.0
    result_sccs = None
    for i in range(runs):
        # perf_counter is monotonic and high resolution; time.time() is a
        # wall clock that can jump if the system clock is adjusted.
        start = time.perf_counter()
        sccs = func(graph)
        total_time += time.perf_counter() - start
        if i == 0:
            # Keep only the first result; later runs are just for timing.
            result_sccs = sccs
    avg_time = total_time / runs
    return avg_time, result_sccs

# -----------------------
# Benchmark Runner
# -----------------------
def run_benchmark(filename="snap.txt", runs=10):
    """Load an edge-list graph and time both iterative SCC algorithms.

    Prints the graph size, then the average run time and SCC count for the
    iterative Kosaraju and Tarjan implementations.

    Args:
        filename: Edge-list file handed to ``load_directed_graph``.
        runs: Timed repetitions per algorithm. It is also used in the
            progress and result messages so they always match what ran.
    """
    graph = load_directed_graph(filename)
    print("Graph loaded.")
    print("Nodes:", graph.number_of_nodes())
    print("Edges:", graph.number_of_edges())

    print(f"\nRunning Kosaraju's algorithm (iterative) {runs} times...")
    kosaraju_time, kosaraju_sccs = benchmark_algorithm(
        "Kosaraju", kosaraju_scc_iterative, graph, runs=runs
    )

    print(f"Running Tarjan's algorithm (iterative) {runs} times...")
    tarjan_time, tarjan_sccs = benchmark_algorithm(
        "Tarjan", tarjans_scc_iterative, graph, runs=runs
    )

    print(f"\n--- SCC Benchmark Results (Average of {runs} runs) ---")
    print(f"Kosaraju (Iterative): {kosaraju_time:.4f} sec — SCCs: {len(kosaraju_sccs)}")
    print(f"Tarjan   (Iterative): {tarjan_time:.4f} sec — SCCs: {len(tarjan_sccs)}")

if __name__ == "__main__":
    # Executed directly (not imported): run the full iterative benchmark.
    run_benchmark()


Graph loaded.
Nodes: 77360
Edges: 905468

Running Kosaraju's algorithm (iterative) 10 times...
Running Tarjan's algorithm (iterative) 10 times...

--- SCC Benchmark Results (Average of 10 runs) ---
Kosaraju (Iterative): 2.0924 sec — SCCs: 6724
Tarjan   (Iterative): 0.4403 sec — SCCs: 6724


# Recursive Approach - Real world

In [8]:
import networkx as nx
import time
import sys

# Increase recursion limit for large graphs: the recursive DFS below nests one
# Python frame per node on the current DFS path.
# NOTE(review): this only lifts the interpreter's own guard. The Python docs
# warn that a too-high limit can crash the process if the platform stack is
# exhausted — confirm the stack size is adequate for the input graph.
sys.setrecursionlimit(10**6)

# ----------------------------------
# Load the directed graph
# ----------------------------------
def load_directed_graph(filename):
    """Build a directed graph from a whitespace-separated edge-list file.

    Every data line must contain exactly two integer node ids,
    ``from_node to_node``. Blank lines and comment lines (first non-blank
    character is ``#``) are skipped, including comments that are indented.

    Args:
        filename: Path of the edge-list text file.

    Returns:
        An ``nx.DiGraph`` holding one ``from_node -> to_node`` edge per data
        line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a data line does not hold exactly two integers; the
            message names the file and line number.
    """
    G = nx.DiGraph()
    with open(filename, 'r') as file:
        for line_number, raw_line in enumerate(file, start=1):
            # Strip once, *then* test for comments, so that indented
            # "#" lines are skipped instead of reaching int().
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            fields = line.split()
            if len(fields) != 2:
                raise ValueError(
                    f"{filename}:{line_number}: expected 2 node ids, "
                    f"got {len(fields)}: {line!r}"
                )
            try:
                from_node, to_node = map(int, fields)
            except ValueError as err:
                raise ValueError(
                    f"{filename}:{line_number}: non-integer node id in {line!r}"
                ) from err
            G.add_edge(from_node, to_node)
    return G

# ----------------------------------
# Recursive Kosaraju's Algorithm
# ----------------------------------
def kosaraju_scc_recursive(graph):
    """Find strongly connected components with recursive Kosaraju.

    Args:
        graph: Directed graph offering ``nodes()``, ``successors(node)`` and
            ``reverse(copy=True)``.

    Returns:
        A list of components, each a list of nodes in the order the second
        DFS reached them.
    """
    seen = set()
    finish_order = []

    def record_finish(vertex):
        # Post-order DFS: a node is recorded after all of its descendants.
        seen.add(vertex)
        for nxt in graph.successors(vertex):
            if nxt not in seen:
                record_finish(nxt)
        finish_order.append(vertex)

    # Step 1: order every node by DFS finishing time.
    for start in graph.nodes():
        if start not in seen:
            record_finish(start)

    # Step 2: build the transposed graph (every edge reversed).
    reversed_graph = graph.reverse(copy=True)

    assigned = set()

    def gather(vertex, members):
        # Collect everything reachable from vertex in the transposed graph.
        assigned.add(vertex)
        members.append(vertex)
        for nxt in reversed_graph.successors(vertex):
            if nxt not in assigned:
                gather(nxt, members)

    # Step 3: latest finisher first, each unassigned node seeds a component.
    components = []
    for start in reversed(finish_order):
        if start in assigned:
            continue
        members = []
        gather(start, members)
        components.append(members)

    return components

# ----------------------------------
# Benchmarking function
# ----------------------------------
def benchmark_algorithm(name, func, graph, runs=10):
    """Time ``func(graph)`` over several runs and return the mean duration.

    Args:
        name: Label for the algorithm. Not used by this function; kept so
            existing call sites keep working.
        func: Callable taking ``graph`` and returning the SCC list.
        graph: Graph object passed unchanged to ``func`` on every run.
        runs: Number of timed repetitions; must be at least 1.

    Returns:
        A ``(avg_time, result_sccs)`` tuple: the mean elapsed seconds per run
        and the value ``func`` returned on the first run.

    Raises:
        ValueError: If ``runs`` is less than 1.
    """
    if runs < 1:
        raise ValueError(f"runs must be at least 1, got {runs!r}")

    total_time = 0.0
    result_sccs = None
    for i in range(runs):
        # perf_counter is monotonic and high resolution; time.time() is a
        # wall clock that can jump if the system clock is adjusted.
        start = time.perf_counter()
        sccs = func(graph)
        total_time += time.perf_counter() - start
        if i == 0:
            # Keep only the first result; later runs are just for timing.
            result_sccs = sccs
    avg_time = total_time / runs
    return avg_time, result_sccs

# ----------------------------------
# Run benchmark for both algorithms
# ----------------------------------
def run_benchmark(filename="snap.txt", runs=10):
    """Load an edge-list graph and time two recursive SCC implementations.

    Prints the graph size, then the average run time and SCC count for the
    recursive Kosaraju implementation and for NetworkX's
    ``strongly_connected_components``.

    Args:
        filename: Edge-list file handed to ``load_directed_graph``.
        runs: Timed repetitions per algorithm. It is also used in the
            progress and result messages so they always match what ran.
    """
    graph = load_directed_graph(filename)
    print("Graph loaded.")
    print("Nodes:", graph.number_of_nodes())
    print("Edges:", graph.number_of_edges())

    # Kosaraju (Recursive)
    print(f"\nRunning Kosaraju's algorithm (recursive) {runs} times...")
    kosaraju_time, kosaraju_sccs = benchmark_algorithm(
        "Kosaraju Recursive", kosaraju_scc_recursive, graph, runs=runs
    )

    # Tarjan (Recursive via NetworkX)
    print(f"Running Tarjan's algorithm (recursive via NetworkX) {runs} times...")
    tarjan_time, tarjan_sccs = benchmark_algorithm(
        "Tarjan Recursive",
        # list() drains the generator so the work happens inside the timed call.
        lambda g: list(nx.strongly_connected_components(g)),
        graph,
        runs=runs,
    )

    # Results
    print(f"\n--- SCC Benchmark Results (Average of {runs} runs) ---")
    print(f"Kosaraju (Recursive): {kosaraju_time:.4f} sec — SCCs: {len(kosaraju_sccs)}")
    print(f"Tarjan   (Recursive): {tarjan_time:.4f} sec — SCCs: {len(tarjan_sccs)}")

# ----------------------------------
# Main entry point
# ----------------------------------
if __name__ == "__main__":
    # Executed directly (not imported): run the full recursive benchmark.
    run_benchmark()


Graph loaded.
Nodes: 77360
Edges: 905468

Running Kosaraju's algorithm (recursive) 10 times...
Running Tarjan's algorithm (recursive via NetworkX) 10 times...

--- SCC Benchmark Results (Average of 10 runs) ---
Kosaraju (Recursive): 2.0831 sec — SCCs: 6724
Tarjan   (Recursive): 0.5059 sec — SCCs: 6724
