In [1]:
class DirectedGraph:
    """Directed graph keyed by vertex label, with SCC-size computation.

    Vertices are created lazily through :meth:`vertex`; edges are added with
    ``graph.vertex(u).connect_to(v)``.  :meth:`dfs_scc` and
    :meth:`dfs_scc_it` both implement the two-pass (Kosaraju) algorithm:

    1. DFS on the reversed graph to record the order in which vertices finish.
    2. DFS on the graph itself, taking roots in decreasing finish order; every
       tree found in this pass is one strongly connected component.
    """

    class Vertex:
        """A node together with its outgoing and incoming neighbours."""

        def __init__(self, G):
            self.heads = set()      # vertices this vertex has an edge to
            self.tails = set()      # vertices that have an edge to this vertex
            self.explored = False   # DFS visitation mark
            self.G = G              # owning graph, used to resolve labels

        def connect_to(self, v_name):
            """Add the edge ``self -> v_name`` and return ``self``.

            The target vertex is created on demand.  Adding the same edge
            twice is a no-op because both adjacency collections are sets.
            """
            v = self.G.vertex(v_name)
            self.heads.add(v)
            v.tails.add(self)
            return self

    def __init__(self):
        # Maps vertex label -> Vertex.
        self.V = dict()

    def reset(self):
        """Clear the ``explored`` mark on every vertex."""
        for v in self.V.values():
            v.explored = False

    def vertex(self, v_name):
        """Return the vertex labelled ``v_name``, creating it if necessary."""
        if v_name not in self.V:
            self.V[v_name] = DirectedGraph.Vertex(self)
        return self.V[v_name]

    def _roots_in_label_order(self):
        """Return all vertices, highest label first.

        Every vertex is included (also the smallest label, and regardless of
        gaps in the labelling).  The order only affects which DFS trees are
        grown first, never the resulting SCC sizes, so labels that cannot be
        compared with each other simply fall back to insertion order.
        """
        try:
            names = sorted(self.V, reverse=True)
        except TypeError:
            names = list(self.V)
        return [self.V[name] for name in names]

    @staticmethod
    def _postorder(root, edge_attr):
        """Iterative DFS from ``root``; yield vertices as they finish.

        ``edge_attr`` is ``'tails'`` (walk the reversed graph) or ``'heads'``
        (walk the graph as given).  Neighbours are consumed through iterators,
        so the adjacency sets themselves are never modified.
        """
        root.explored = True
        stack = [(root, iter(getattr(root, edge_attr)))]
        while stack:
            v, pending = stack[-1]
            for w in pending:
                if not w.explored:
                    w.explored = True
                    stack.append((w, iter(getattr(w, edge_attr))))
                    break
            else:
                # No unexplored neighbour left: v is finished.
                stack.pop()
                yield v

    def dfs_scc(self):
        """Return the sorted (ascending) list of SCC sizes, using recursive DFS.

        The recursion depth can reach the number of vertices, so on large
        graphs this may exceed Python's recursion limit; use
        :meth:`dfs_scc_it` there.  An empty graph yields ``[]``.
        """
        # 1. DFS on G^r to number nodes
        self.reset()
        finish_order = []

        def visit_reversed(v):
            v.explored = True
            for w in v.tails:
                if not w.explored:
                    visit_reversed(w)
            finish_order.append(v)

        for v in self._roots_in_label_order():
            if not v.explored:
                visit_reversed(v)

        # 2. DFS on G in order of new numbering to find SCC's
        self.reset()

        def visit_forward(v):
            v.explored = True
            size = 1
            for w in v.heads:
                if not w.explored:
                    size += visit_forward(w)
            return size

        sccs = []
        for v in reversed(finish_order):
            if not v.explored:
                sccs.append(visit_forward(v))
        sccs.sort()
        return sccs

    def dfs_scc_it(self):
        """Return the sorted (ascending) list of SCC sizes, without recursion.

        The graph is left intact and the marks are reset on entry, so the
        method can be called repeatedly.  An empty graph yields ``[]``.
        """
        # 1. DFS on G^r to number nodes
        self.reset()
        finish_order = []
        for v in self._roots_in_label_order():
            if not v.explored:
                finish_order.extend(self._postorder(v, 'tails'))

        # 2. DFS on G in order of new numbering to find SCC's
        self.reset()
        sccs = []
        for v in reversed(finish_order):
            if not v.explored:
                # Each tree grown in this pass is exactly one SCC.
                sccs.append(sum(1 for _ in self._postorder(v, 'heads')))
        sccs.sort()
        return sccs

In [2]:
import zipfile

# Build the graph from the edge list: each line of SCC.txt holds two
# integer labels "tail head", describing the edge tail -> head.
g = DirectedGraph()
with zipfile.ZipFile('SCC.zip') as zf:
    with zf.open('SCC.txt') as edgelist:
        for line in edgelist:
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: nothing to add.
                continue
            tail, head = int(fields[0]), int(fields[1])
            if tail == head:
                # A self-loop does not affect any SCC, but the vertex must
                # still exist so it is counted as a component.
                g.vertex(tail)
                continue
            g.vertex(tail).connect_to(head)

In [3]:
# Run the iterative SCC computation.  dfs_scc_it() returns the component
# sizes sorted in ascending order, so the largest components are at the end
# of the list.  As the cell's last expression the value is displayed and
# cached by IPython (as `_3` when this cell is executed as In [3]).
g.dfs_scc_it()

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [4]:
# Grab the list displayed by cell In [3] from IPython's output cache
# (`Out[3]` and `_3` are the same object).  This only resolves if that cell
# was executed third in the current kernel session.
x = Out[3]

In [9]:
# Five largest SCC sizes, biggest first: take the last five entries of the
# size list and order them descending.
tail_sizes = x[-5:]
sorted(tail_sizes, reverse=True)

[434821, 968, 459, 313, 211]

In [2]:
import zipfile

# Build the TRANSPOSED graph: each line of SCC.txt holds two integer labels
# and the edge is inserted from the second label to the first.  Reversing
# every edge leaves the strongly connected components unchanged, so this
# serves as a cross-check of the SCC sizes.
g = DirectedGraph()
with zipfile.ZipFile('SCC.zip') as zf:
    with zf.open('SCC.txt') as edgelist:
        for line in edgelist:
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: nothing to add.
                continue
            first, second = int(fields[0]), int(fields[1])
            if first == second:
                # A self-loop does not affect any SCC, but the vertex must
                # still exist so it is counted as a component.
                g.vertex(first)
                continue
            g.vertex(second).connect_to(first)

In [3]:
# Five largest SCC sizes of the graph built above, biggest first.
scc_sizes = g.dfs_scc_it()
sorted(scc_sizes[-5:], reverse=True)

[434821, 968, 459, 313, 211]