# Graph Search and Topological Sort

In [1]:
class Vertex:    
    def __init__(self, value, children=[]):
        self.value = value
        self.children = set(children)
        
t = Vertex('t', [])
v = Vertex('v', [t])
w = Vertex('w', [t])
s = Vertex('s', [v, w])
graph = [s, w, v, t]

## BFS and DFS

In [2]:
def bfs(start):
    visited = set()
    queue = [start]
    while queue:
        vertex = queue.pop(0)
        if vertex.value not in visited:
            print('Visit: {}'.format(vertex.value))
            visited.add(vertex.value)
            queue.extend(vertex.children - visited)

bfs(graph[0])

Visit: s
Visit: v
Visit: w
Visit: t


In [3]:
def dfs(start):
    visited = set()
    stack = [start]
    while stack:
        vertex = stack.pop()
        if vertex.value not in visited:
            print('Visit: {}'.format(vertex.value))
            visited.add(vertex.value)
            stack.extend(vertex.children - visited)

dfs(graph[0])

Visit: s
Visit: w
Visit: t
Visit: v


## Topological Sort

**Goal:** Labeling $f$ of $G$'s nodes such that:
1. The $f(v)$'s are the set ${1, 2, ..., n}$
2. $(u, v) \in G \Rightarrow f(u) < f(v)$

**Motivation:** Create sequence of tasks while respecting dependencies.

**Note:** No directed cycle => can compute topological ordering in $O(m + n)$

### 1) Straighforward Algorithm

**Note:** Every directed acyclic graph has a sink vertex

```
1) Let v be a sink vertex of G
2) Set f(v) = n
3) Recurse on G - {v}
```

### 2) TopoSort using DFS

In [4]:
def dfsTopo(vertex, visited, stack):
    visited.add(vertex.value)
    for c in vertex.children:
        if c.value not in visited:
            dfsTopo(c, visited, stack)
    stack.insert(0, vertex.value)

def toposort(graph):
    visited = set()
    stack = []
    for v in graph:
        if v.value not in visited:
            dfsTopo(v, visited, stack)
    return stack

toposort(graph)

['s', 'w', 'v', 't']

## Strongly Connected Components

**Kosaraju's Two Pass Alrotithm:**
```
1. Let G^rev = G with reverted edges
2. Run dfs_loop on G^rev
    * Let f(v) = "finish time"
3. Run dfs_loop on G
    * Processing nodes in order of decr finish time
4. SCCs = nodes with same "leader" node
```

In [6]:
def scc(graph, graph_rev):
    global leader, finish
    leader = {} # Map node to leader
    finish = {} # Map node to finish order
    # Pass 1 - Compute finish order
    scc_dfs_loop(graph_rev.values())
    # Order graph by finish time
    graph_order = sorted(finish, key=finish.get, reverse=True)
    graph_ordered = [graph[x] for x in graph_order]
    # Pass 2 - Compute leader
    scc_dfs_loop(graph_ordered)
    # All nodes with same leader are a SCC
    return leader
    
def scc_dfs_loop(graph):
    global num_processed, source_vertex, visited
    num_processed = 0
    source_vertex = None
    visited = set()
    for node in graph:
        if node.value not in visited:
            source_vertex = node.value
            scc_dfs(node)
            
def scc_dfs(node):
    global num_processed
    visited.add(node.value)
    leader[node.value] = source_vertex
    for c in node.children:
        if c.value not in visited:
            scc_dfs(c)
    num_processed += 1
    finish[node.value] = num_processed

def _get_graph():
    d = Vertex('d', []) # [b]
    c = Vertex('c', [d])
    b = Vertex('b', [c])
    a = Vertex('a', [b, c])
    d.children = [b]
    graph = {'a': a, 'b': b, 'c': c, 'd': d}
    a_rev = Vertex('a', [])
    d_rev = Vertex('d', []) # [c_rev]
    b_rev = Vertex('b', [a_rev, d_rev])
    c_rev = Vertex('c', [a_rev, b_rev])
    d_children = [c_rev]
    graph_rev = {'a': a_rev, 'b': b_rev, 'c': c_rev, 'd': d_rev}
    return graph, graph_rev

graph, graph_rev = _get_graph()
scc(graph, graph_rev)

{'a': 'a', 'b': 'c', 'c': 'c', 'd': 'c'}

### Lemma

* Consider two "adjacent" SCCsin G (C_1 -> C_2).
* Let f(v) = finishing times of DFS_Loop

**Then:** $max(f(v \in C_1)) < max(f(v \in C_2))$

**Corollary:** Max f-value of G must be in "sink SCC"!