In [14]:
# setup
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display triggers a DeprecationWarning.
from IPython.display import display, HTML

# Collapse the In[]/Out[] prompt column so cell content uses the full width.
display(HTML('<style>.prompt{width: 0px; min-width: 0px; visibility: collapse}</style>'))
# Inject the shared stylesheet (presumably the RISE slideshow theme, going by
# the file name). The context manager closes the file handle right away
# instead of leaving it to the garbage collector.
with open('../rise.css') as css_file:
    display(HTML(css_file.read()))

# imports
import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
# %matplotlib inline
# sns.set(style="whitegrid", font_scale=1.5, rc={'figure.figsize':(12, 6)})


  from IPython.core.display import display,HTML


# CMPS 2200
# Introduction to Algorithms

## Depth First Search


### About BFS

#### Q1. Shortest Path in Unweighted Networks
Example: Google Maps finding shortest path when all roads are equal in cost.

<img src="figures/unweighted_graph.png" width="50%"/>




#### Q2. You’re given a 2D grid with walls and open paths. What's the best approach to find the shortest distance from top-left to bottom-right?

The following is a simple 5x5 grid with open paths (0) and walls (1):
```

0  0  0  0  0
0  1  1  1  0
0  1  0  0  0
0  1  1  1  0
0  0  0  0  0
```



#### Q3. Detecting Friend Groups on a Social Network


You are working as a data analyst for a social media platform like Facebook. The platform has millions of users, and you are tasked with identifying friend groups — groups of people who are all connected, directly or indirectly.

A friendship can be:

 - Undirected (e.g., mutual friends: you follow me and I follow you).

 - Or directed (e.g., one-way follow: I follow you but you don’t follow me back — like Twitter or Instagram).

#### Your goal is to detect clusters of users such that:

In undirected networks, each group is a connected component.

In directed networks, each group is a strongly connected component (SCC) — every user can reach every other user in that group via directed connections.

### Depth First Search
> Visit a node's children before visiting its siblings

<img src="figures/tree-dfs-vs-bfs.gif" width="70%"/>


<br>
<br>
We can implement DFS with a stack

> **last in first out**


<center>
<img src="figures/graph_search_ex2.png" width="70%"/>
</center>


In [1]:
from collections import deque

def dfs_stack(graph, source):
    """
    Depth-first search driven by an explicit stack (last in, first out).

    Prints 'visiting <node>' the first time each reachable node is visited.

    Params:
      graph....adjacency map: node -> iterable of neighbouring nodes
      source...node to start the search from

    Returns:
      the set of all nodes reachable from source (including source)
    """
    frontier = deque()
    frontier.append(source)
    visited = set()
    # A loop rather than a recursive helper: Python does not eliminate tail
    # calls, so one call per visited node overflows the call stack on graphs
    # with more than roughly a thousand reachable nodes.
    while len(frontier) > 0:
        node = frontier.pop() ## only difference (bfs_serial uses popleft)
        if node in visited:
            # A node is pushed once for every not-yet-visited neighbour that
            # discovers it, so in a graph with cycles it can be on the stack
            # several times. Only the first pop counts as a visit.
            continue
        print('visiting', node)
        visited.add(node)
        frontier.extend(filter(lambda n: n not in visited, graph[node]))
    return visited
    
# Example graph as an adjacency map: each node maps to the set of its
# neighbours. Every edge is listed in both directions, so the graph is
# undirected. Neighbours are stored in sets, which are unordered, so the
# order in which siblings are visited is not fixed by this literal.
graph = {
            'A': {'B', 'C'},
            'B': {'A', 'D', 'E'},
            'C': {'A', 'F', 'G'},
            'D': {'B'},
            'E': {'B', 'H'},
            'F': {'C'},
            'G': {'C'},
            'H': {'E'}
        }

# Run the stack-based DFS from 'A'; the returned visited set is the cell output.
dfs_stack(graph, 'A')

visiting A
visiting C
visiting F
visiting G
visiting B
visiting D
visiting E
visiting H


{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'}

### Compare with `bfs_serial`!

`dfs_stack`:

- `node = frontier.pop()`


`bfs_serial`:

- `node = frontier.popleft()`


### DFS with recursion


But wait, can't we just use recursion?

Recursion maintains a stack of calls automatically.

<center>
<img src="https://upload.wikimedia.org/wikipedia/commons/7/7f/Depth-First-Search.gif" width="25%"/>
</center>



In [2]:
def dfs_recursive(graph, source):
    """
    Depth-first search that uses the call stack instead of an explicit one.

    Prints 'visiting <node>' when a node is first reached.

    Params:
      graph....adjacency map: node -> iterable of neighbouring nodes
      source...node to start the search from

    Returns:
      the set of all nodes reachable from source (including source)
    """
    def explore(seen, current):
        # Only unseen nodes do any work; a node that was already seen falls
        # straight through to the return (the base case of the recursion).
        if current not in seen:
            print('visiting', current)
            seen.add(current)
            for nxt in graph[current]:
                explore(seen, nxt)
        return seen

    return explore(set(), source)

# Run the recursive DFS on the example graph, starting from 'A'.
dfs_recursive(graph, 'A')

visiting A
visiting B
visiting E
visiting H
visiting D
visiting C
visiting G
visiting F


{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'}

In [3]:
def dfs_recursive_iter(graph, source):
    """
    Recursive depth-first search in which the loop over a node's neighbours
    is expressed as a left fold through `iterate`.

    Prints 'visiting <node>' when a node is first reached.

    Params:
      graph....adjacency map: node -> iterable of neighbouring nodes
      source...node to start the search from

    Returns:
      the set of all nodes reachable from source (including source)
    """
    def explore(seen, current):
        # `iterate` calls this as f(accumulator, element), so the visited set
        # has to be both the first parameter and the return value.
        if current not in seen:
            print('visiting', current)
            seen.add(current)
            neighbours = list(graph[current])
            iterate(explore, seen, neighbours)
        return seen

    return explore(set(), source)

def iterate(f, x, a):
    """
    Left fold: combine the elements of `a` into an accumulator, left to right.

    Computes f(...f(f(x, a[0]), a[1])..., a[-1]).

    Params:
      f...function taking (accumulator, element) and returning the new accumulator
      x...initial accumulator value
      a...the elements to fold over, in order

    Returns:
      the final accumulator; `x` itself when `a` is empty
    """
    # A plain loop gives the same result as the recursive definition
    #   iterate(f, f(x, a[0]), a[1:])
    # without one Python call per element (RecursionError at around a
    # thousand elements) and without re-copying the tail of `a` at every
    # step (quadratic work).
    for element in a:
        x = f(x, element)
    return x

# Run the fold-based recursive DFS on the example graph, starting from 'A'.
dfs_recursive_iter(graph, 'A')

visiting A
visiting B
visiting E
visiting H
visiting D
visiting C
visiting G
visiting F


{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'}

## Cost of DFS

As in BFS, we add a node to the visited set exactly once ($|V|$).

For each edge, we do a lookup to see whether the node at its other end is already in the visited set ($|E|$).

Thus, the total work is equivalent to BFS: $O(|V| + |E|)$.



## Parallelism in DFS?
<img src="figures/dfs_nop.jpg" width="30%"/>

Is there any opportunity for parallelism?

One idea is to just run the search for each child in parallel. 
- E.g., in this example, search the subtree starting at $a$ in parallel with the subtree starting at $b$

<span style="color:red">**Question**:</span> What potential problems arise?

- We may end up visiting $b$ twice (or $c$, or $f$)
- This isn't in DFS order! We shouldn't be visiting $b$ before $e$.


## Cycle detection

How can we modify DFS to determine if the graph has a cycle?

**cycle**: a path in which all nodes are distinct except the first and last
- in an undirected graph, a cycle must contain at least three nodes

<span style="color:red">**Question**:</span> How??

**idea**: determine whether a vertex is visited more than once.

but...the second visit must be from a different source

<img src="figures/triangle.png"/>

e.g., if $a$ is the source, we will visit $b$ twice
- once when it is added to `visited`
- once in the base case of the recursive call (`if node in visited`), with `c` as the parent

but we will visit $a$ three times:
- once when it is added to `visited`
- twice in the base case of the recursive call (`if node in visited`)
  - with `b` as the parent
  - with `c` as the parent
  
So, we need to keep track of the parent of each recursive call, and make sure not to make a recursive call back to the parent.

In [4]:
def dfs_cycle(graph, source):
    """
    Depth-first search that also reports whether a cycle was encountered.

    The edge leading straight back to the node we arrived from is skipped,
    so a single undirected edge is not reported as a cycle. Prints
    'visiting <node>' for each new node and 'found cycle from <parent> to
    <node>' whenever an already-visited node is reached again.

    Params:
      graph....adjacency map: node -> iterable of neighbouring nodes
      source...node to start the search from

    Returns:
      a (visited, has_cycle) tuple: the set of reached nodes and a boolean
    """
    def explore(state, current, came_from):
        # `state` bundles (visited set, cycle flag) into one value so that it
        # can be threaded through `iterate` as the accumulator.
        seen, cycle_found = state

        if current in seen:
            print('found cycle from %s to %s' % (came_from, current))
            return (seen, True)

        print('visiting', current)
        seen.add(current)
        # Drop the edge back to where we came from.
        onward = [n for n in graph[current] if n != came_from]

        def step(acc, nxt):
            # Same helper, with the node being expanded fixed as the parent.
            return explore(acc, nxt, current)

        return iterate(step, (seen, cycle_found), onward)

    # The source acts as its own parent for the initial call.
    return explore((set(), False), source, source)
    
# Run cycle detection on the example graph; the recorded output below reports
# no cycle for it.
dfs_cycle(graph, 'A')

visiting A
visiting B
visiting E
visiting H
visiting D
visiting C
visiting G
visiting F


({'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'}, False)

In [5]:
# Same adjacency map as the example graph, except that G's entry also lists
# 'A'. Only G's neighbour set carries the extra edge; A's entry is unchanged.
graph2 = {
            'A': {'B', 'C'},
            'B': {'A', 'D', 'E'},
            'C': {'A', 'F', 'G'},
            'D': {'B'},
            'E': {'B', 'H'},
            'F': {'C'},
            'G': {'C', 'A'},  # add cycle back to A from G
            'H': {'E'}
        }
# Run cycle detection on the modified graph, starting from 'A'.
dfs_cycle(graph2, 'A')

visiting A
visiting B
visiting D
visiting E
visiting H
visiting C
visiting G
found cycle from G to A
visiting F


({'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'}, True)

## Strongly Connected Component (SCC) 


Let $G = (V, E)$ be a directed graph. A strongly connected component (SCC) of $G$ is a subset $S$ of $V$ such that
> for any two vertices $u, v \in S$, G has a path from u to v and a path from v to u;

> S is maximal in the sense that we cannot put any more vertex into S without breaking the above property.

<img src="figures/scc_ex1.png"/>



> $\{a, b, c \}$ is an SCC.

> $\{a, b, c, d \}$ is not an SCC.

> $\{d, e, f , k, l \}$ is not an SCC (because we can still add vertex g).

> $\{e, d, f , k, l, g \}$ is an SCC.

## 🧠 High-Level Intuition
The key insight is:

In the original graph, nodes within an SCC can reach each other, but nodes in different SCCs may not be mutually reachable. If we reverse the graph (transpose), the direction of reachability between SCCs also reverses.

Kosaraju’s algorithm cleverly uses two passes of DFS to isolate SCCs.

### 🧠 Key Idea of Kosaraju's Algorithm
Kosaraju’s algorithm finds SCCs in 3 main steps:

1. **First DFS** (on the original graph):
    - Do a standard DFS on the original graph.
    - Push each node to a stack when its DFS finishes (i.e., all its descendants are visited).
    - The last node to finish ends up on top of the stack, so popping the stack yields the nodes in order of decreasing finish time.
    
2. **Graph Transposition**:
    - Reverse the direction of every edge in the graph.
    - If there was an edge u → v, now it becomes v → u.
    
    <img src="figures/scc_ex2.png"/>


3. **Second DFS** (on the transposed graph):
    - Pop nodes one by one from the stack (from Step 1).
    - For each unvisited node, do DFS on the transposed graph.
    - All nodes reached during this DFS form one SCC.

In [None]:
from collections import defaultdict

def dfs_forward(graph, node, visited, stack):
    """
    First pass of Kosaraju's algorithm: DFS from `node`, recording each
    vertex on `stack` at the moment it finishes (post-order).

    Params:
      graph.....adjacency map: vertex -> iterable of successor vertices; a
                vertex that only appears as an edge target and has no entry
                of its own is treated as having no outgoing edges
      node......vertex to start from
      visited...set of already-discovered vertices; updated in place
      stack.....list that finished vertices are appended to; updated in place

    Returns:
      None; results are reported through `visited` and `stack`
    """
    # An explicit work stack of (vertex, iterator over its neighbours)
    # replaces recursion, so long paths cannot hit Python's recursion limit.
    # Resuming the saved iterator gives the same discovery and finish order
    # as the recursive formulation.
    visited.add(node)
    work = [(node, iter(graph.get(node, ())))]
    while work:
        current, neighbors = work[-1]
        for neighbor in neighbors:
            if neighbor not in visited:
                visited.add(neighbor)
                work.append((neighbor, iter(graph.get(neighbor, ()))))
                break  # descend into the neighbour before looking at the rest
        else:
            # Every neighbour has been handled: `current` is finished.
            work.pop()
            stack.append(current)  # append after visiting all descendants

def dfs_inverse(graph, node, visited, component):
    """
    Second pass of Kosaraju's algorithm: collect every not-yet-visited vertex
    reachable from `node` in `graph` (the caller passes the transposed graph).

    Params:
      graph.......adjacency map: vertex -> iterable of successor vertices;
                  vertices without an entry are treated as having no
                  outgoing edges
      node........vertex to start from
      visited.....set of already-assigned vertices; updated in place
      component...set that the reached vertices are added to; updated in place

    Returns:
      None; results are reported through `visited` and `component`
    """
    # Only the set of reached vertices matters here, not the order, so a
    # simple explicit stack replaces recursion and avoids the recursion limit
    # on large components.
    visited.add(node)
    pending = [node]
    while pending:
        current = pending.pop()
        component.add(current)
        # .get() accepts plain dicts with missing vertices and does not grow
        # a defaultdict as a side effect of the lookup.
        for neighbor in graph.get(current, ()):
            if neighbor not in visited:
                visited.add(neighbor)
                pending.append(neighbor)

def transpose_graph(graph):
    """
    Build the transpose of a directed graph: every edge u -> v becomes v -> u.

    Params:
      graph...adjacency map: vertex -> iterable of successor vertices

    Returns:
      a defaultdict(list) mapping each vertex that has at least one incoming
      edge in `graph` to the list of its predecessors; looking up any other
      vertex yields an empty list
    """
    reversed_edges = defaultdict(list)
    for src, targets in graph.items():
        for dst in targets:
            reversed_edges[dst].append(src)
    return reversed_edges

def kosaraju_scc(graph):
    """
    Find the strongly connected components of a directed graph with
    Kosaraju's two-pass algorithm.

    Also prints the finish-order stack produced by the first pass.

    Params:
      graph...adjacency map: vertex -> iterable of successor vertices

    Returns:
      a list of sets, one per strongly connected component, in the order in
      which the second pass discovers them
    """
    # Pass 1: DFS over the original graph, recording vertices as they finish.
    discovered = set()
    finish_order = []
    for start in graph:
        if start in discovered:
            continue
        dfs_forward(graph, start, discovered, finish_order)

    # Reverse every edge.
    reversed_graph = transpose_graph(graph)

    # Pass 2: walk the vertices from last-finished to first-finished; each
    # DFS over the reversed graph from an unassigned vertex yields one SCC.
    assigned = set()
    components = []
    print(finish_order)
    for root in reversed(finish_order):
        if root in assigned:
            continue
        members = set()
        dfs_inverse(reversed_graph, root, assigned, members)
        components.append(members)

    return components


In [None]:
# Directed example graph: each vertex maps to the set of its successors.
# NOTE(review): this rebinds the name `graph` that the earlier DFS cells use
# for the undirected example; re-running those cells after this one would
# operate on this graph instead (which has no 'A' vertex) - confirm this is
# intended.
graph = {
    'a': {'b', 'e'},
    'b': {'a'},
    'c': {'a', 'b'},
    'd': {'b', 'e'},
    'e': {'d', 'f'},
    'f': {'g', 'k'},
    'g': {'l'},
    'h': {'i'},
    'i': {'h'},
    'j': {'e', 'g', 'h'},
    'k': {'f'},
    'l': {'k'}
}

# Compute the strongly connected components and print one set per line.
sccs = kosaraju_scc(graph)
print("Strongly Connected Components:")
for scc in sccs:
    print(scc)
