In [1]:
# setup: inject notebook-wide CSS (collapse the `.prompt` elements, then apply ../rise.css)
# `display`/`HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML('<style>.prompt{width: 0px; min-width: 0px; visibility: collapse}</style>'))
# Use a context manager so the stylesheet's file handle is closed after reading.
with open('../rise.css') as css_file:
    display(HTML(css_file.read()))

# imports
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set(style="whitegrid", font_scale=1.5, rc={'figure.figsize':(12, 6)})


# CMPS 2200
# Introduction to Algorithms

## Spanning Trees


Agenda

- Introduce Spanning Trees 
- Minimum Spanning Trees

Recall breadth-first search. What order will it visit nodes in this graph?

<center>
<img src="figures/st0.png"/>
</center>

<center>
<img src="figures/st1.png"/>
</center>

<br><br>

Because we avoid revisiting nodes, we can view the edges we visit as a tree.

```python
for n in graph[node]:
    if n not in visited:
        frontier.append(n)
```

<center>
<img src="figures/st2.png"/>
</center>

<br>

Because this tree includes all vertices, we call this a **spanning tree**.

> For a connected undirected graph $G = (V,E)$, a **spanning tree** is a tree $T = (V,E')$ with $E' \subseteq E$
 


 
Now, suppose we have a weighted graph:

<center>
<img src="figures/st3.png"/>
</center>

We refer to the **weight** of a tree $T$ with edges $E(T)$ as:

$$w(T) = \sum_{e \in E(T)} w(e)$$


`Which spanning tree has the smallest total weight?`

<center>
<img src="figures/st4.png"/>
</center>

This is called the **minimum spanning tree (MST)**  of the graph.

<br><br>

What are some applications where we might want to find the MST?

### Applications of MST

- Power grid
  - minimize cost
  
- Transportation networks
  - build bridges between towns
  - minimize building cost
  
- Computer networks
  - minimize latency or cabling cost
  
  


What is the brute-force approach to find the MST?

As usual, we'll have an exponential number of possible spanning trees to consider.

<br><br>

What about a greedy approach?

Can we just select edges in increasing order of weight?

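### Light-Edge Property

For any cut of the vertices into $X$ and $V \setminus X$, a minimum-weight edge crossing the cut belongs to some MST (to *the* MST when all edge weights are distinct).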

<br><br>

<center>
    <img src="figures/mst_example.jpeg"/>
</center>

<br>

- Node Perspective [Prim's Algorithm]
- Edge Perspective [Kruskal's Algorithm]

## Prim's Algorithm

Perform **priority-first search** on $G$ starting from an arbitrary vertex $s$.

To choose the next vertex $v$ to visit, given the set $X$ of already-visited vertices, use priority:
- $p(v) = \min_{x \in X} w(x,v)$
- Add the edge $(u,v)$ with $u \in X$ and $w(u,v) = p(v)$ to the tree.



<center>
    <img src="figures/prim.jpg" width="40%"/>
</center>

- Edge $(c, f)$ has minimum weight across the cut $(X,Y)$.
- So, we visit $f$ next: $f$ joins $X$ and the edge $(c, f)$ joins the tree.


This sounds very similar to Dijkstra's algorithm. What's the difference?

<center>
<img src="figures/st4.png"/>
</center>


In [None]:
from heapq import heappush, heappop 

def dijkstra(graph, source):
    """Compute shortest-path distances from `source` using Dijkstra's algorithm.

    Args:
        graph: adjacency mapping where graph[node] is an iterable of
            (neighbor, weight) pairs. Every node that gets visited must be
            a key of `graph`.
        source: the node to start the search from.

    Returns:
        dict mapping each node reachable from `source` to its distance
        from `source`. Unreachable nodes are absent.

    Side effects:
        Prints 'visiting <node>' each time a node's distance is finalized.

    Note:
        A node's distance is fixed the first time it is popped, which is
        only correct when all edge weights are non-negative.
    """
    # The search is a loop rather than a tail-recursive helper: Python does
    # not eliminate tail calls, so one call per heap pop would raise
    # RecursionError on graphs with more than ~1000 heap entries.
    frontier = [(0, source)]  # a one-element list is already a valid heap
    visited = dict()  # store the final shortest-path distance for each node.
    while frontier:
        distance, node = heappop(frontier)
        if node in visited:
            # stale entry: this node was already finalized with a smaller distance
            continue
        print('visiting', node)
        visited[node] = distance
        for neighbor, weight in graph[node]:
            # priority is the total path length from the source
            heappush(frontier, (distance + weight, neighbor))
    return visited

# Example weighted, undirected graph as an adjacency mapping:
# node -> set of (neighbor, weight) pairs, with every edge listed from both endpoints.
graph = dict(
    s={('a', 4), ('b', 8)},
    a={('s', 4), ('b', 2), ('c', 5)},
    b={('s', 8), ('a', 2), ('c', 3)},
    c={('a', 5), ('b', 3), ('d', 3)},
    d={('c', 3)},
)
# Last expression of the cell, so the returned distance dict is displayed.
dijkstra(graph, 's')

In [None]:
def prim(graph):
    """Build a spanning tree of `graph` with Prim's algorithm.

    Args:
        graph: adjacency mapping where graph[node] is an iterable of
            (neighbor, weight) pairs. The first key is used as the source.
            Every node that gets visited must be a key of `graph`.

    Returns:
        set of (weight, node, parent) tuples, one per visited node, where
        `parent` is the already-visited endpoint of the edge that brought
        `node` into the tree. The source contributes the placeholder entry
        (0, source, source). Only nodes reachable from the source appear.
        An empty graph yields an empty set.

    Side effects:
        Prints 'visiting <node>' each time a node is added to the tree.
    """
    tree = set()
    if not graph:
        # nothing to span; avoids failing while picking a source below
        return tree

    # pick first node as source arbitrarily
    source = next(iter(graph))
    frontier = [(0, source, source)]  # a one-element list is already a valid heap
    visited = set()  # store the visited nodes (don't need distance anymore)

    # The search is a loop rather than a tail-recursive helper: Python does
    # not eliminate tail calls, so one call per heap pop would raise
    # RecursionError on graphs with more than ~1000 heap entries.
    while frontier:
        weight, node, parent = heappop(frontier)
        if node in visited:
            # stale entry: a lighter edge already connected this node
            continue
        print('visiting', node)
        # record this edge in the tree
        tree.add((weight, node, parent))
        visited.add(node)
        for neighbor, w in graph[node]:
            heappush(frontier, (w, neighbor, node)) ## different point ##
            # compare with dijkstra:
            # heappush(frontier, (distance + weight, neighbor))
    return tree

# Run Prim on the example `graph` built in the Dijkstra cell above (that cell must run first).
prim(graph)

## Work of Prim's Algorithm

Prim's algorithm does the same work as Dijkstra's algorithm: $O(|E| \log |E|)$.

Can we just pick an arbitrary source node? Why or why not?

What about directed graphs? Will this work?

No - not if some node is unreachable from the source node.

Even if it is, we may have a suboptimal solution:

![figures/prim-fail.png](figures/prim-fail.png)