In [1]:
# setup
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display emits a DeprecationWarning.
from IPython.display import display, HTML

# Collapse the notebook's In/Out prompt column.
display(HTML('<style>.prompt{width: 0px; min-width: 0px; visibility: collapse}</style>'))

# Inject the custom stylesheet; the context manager closes the file handle
# instead of leaving it open until garbage collection.
with open('../rise.css') as css_file:
    display(HTML(css_file.read()))

# imports
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Global seaborn settings: "whitegrid" style, fonts scaled by 1.5,
# and a default matplotlib figure size of (12, 6).
sns.set(style="whitegrid", font_scale=1.5, rc={'figure.figsize':(12, 6)})


  from IPython.core.display import display,HTML


# CMPS 2200
# Introduction to Algorithms

## Spanning Trees


 <center>
<img src="figures/muddy_city2.png" width=70%/>
</center>

<span style="color:red">**Problem 1:**</span> [**Traveling Salesperson Problem**] Find the shortest "tour" that visits every site exactly once and returns to the starting site.
    
<span style="color:red">**Problem 2:**</span> [**Single-Source Shortest Path**] Find the shortest path from a given starting site to each of the other sites.
        
<span style="color:red">**Problem 3:**</span> [**Minimum Spanning Tree**] Find the cheapest set of connections (edges) that links all sites together.





### Spanning Tree
For a connected undirected graph $G = (V,E)$, a **spanning tree** is a tree $T = (V,E')$ with $E' \subseteq E$. That is, a spanning tree uses only edges of $G$ and includes all of its vertices.


> A tree is an undirected graph that is connected and acyclic (meaning it has no cycles or loops). A tree with $|V|$ vertices always has exactly $|V| - 1$ edges; it does not need a designated root node.

Suppose we have a weighted graph:

<center>
<img src="figures/st3.png"/>
</center>

We refer to the **weight** of a tree $T$ with edges $E(T)$ as:

$$w(T) = \sum_{e \in E(T)} w(e)$$

Is there a smaller tree in this graph?

<center>
<img src="figures/st4.png"/>
</center>

This is called the **minimum spanning tree (MST)**  of the graph.


What is the brute-force approach to find the MST?

As usual, we'll have an exponential number of possible spanning trees to consider.

<br><br>

What about a greedy approach?

Can we just select edges in increasing order of weight?

### Graph cut

We can view the $visited$ and $frontier$ sets as defining a **graph cut**.


A **graph cut** of a graph $G = (V,E)$ is a partition of the vertices into two sets $V_1 \subset V$ and $V_2 = V - V_1$.

Each vertex set $V_i \subset V$ defines a **vertex-induced subgraph** consisting of edges where both endpoints are in $V_i$.

For example:

<center>
    <img src="figures/cut1.jpg"/>
</center>

In this partition, we have:

- $G_1 = (V_1, E_1)~~~~V_1=\{a,b,c,d\}, E_1 = \{(a,b), (a,c), (b,d), (c, d)\}$
- $G_2 = (V_2, E_2)~~~~V_2=\{e,f\}, E_2 = \{(e,f)\}$


The **cut edges** are those that join the two subgraphs, e.g., $\{(b,e), (d,f)\}$.


We want to know if the lightest edge between the $visited$ and $frontier$ sets will be in the MST.

<!-- 
<span style="color:red">**Question:**</span> Can we consider divide-and-conquer framework here? -->

### Light-edge property

Let $G = (V,E,w)$ be a connected undirected, weighted graph with distinct edge weights. 

For any cut of $G$, the minimum weight edge that crosses the cut is contained in the minimum spanning tree of $G$.

<center>
    <img src="figures/cut.jpg"/>
</center>

<br>

**Proof by Contradiction**:

- Assume that the lightest edge $e = \{u,v\}$ crossing the cut is not in the MST.
- Then, the MST must contain some other path connecting $u$ to $v$. Since $u$ and $v$ lie on opposite sides of the cut, this path must cross the cut through some other edge $e'$.
- Because $e$ is the lightest edge crossing the cut and edge weights are distinct, $e'$ must be heavier than $e$.
- But then we can swap $e'$ for $e$ and still have a spanning tree, one that is lighter than the MST. This is a contradiction. $\square$


<br>

### How can we use the light-edge property to find the MST using priority search?

## Prim's Algorithm

Perform **priority-first search** on $G$ starting from an <span style="color:red">**arbitrary**</span> vertex $s$.

To select the next edge to expand the frontier $X$, use priority:
- $p(v) = \min_{x \in X} w(x,v)$
- Add the chosen edge $(u,v)$ to the tree.



<center>
    <img src="figures/prim.jpg" width=40%/>
</center>

- Edge $(c, f)$ has minimum weight across the cut $(X,Y)$.
- So, we visit $f$ next, adding it to $X$ and adding edge $(c, f)$ to the tree.


This sounds very similar to Dijkstra's algorithm. **What's the difference?**

In [1]:
from heapq import heappush, heappop 

def dijkstra(graph, source):
    """Compute shortest-path distances from `source` to every reachable node.

    Priority-first search: the frontier is a min-heap of
    (tentative distance, node) pairs, and a node's distance is final the
    first time it is popped. Assumes edge weights are non-negative.

    Implemented as a loop rather than a recursive helper: Python does not
    eliminate tail calls, so recursing once per heap pop raises
    RecursionError once the search performs more pops than the interpreter's
    recursion limit allows.

    Args:
        graph: dict mapping each node to an iterable of (neighbor, weight)
            pairs. Every node that appears as a neighbor must also be a key.
        source: the starting node; must be a key of `graph`.

    Returns:
        dict mapping each node reachable from `source` to its shortest-path
        distance from `source`.

    Raises:
        KeyError: if a visited node is not a key of `graph`.
    """
    visited = dict()  # node -> final shortest-path distance from source
    frontier = [(0, source)]  # a single-element list is already a valid heap
    while frontier:
        distance, node = heappop(frontier)
        if node in visited:
            # stale entry: the node was already finalized via a shorter path
            continue
        print('visiting', node)
        visited[node] = distance
        for neighbor, weight in graph[node]:
            # entries for finalized nodes would only be popped and discarded
            if neighbor not in visited:
                heappush(frontier, (distance + weight, neighbor))
    return visited

# Example weighted graph stored as an adjacency map: each node maps to a set
# of (neighbor, weight) pairs. Every edge is listed from both endpoints with
# the same weight, so the graph is undirected.
graph = {
            's': {('a', 4), ('b', 8)},
            'a': {('s', 4), ('b', 2), ('c', 5)},
            'b': {('s', 8), ('a', 2), ('c', 3)}, 
            'c': {('a', 5), ('b', 3), ('d', 3)},
            'd': {('c', 3)},
        }
# Shortest-path distances from 's'; the returned dict is the cell's output.
dijkstra(graph, 's')

visiting s
visiting a
visiting b
visiting c
visiting d


{'s': 0, 'a': 4, 'b': 6, 'c': 9, 'd': 12}

In [2]:
def prim(graph):
    """Build a minimum spanning tree of `graph` with Prim's algorithm.

    Priority-first search from an arbitrary source (the first key of
    `graph`). The frontier is a min-heap of (edge weight, node, parent)
    triples; the lightest edge leaving the visited set is added each step.
    Unlike Dijkstra, the priority is the weight of the single crossing edge,
    not the accumulated distance from the source.

    Implemented as a loop rather than a recursive helper: Python does not
    eliminate tail calls, so recursing once per heap pop raises
    RecursionError once the search performs more pops than the interpreter's
    recursion limit allows.

    Args:
        graph: dict mapping each node to an iterable of (neighbor, weight)
            pairs. Intended for undirected graphs, i.e. every edge listed
            from both endpoints.

    Returns:
        set of (weight, node, parent) triples, one per node reached from the
        source. The source itself is recorded as the placeholder triple
        (0, source, source), which is not a real edge. An empty graph yields
        an empty set.

    Raises:
        KeyError: if a visited node is not a key of `graph`.
    """
    tree = set()
    if not graph:
        # nothing to span; avoids failing while picking a source
        return tree

    # pick first node as source arbitrarily
    source = next(iter(graph))
    visited = set()  # store the visited nodes (don't need distance anymore)
    frontier = [(0, source, source)]  # a single-element list is already a valid heap
    while frontier:
        weight, node, parent = heappop(frontier)
        if node in visited:
            # stale entry: the node was already attached by a lighter edge
            continue
        print('visiting', node)
        # record this edge in the tree
        tree.add((weight, node, parent))
        visited.add(node)
        for neighbor, w in graph[node]:
            # compare with dijkstra:
            # heappush(frontier, (distance + weight, neighbor))
            # entries for visited nodes would only be popped and discarded
            if neighbor not in visited:
                heappush(frontier, (w, neighbor, node))
    return tree

prim(graph)

visiting s
visiting a
visiting b
visiting c
visiting d


{(0, 's', 's'), (2, 'b', 'a'), (3, 'c', 'b'), (3, 'd', 'c'), (4, 'a', 's')}

## Work of Prim's Algorithm

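This does asymptotically the same work as Dijkstra's algorithm: each edge is pushed onto (and popped from) the heap at most twice, once from each endpoint, and each heap operation costs $O(\log |E|)$. The total work is therefore $O(|E| \log |E|)$.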

Can we just pick an arbitrary source node? Why or why not?

What about directed graphs? Will this work?

No — in a directed graph, the source node may not be able to reach all other nodes.

Even if it can, we may get a suboptimal solution:

![figures/prim-fail.png](figures/prim-fail.png)

<center>
    <img src="figures/mst_example.jpeg" width=50%/>
</center>

<br>