# Union-Find Data Structures

## Helper Functions for Experiments and Drawing

We again use `networkx` to represent and draw graphs and trees. In addition, we use `pygraphviz` to obtain better tree layouts.

In [None]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt
from networkx.drawing.nx_agraph import graphviz_layout
import networkx as nx
import pygraphviz

We randomly create a small example input:

In [None]:
import itertools
import random

class Example:
    """Random sequence of ``union`` calls over the vertices ``0 .. no_nodes - 1``.

    Attributes set by the constructor:
        no_nodes: number of vertices.
        union_calls: list of ``(node1, node2)`` tuples. Every unordered pair
            occurs at most once, and the order inside each tuple is random.
    """

    def __init__(self, no_nodes, no_union_calls, seed=None):
        """Sample ``no_union_calls`` distinct vertex pairs.

        Args:
            no_nodes: number of vertices.
            no_union_calls: number of pairs to draw.
            seed: optional seed for a private random generator, so that the
                same example can be regenerated. With the default ``None``
                the module-level ``random`` generator is used.

        Raises:
            ValueError: if ``no_union_calls`` is negative or larger than the
                number of distinct vertex pairs.
        """
        self.no_nodes = no_nodes

        node_pairs = list(itertools.combinations(range(no_nodes), 2))
        if not 0 <= no_union_calls <= len(node_pairs):
            raise ValueError(
                "no_union_calls must be between 0 and %d for %d nodes, got %r"
                % (len(node_pairs), no_nodes, no_union_calls)
            )

        # A dedicated generator keeps a seeded example independent of any
        # other use of the global random state.
        rng = random if seed is None else random.Random(seed)
        sampled_pairs = rng.sample(node_pairs, no_union_calls)

        # In the sampled pairs, the node with the smaller number
        # always is the first component. We thus swap the order
        # for some of the pairs.
        self.union_calls = [
            (node2, node1) if rng.choice((True, False)) else (node1, node2)
            for node1, node2 in sampled_pairs
        ]

# Shared demo input: 10 vertices and 8 randomly drawn union calls.
# The cells below all operate on `small_example`.
small_example = Example(10, 8)
print(small_example.union_calls)

To be able to quickly verify the results of the algorithms below, we visualize the resulting graph.

In [None]:
# Draw the union calls as an undirected graph: one node per vertex and
# one edge per (node1, node2) pair of the example.
graph = nx.Graph()
graph.add_nodes_from(range(small_example.no_nodes))
graph.add_edges_from(small_example.union_calls)
# Node positions passed to nx.draw below.
pos = nx.spring_layout(graph, k=1)
# if you have scipy installed, you can also use:
# pos = nx.kamada_kawai_layout(graph)
nx.draw(graph, pos, with_labels=True, 
        node_size=1000, node_color='lightblue')

## Quick-Find

Quick-Find is a rather naive approach that stores, for each vertex, the representative of its connected component. When two connected components are merged, we need to iterate over all vertices and, for one of the two components, update the representative to that of the other component.

In [None]:
class QuickFind:
    """Union-find that stores the representative of every vertex in a list.

    ``find`` is a single list lookup, whereas ``union`` scans the whole list
    to relabel one of the two merged components.
    """

    def __init__(self, no_nodes):
        """Start with ``no_nodes`` singleton components."""
        self.components = no_nodes
        # Initially every vertex represents itself.
        self.representative = [*range(no_nodes)]

    def find(self, v):
        """Return the representative of the component containing ``v``."""
        return self.representative[v]

    def union(self, v, w):
        """Merge the components of ``v`` and ``w`` (no-op if already merged)."""
        old_label, new_label = self.find(v), self.find(w)
        if old_label != new_label:
            # Relabel every vertex of v's component with w's representative.
            for index, label in enumerate(self.representative):
                if label == old_label:
                    self.representative[index] = new_label
            self.components -= 1  # two components became one

    def connected(self, v, w):
        """Return whether ``v`` and ``w`` lie in the same component."""
        return self.representative[v] == self.representative[w]

    def count(self):
        """Return the current number of components."""
        return self.components

In [None]:
# Replay the example's union calls on Quick-Find and print the
# representative list after every call.
qf = QuickFind(small_example.no_nodes)
print("Initial representatives:", qf.representative)
print()

for x, y in small_example.union_calls:
    qf.union(x, y)
    print("after union(%i, %i):" % (x, y))
    print(qf.representative)
    print()

In [None]:
# Are vertices 0 and 6 in the same component after all union calls?
qf.connected(0, 6)

## Quick-Union

Quick-Union stores all vertices of a connected component in a tree. The advantage is that `union` no longer has to traverse all vertices. In `find`, we can no longer access the representative directly; instead, we follow the parent pointers up to the root of the corresponding tree, which serves as the representative.

In [None]:
class QuickUnion:
    """Union-find that keeps every component as a tree of parent pointers.

    A vertex whose parent is itself is the root, and thereby the
    representative, of its component.
    """

    def __init__(self, no_nodes):
        """Start with ``no_nodes`` single-vertex trees."""
        self.components = no_nodes
        self.parent = [*range(no_nodes)]

    def find(self, v):
        """Walk up the parent pointers from ``v`` and return the root."""
        node = v
        while True:
            above = self.parent[node]
            if above == node:
                return node
            node = above

    def union(self, v, w):
        """Hang the root of ``v``'s tree below the root of ``w``'s tree.

        Prints what happened; does nothing else if both vertices already
        share a root.
        """
        root_v, root_w = self.find(v), self.find(w)
        if root_v != root_w:
            self.parent[root_v] = root_w
            print("Make the root", root_v, "of the tree of", v, 
                  "a child of the root", root_w, "of the tree of", w)
            self.components -= 1
        else:
            print("Vertices", v, "and", w, "already were in",
                  "the same connected component.")

    def connected(self, v, w):
        """Return whether ``v`` and ``w`` have the same root."""
        root_v = self.find(v)
        root_w = self.find(w)
        return root_v == root_w

    def count(self):
        """Return the current number of components."""
        return self.components

Let's first write a small helper function for drawing the forest represented by `parent`:

In [None]:
def draw_forest(parent_array):
    """Draw the forest encoded by ``parent_array`` on the current figure.

    ``parent_array[i]`` is the parent of vertex ``i``; a vertex that is its
    own parent is a root and gets no outgoing edge. Edges are drawn from
    child to parent.
    """
    forest = nx.DiGraph()
    forest.add_nodes_from(range(len(parent_array)))
    forest.add_edges_from(
        (vertex, above)
        for vertex, above in enumerate(parent_array)
        if vertex != above
    )

    # The positions are computed on the reversed (parent -> child) graph,
    # presumably so that `dot` places the roots at the top.
    layout = graphviz_layout(forest.reverse(), prog='dot')
    nx.draw(forest, layout, with_labels=True, node_size=300, node_color='lightblue')

We can now test Quick-Union:

In [None]:
# Replay the example's union calls on Quick-Union and draw the parent
# forest after every call.
qu = QuickUnion(small_example.no_nodes)
for x, y in small_example.union_calls:
    print("union(%i, %i)" % (x, y))
    qu.union(x, y)
    draw_forest(qu.parent)
    plt.show()  # show one separate figure per step
    print()

## Ranked Quick-Union

In [None]:
class RankedQuickUnion:
    """Quick-Union that attaches the root of lower rank below the other root.

    ``rank[r]`` is kept for every root ``r`` and is incremented only when two
    roots of equal rank are merged.
    """

    def __init__(self, no_nodes):
        """Start with ``no_nodes`` single-vertex trees, all of rank 0."""
        self.components = no_nodes
        self.parent = [*range(no_nodes)]
        self.rank = [0 for _ in range(no_nodes)]

    def find(self, v):
        """Walk up the parent pointers from ``v`` and return the root."""
        node = v
        while True:
            above = self.parent[node]
            if above == node:
                return node
            node = above

    def union(self, v, w):
        """Merge the trees of ``v`` and ``w`` and print what happened.

        The root with the strictly smaller rank becomes the child; on equal
        ranks the root of ``v`` is attached below the root of ``w`` and the
        rank of the latter grows by one.
        """
        root_v, root_w = self.find(v), self.find(w)
        if root_v == root_w:  # nothing to merge
            print("Vertices", v, "and", w, "already were in",
                  "the same connected component.")
            return

        if self.rank[root_w] < self.rank[root_v]:
            child, child_vertex, top, top_vertex = root_w, w, root_v, v
            equal_ranks = False
        else:
            child, child_vertex, top, top_vertex = root_v, v, root_w, w
            equal_ranks = self.rank[root_v] == self.rank[root_w]

        self.parent[child] = top
        print("Make the root", child, "of the tree of", child_vertex, 
              "a child of the root", top, "of the tree of", top_vertex)
        if equal_ranks:
            print("Update the height of the resulting tree")
            self.rank[top] += 1
        self.components -= 1

    def connected(self, v, w):
        """Return whether ``v`` and ``w`` have the same root."""
        root_v = self.find(v)
        root_w = self.find(w)
        return root_v == root_w

    def count(self):
        """Return the current number of components."""
        return self.components

In [None]:
# Replay the example's union calls on the ranked variant and draw the
# parent forest after every call.
rqu = RankedQuickUnion(small_example.no_nodes)
for x, y in small_example.union_calls:
    print("union(%i, %i)" % (x, y))
    rqu.union(x, y)
    draw_forest(rqu.parent)
    plt.show()  # show one separate figure per step
    print()

## Ranked Quick-Union with Path Compression

Path compression is an improvement that makes the trees shallower as a side effect of each `find` call.

In [None]:
class RankedQuickUnionWithPathCompression:
    """Ranked Quick-Union whose ``find`` re-parents visited vertices to the root."""

    def __init__(self, no_nodes):
        """Start with ``no_nodes`` single-vertex trees, all of rank 0."""
        self.components = no_nodes
        self.parent = [*range(no_nodes)]
        self.rank = [0 for _ in range(no_nodes)]

    def find(self, v):
        """Return the root of ``v``'s tree.

        As a side effect, every vertex on the path from ``v`` to the root
        becomes a direct child of the root.
        """
        above = self.parent[v]
        if above != v:
            # Resolve the root recursively, then point v straight at it.
            above = self.find(above)
            self.parent[v] = above
        return above

    def union(self, v, w):
        """Merge the trees of ``v`` and ``w`` by rank (no-op if already merged)."""
        root_v, root_w = self.find(v), self.find(w)
        if root_v == root_w:
            return

        rank_v, rank_w = self.rank[root_v], self.rank[root_w]
        if rank_w < rank_v:
            self.parent[root_w] = root_v
        else:
            self.parent[root_v] = root_w
            if rank_v == rank_w:
                # Equal ranks: the surviving root's rank grows by one.
                self.rank[root_w] = rank_w + 1
        self.components -= 1

    def connected(self, v, w):
        """Return whether ``v`` and ``w`` have the same root."""
        root_v = self.find(v)
        root_w = self.find(w)
        return root_v == root_w

    def count(self):
        """Return the current number of components."""
        return self.components

The `union` method is implemented exactly as in the version without path compression. But since `union` internally calls `find`, it also reattaches the vertices visited on the way directly to the root.

In [None]:
# Replay all union calls of the example, then draw only the final forest.
rqupc = RankedQuickUnionWithPathCompression(small_example.no_nodes)
for x, y in small_example.union_calls:
    rqupc.union(x, y)
    
draw_forest(rqupc.parent)

Without path compression, calls to `find` do not modify the data. With path compression, the trees become shallower with every `find` call that walks a path of length two or more.

In [None]:
# EXERCISE: for the randomly generated example, pick calls to `find` and
# `connected` whose path compression visibly changes the forest; the
# vertex numbers below are only placeholders.
rqupc.connected(1, 7)
rqupc.find(5)
# Redraw to see the effect of the calls above on the parent pointers.
draw_forest(rqupc.parent)