# Closest Point Pair

From Wikipedia, 
> The closest pair of points problem or closest pair problem is a problem of computational geometry: given n points in metric space, find a pair of points with the smallest distance between them.


In [None]:
import random
import matplotlib.pyplot as plt

In [None]:
# Number of random points to generate.
nr_points = 50
# Each point is an (x, y) tuple with integer coordinates in [1, 1000].
# range(nr_points) produces exactly nr_points samples.
points = [(random.randint(1, 1000), random.randint(1, 1000)) for _ in range(nr_points)]

In [None]:
# Split the points into separate x and y coordinate lists for plotting.
X = [pt[0] for pt in points]
Y = [pt[1] for pt in points]

# Draw every generated point on a large square canvas.
plt.figure(figsize=(20, 20))
p = plt.scatter(X, Y, s=100)

## Distance

In [None]:
import math

def dist(p, q):
    """Return the Euclidean distance between two 2-D points.

    Args:
        p: First point as an ``(x, y)`` pair.
        q: Second point as an ``(x, y)`` pair.

    Returns:
        The straight-line distance between ``p`` and ``q`` as a float.
    """
    (x1, y1), (x2, y2) = p, q
    dx, dy = x1 - x2, y1 - y2
    return math.sqrt(dx ** 2 + dy ** 2)

# Sentinel distance meaning "no pair found yet": far larger than any distance
# between the generated points, whose coordinates lie in [1, 1000].
INF = 1e100

## Bruteforce
_Time Complexity: O(n^2)_

In [None]:
def closest_pair_bruteforce(pts):
    """Find the closest pair of points by measuring every pair once.

    Args:
        pts: Iterable of ``(x, y)`` points.

    Returns:
        A ``(distance, (p, q))`` tuple for the closest pair. When ``pts``
        holds fewer than two points the result is ``(INF, (None, None))``.
    """
    from itertools import combinations

    min_dist = INF
    closest_pair = None, None
    # combinations() pairs points by position, not by value, so two identical
    # points (distance 0) are still compared, and each unordered pair is
    # measured only once instead of twice.
    for p1, p2 in combinations(pts, 2):
        p1p2_dist = dist(p1, p2)
        # Strict comparison keeps the first pair found among equal distances.
        if p1p2_dist < min_dist:
            min_dist = p1p2_dist
            closest_pair = p1, p2

    return min_dist, closest_pair
                    
# Evaluate the brute-force search on the generated points; as the last
# expression of the cell, its (distance, pair) result is displayed.
closest_pair_bruteforce(points)

## Divide & Conquer

### Reduce problem to one or more sub-problems of the same type
_Time Complexity: O(n(logn)^2)_

In [None]:
def _boundary_closest_pair(pts, d, mid_pt):
    """Find the closest pair inside the strip around the dividing line.

    Args:
        pts: Points as ``(x, y)`` tuples.
        d: Best distance found so far; only pairs strictly closer than this
            are reported.
        mid_pt: Point whose x coordinate defines the vertical dividing line.

    Returns:
        ``(distance, (p, q))`` for the closest pair in the strip that is
        strictly closer than ``d``, or ``(INF, None)`` when there is none.
    """
    mid_x = mid_pt[0]

    # The strip holds every point whose horizontal distance to the dividing
    # line is less than d; only such points can form a pair closer than d
    # that spans both halves.
    boundary = [p for p in pts if abs(p[0] - mid_x) < d]

    # Sort by Y coordinate so candidates near each other in Y are adjacent.
    boundary.sort(key=lambda p: p[1])

    min_d = d
    bpair = None

    # Compare each point only with the points after it in Y order. Pairing by
    # position (not by value) keeps duplicate points in play and measures
    # every pair once.
    for i, p in enumerate(boundary):
        for j in range(i + 1, len(boundary)):
            q = boundary[j]
            # Every later point is at least this far away in Y, so none of
            # them can beat the current best distance.
            if q[1] - p[1] >= min_d:
                break
            pq_dist = dist(p, q)
            if pq_dist < min_d:
                min_d = pq_dist
                bpair = p, q

    # A pair was recorded only if it was strictly closer than d.
    if bpair is not None:
        return min_d, bpair
    return INF, None

In [None]:
def closest_pair_dc(pts):
    """Find the closest pair of points using divide and conquer.

    Args:
        pts: List of ``(x, y)`` points, already sorted by x coordinate. The
            list is split in half by position, so the ordering is what puts
            each half on one side of the dividing line.

    Returns:
        ``(distance, (p, q))`` for the closest pair, or ``(INF, None)`` when
        ``pts`` holds fewer than two points.
    """
    if len(pts) <= 1:
        return INF, None

    if len(pts) == 2:
        p, q = pts
        return dist(p, q), (p, q)

    # DIVIDE
    half = len(pts) // 2
    left = pts[:half]
    right = pts[half:]

    # CONQUER
    ld, lpair = closest_pair_dc(left)
    rd, rpair = closest_pair_dc(right)

    # Best pair lying entirely inside one half. Using <= means a tie between
    # the two halves still yields a real pair instead of falling through.
    if ld <= rd:
        best_d, best_pair = ld, lpair
    else:
        best_d, best_pair = rd, rpair

    # COMBINE: look for a pair spanning the dividing line that is strictly
    # closer than the best pair found within the halves.
    sd, bpair = _boundary_closest_pair(pts, best_d, pts[half])
    if sd < best_d:
        return sd, bpair
    return best_d, best_pair

In [None]:
# Sort points by X once up front: closest_pair_dc splits the list in half by
# position, so the input must be ordered by X for each half to lie on one side
# of the dividing line. This saves us the bruteforce: we do not need to
# calculate distances between all pairs.
points_sorted = sorted(points, key=lambda p: p[0]) # O(nlogn)

d, pair = closest_pair_dc(points_sorted)
d, pair

## Visualize closest pair

In [None]:
# Colour value per point, in the same order as X and Y: 0 for ordinary
# points, 3 and 2 for the two points of the closest pair.
c = [0] * len(points)
for i, pt in enumerate(points):
    if pt == pair[0]:
        c[i] = 3
    if pt == pair[1]:
        c[i] = 2


plt.figure(figsize=(20,20))
p = plt.scatter(X, Y, s=100, c=c)

# draw initial boundary line: the first split is made on the X-sorted list,
# so the dividing point must be read from points_sorted, not from points
mid = points_sorted[len(points_sorted) // 2]
_ = plt.axvline(x=mid[0])

# Can we do better?

We can reduce the time complexity to _O(nlogn)_ by sorting the points by y coordinate once up front and reusing that order for the boundary window, instead of re-sorting the boundary window on every
recursive call.

Refer: https://www.geeksforgeeks.org/closest-pair-of-points-onlogn-implementation/

# Use cases

- GIS - nearest poi of same type
- Gaming - avoid collision with nearby object

# References

- https://www.youtube.com/watch?v=frir6Sf7ft4
- https://www.geeksforgeeks.org/closest-pair-of-points-using-divide-and-conquer-algorithm/
- https://www.cs.mcgill.ca/~cs251/ClosestPair/ClosestPairDQ.html