### constraints.py demo
This runs through several short demos of how to slap
constraints onto the lo-dimensional embedding of UMAP.

Dataset-independent constraints can be supplied to the constructor;
dataset-dependent ones, to the 'fit' or 'fit_transform' function.
The latter have a first argument that is always the index of the point.

In [1]:
from sklearn.datasets import load_iris
import numpy as np
import umap
iris = load_iris()
umapper0 = umap.UMAP(
    n_neighbors=50, learning_rate=0.5, random_state=12345, min_dist=0.001,
    init="random", n_epochs=1,
)
print("Generating an initial embedding...")
emb0 = umapper0.fit_transform(iris.data)

Generating an initial embedding...
optimize_layout_euclidean
<class 'numpy.ndarray'> float32
<class 'numpy.ndarray'> float32
<class 'numpy.ndarray'> int32
<class 'numpy.ndarray'> int32
<class 'int'>
<class 'numpy.ndarray'> float64
a,b <class 'numpy.float64'> <class 'numpy.float64'>
rng_state [-875795313  556638176 1282112438] <class 'numpy.ndarray'>
gamma, initial_alpha <class 'float'> <class 'float'>
negative_sample_rate <class 'int'>
parallel, verbose, densmap <class 'bool'> <class 'bool'> <class 'bool'>
densmap_kwds {'lambda': 0.0, 'frac': 0.0, 'var_shift': 0.1, 'n_neighbors': 50}
move_other True
output_constrain None
pin_mask <class 'NoneType'>
head,tail shapes (150, 2) (150, 2)


### demo 1- and 2-d pin mask, data_constrain=array

print("Pinning embeddings of pts 13 and 14 to [-5,0] and [5,0]")
# pin embeddings of two data (13 and 14) to left and right of origin
pin_mask = np.ones_like(emb0)
pin_mask[13] = 0.0
pin_mask[14] = 0.0
emb0[13] = [-5.0, 0]
emb0[14] = [+5.0, 0]
print("Specify 'init' embedding for umapper2")
umapper1 = umap.UMAP(
    n_neighbors=50, learning_rate=0.5, random_state=12346, min_dist=0.001,
    init=emb0, n_epochs=2,
)
print("Embed with pin_mask[13] and pin_mask[14] zero-vectors")
emb1 = umapper1.fit_transform(iris.data, data_constrain=pin_mask)
print("emb0[11:15]\n",emb0[11:15])
print("emb1[11:15]\n",emb1[11:15])

# Now demo pinning point 11 with a one-dimensional pin-mask
pin_mask1d = np.ones( (emb0.shape[0]), dtype=np.int32)
pin_mask1d[13] = 0.0
pin_mask1d[13] = 0.0
emb0[13] = [-4.0, 0]
emb0[14] = [+4.0, 0]
print("Embed with pin_mask[13] and pin_mask[14] zero-values")
emb1 = umapper1.fit_transform(iris.data, data_constrain=pin_mask)
print("emb0[11:15]\n",emb0[11:15])
print("emb1[11:15]\n",emb1[11:15])
print("\nGoodbye")

### demo output_constrain and data_constrain
A UMAP constraints function `y_bounder` restains *y*-values of any point to -5..+5
This is independent of data set, so it is an `output_constrain` argument
to the UMAP constructor.

A `data_constrain` function is used to pin points 13 and 14 to specific positions.

In [3]:
print("Pinning embeddings of pts 13 and 14 to [-2,0] and [2,0]")
# pin embeddings of two data (13 and 14) to left and right of origin
# via a custom constraint.  inf get no-op, other values get fixed
import umap.constraints as con
import numba
infs0 = np.full_like(emb0, np.float32(np.inf), dtype=np.float32)
infs0[13,:] = [-2.0,0]
infs0[14,:] = [+2.0,0]
@numba.njit()
def constraint_idx_pt0(idx,pt):
    con.freeinf_pt(idx,pt, infs0)
# this function DOES depent on idx of pt
    
constraints = {
    'idx_pt': constraint_idx_pt0,
}
# optional: set up the values to agree
#emb0[13] = [-2.0, 0]
#emb0[14] = [+2.0, 0]
# Here is the "move all points" version of con.freeinf
con.freeinf_pts(emb0, infs0)

# Also demo a non-indexed (UMAP constructor) constraint,
# that is independent of the iris.data.
# Here, let's constrain 'y' to be within -5.0, +5.0
# without, we got:
# emb2[11:15]
#  [[ 4.804111   3.430179 ]
#  [ 4.2598176  7.352637 ]     # <-- 'y' is big here
#  [-2.         0.       ]
#  [ 2.         0.       ]]
# With 'output_constrain':
# emb2[11:15]
# [[ 7.0536     2.275853 ]
# [ 4.8699265  1.9713866]      # y constrained
# [-2.         0.       ]
# [ 2.         0.       ]]
# So we pass illegal range for x, and legal range for y
def mk_bound_y_values(lo, hi):
    bound_los = np.array([+999.,lo], dtype=np.float32)
    bound_his = np.array([-999.,hi], dtype=np.float32)
    @numba.njit()
    def bound_y_values(pt):
        return con.dimlohi_pt(pt, bound_los, bound_his)
    # this function does NOT depend on 'idx' arg
    return bound_y_values
y_bounder = mk_bound_y_values(-5.0,+5.0)

# CHECK: x is unaffected, y range is bounded ...
pt = np.array([1.,2.], dtype=np.float32)
print("pt0",pt); y_bounder(pt); print("pt0",pt); assert pt[0] == 1.; assert pt[1] == 2.
pt[0] = 10.; pt[1] = 10.
print("pt1",pt); y_bounder(pt); print("pt1",pt); assert pt[0] == 10.; assert pt[1] == 5.
pt[0] = -10.; pt[1] = -10.
print("pt2",pt); y_bounder(pt); print("pt2",pt); assert pt[0] == -10.; assert pt[1] == -5.

assert np.all(emb0[13] == [-2.0,0])
assert np.all(emb0[14] == [+2.0,0])
print("Specify 'init' embedding for umapper2")
umapper2 = umap.UMAP(
    n_neighbors=50, learning_rate=0.5, random_state=12346, min_dist=0.001,
    output_constrain = { 'pt': y_bounder }, # any pt, ind't of dataset
    init=emb0, n_epochs=4,
)
print("Embed with pin_mask[13] and pin_mask[14] zero-vectors")
# ... whereas data_constrain depends on the dataset (point number is important)
emb2 = umapper2.fit_transform(iris.data, data_constrain=constraints)
assert np.all(emb2[:,1] >= -5.0) and np.all(emb2[:,1] <= 5.0) # output_constrain
print("emb0[11:15]\n",emb0[11:15])
print("emb2[11:15]\n",emb2[11:15])
print("\nGoodbye")

Pinning embeddings of pts 13 and 14 to [-2,0] and [2,0]
pt0 [1. 2.]
pt0 [1. 2.]
pt1 [10. 10.]
pt1 [10.  5.]
pt2 [-10. -10.]
pt2 [-10.  -5.]
Specify 'init' embedding for umapper2
Embed with pin_mask[13] and pin_mask[14] zero-vectors
X.shape (150, 4)
data_constrain keys dict_keys(['idx_pt'])
output_constrain keys dict_keys(['pt'])
optimize_layout_euclidean
<class 'numpy.ndarray'> float32
<class 'numpy.ndarray'> float32
<class 'numpy.ndarray'> int32
<class 'numpy.ndarray'> int32
<class 'int'>
<class 'numpy.ndarray'> float64
a,b <class 'numpy.float64'> <class 'numpy.float64'>
rng_state [1842529056 -244469075 -218331407] <class 'numpy.ndarray'>
gamma, initial_alpha <class 'float'> <class 'float'>
negative_sample_rate <class 'int'>
parallel, verbose, densmap <class 'bool'> <class 'bool'> <class 'bool'>
densmap_kwds {'lambda': 0.0, 'frac': 0.0, 'var_shift': 0.1, 'n_neighbors': 50}
move_other True
output_constrain {'pt': CPUDispatcher(<function mk_bound_y_values.<locals>.bound_y_values at 0x7f

### user-defined constraint
You can invent your own constraints.  Here we initialize and keep points
13 and 14 on the x=y line.  We chose to do it here with a gradient-style constraint.

In [4]:
print("grad constraint 13 and 14 on line y=x")
# this one has little help from umap.constraints.py,
# so define the numba constraint functions here:
@numba.njit()
def y_eq_x_pt(idx, pt):
    avg = np.sum(pt) / pt.shape[0]
    pt.fill(avg)
@numba.njit()
def y_eq_x_grad(idx, pt, grad):
    # if we cannot assume pt satisfies constraints:
    #y_eq_x_pt(idx, pt)  # put pt onto 45-degree line
    # now tangent plane projection
    y_eq_x_pt(idx, grad) # gradient also lies on the 45-degree line

# pin embeddings of two data (13 and 14) to all-coords-equal line
constraints = {
    'idx_grad': y_eq_x_grad,
}
# init 13 and 14 to 45-degree line
emb0[13,:] = [-1.0, -1.0]
emb0[14,:] = [+1.0, +1.0]
umapper3 = umap.UMAP(
    n_neighbors=50, learning_rate=0.5, random_state=12346, min_dist=0.001,
    init=emb0, n_epochs=2,
)
print("Embed with pin_mask[13] and pin_mask[14] zero-vectors")
emb3 = umapper3.fit_transform(iris.data, data_constrain=constraints)
print("emb0[11:15]\n",emb0[11:15])
print("emb3[11:15]\n",emb3[11:15])
np.testing.assert_allclose(emb3[13,0], emb3[13,1])
np.testing.assert_allclose(emb3[14,0], emb3[14,1])
print("\nGoodbye")

grad constraint 13 and 14 on line y=x
Embed with pin_mask[13] and pin_mask[14] zero-vectors
X.shape (150, 4)
data_constrain keys dict_keys(['idx_grad'])
optimize_layout_euclidean
<class 'numpy.ndarray'> float32
<class 'numpy.ndarray'> float32
<class 'numpy.ndarray'> int32
<class 'numpy.ndarray'> int32
<class 'int'>
<class 'numpy.ndarray'> float64
a,b <class 'numpy.float64'> <class 'numpy.float64'>
rng_state [1842529056 -244469075 -218331407] <class 'numpy.ndarray'>
gamma, initial_alpha <class 'float'> <class 'float'>
negative_sample_rate <class 'int'>
parallel, verbose, densmap <class 'bool'> <class 'bool'> <class 'bool'>
densmap_kwds {'lambda': 0.0, 'frac': 0.0, 'var_shift': 0.1, 'n_neighbors': 50}
move_other True
output_constrain None
pin_mask <class 'dict'>
head,tail shapes (150, 2) (150, 2)
kk,k 0 idx_grad
emb0[11:15]
 [[ 4.67359    3.3806326]
 [ 4.3936124  7.5779386]
 [-1.        -1.       ]
 [ 1.         1.       ]]
emb3[11:15]
 [[ 4.7785788  3.4856207]
 [ 4.2159853  7.4003115]
 

### output_constrain to box, and data_constrain with some springs and some pins
(Actually, pinning could also be done with a spring constant of $\infty$.)

In [5]:
import umap.constraints2 as con
import numba
print("spring force constraint 13 and 14 pulled towards (0,3), (0,-3)")
# This shows a "soft" constraint, with no point-projection step,
# and instead of projecting onto tangent space,
# the gradients get modified by a simple user force.

pin_idx = np.array([13,14], dtype=np.int32)
springs = np.array([0.1, 0.01], dtype=np.float32)   # note np==inf **would** have projection constraint
pin_pos = np.array([[0,3], [0,-3]], dtype=np.float32)
print("pin_idx (anchors)         ", numba.typeof(pin_idx), "\n", pin_idx)
print("springs (force constants) ", numba.typeof(springs),  "\n", springs)
print("pin_pos (anchor positions)", numba.typeof(pin_pos), "\n", pin_pos)
# pin point 12, but not via an infinite force spring... for show
emb0[12,:] = [0.5, 0.5]
my_pinned = np.array([12], dtype=np.int32)
#@numba.njit
#def pin12_grad(idx,pt):
#    con.pinindexed_grad(idx,pt,grad,  my_pinned)
@numba.njit
def my_springs_and_pins(idx, pt, grad):
    # pt is unconstrained
    con.springindexed_grad(idx,pt, grad, pin_idx, pin_pos, springs)
    # this is equivalent to an infinite force spring, but just for show...
    con.pinindexed_grad(idx,pt, grad, my_pinned)
# Note: we can only supply one function per dictionary key for constraints
# This is unfortunate.  A list/tuple might be OK in upstream numba versions

# second constraint (every pt inside simple box)
my_los = np.full(2, -5.0, dtype=np.float32) # x and y low bound <- -5.0
my_his = np.full(2, +5.0, dtype=np.float32)
#original:
#@numba.njit
#def my_box(idx, pt):  # 'idx_pt' argument list
#    con.dimlohi_pt(pt, my_los, my_his)
# Note: idx is not needed -- this can now be supplied as an
# 'output_constrain' value, to the UMAP constructor.
@numba.njit
def my_box2(pt):
    con.dimlohi_pt(pt, my_los, my_his)
# pin embeddings of two data (13 and 14) to all-coords-equal line
constraints = {
    #'idx_pt':   my_box, # better: this does not depend on 'idx',
    #  so it's better to give 'output_constrain=my_box2' in UMAP constructor
    'idx_grad': my_springs_and_pins,
}
# init 13 and 14 "anywhere"
emb0[13,:] = [0, +5]
emb0[14,:] = [0, -5]
print("emb0[11:15] before my_box\n",emb0[11:15])
umapper4 = umap.UMAP(
    n_neighbors=50, learning_rate=0.5, random_state=12347, min_dist=0.001,
    output_constrain = { 'pt': my_box2 },
    init=emb0, n_epochs=4,
)
print("Embed with pin_mask[13] and pin_mask[14] zero-vectors")
emb4 = umapper4.fit_transform(iris.data, data_constrain=constraints)
print("emb0[11:15]\n",emb0[11:15])
print("emb4[11:15]\n",emb4[11:15])
print("pin_pos[0]", pin_pos[0], "distance:",np.linalg.norm(emb4[13] - pin_pos[0]))
print("pin_pos[1]", pin_pos[1], "distance:",np.linalg.norm(emb4[14] - pin_pos[1]))
print("\nGoodbye")

spring force constraint 13 and 14 pulled towards (0,3), (0,-3)
pin_idx (anchors)          array(int32, 1d, C) 
 [13 14]
springs (force constants)  array(float32, 1d, C) 
 [0.1  0.01]
pin_pos (anchor positions) array(float32, 2d, C) 
 [[ 0.  3.]
 [ 0. -3.]]
emb0[11:15] before my_box
 [[ 4.67359    3.3806326]
 [ 0.5        0.5      ]
 [ 0.         5.       ]
 [ 0.        -5.       ]]
Embed with pin_mask[13] and pin_mask[14] zero-vectors
X.shape (150, 4)
data_constrain keys dict_keys(['idx_grad'])
output_constrain keys dict_keys(['pt'])
optimize_layout_euclidean
<class 'numpy.ndarray'> float32
<class 'numpy.ndarray'> float32
<class 'numpy.ndarray'> int32
<class 'numpy.ndarray'> int32
<class 'int'>
<class 'numpy.ndarray'> float64
a,b <class 'numpy.float64'> <class 'numpy.float64'>
rng_state [  872516537 -2141808696   162233487] <class 'numpy.ndarray'>
gamma, initial_alpha <class 'float'> <class 'float'>
negative_sample_rate <class 'int'>
parallel, verbose, densmap <class 'bool'> <class 'bo