# Text documents

In [None]:
# Cut the flat `indices` array at the interior `indptr` offsets, producing a
# list with one index array per `indptr` interval. Presumably `sparsedata` is
# a SciPy CSR matrix, in which case each entry holds the column indices of one
# row -- TODO confirm against where `sparsedata` is built.
sections = np.split(sparsedata.indices, sparsedata.indptr[1:-1]) # Fast since it's all numpy

In [None]:
# This can be done faster, see UMAP's numba definitions for sparse distances
@numba.njit
def jaccard_similarity(a, b):
    """Return the Jaccard similarity of two collections of items.

    The result is ``|A & B| / |A | B|`` where ``A`` and ``B`` are the sets of
    distinct values found in ``a`` and ``b``.

    Parameters
    ----------
    a, b : iterables accepted by ``set()`` under numba
        Presumably 1-D integer index arrays (e.g. the entries of
        ``sections``) -- confirm against the caller.

    Returns
    -------
    float
        A value in ``[0, 1]``. Two empty inputs are treated as identical and
        give ``1.0`` instead of dividing by zero.
    """
    set_a = set(a)
    set_b = set(b)
    intersect = len(set_a.intersection(set_b))
    # Size the union from the de-duplicated sets so repeated entries in the
    # inputs cannot inflate the denominator.
    union = len(set_a) + len(set_b) - intersect
    if union == 0:
        # Both inputs are empty: define the similarity as 1.0 (distance 0).
        return 1.0
    return intersect / union

In [None]:
@numba.njit(parallel=True)
def pairwise_sections(sections, metric):
    """Evaluate ``metric`` on every unordered pair of entries in ``sections``.

    Parameters
    ----------
    sections : sequence
        Indexable collection of items to compare; ``len(sections)`` fixes the
        size of the output.
    metric : callable
        Called as ``metric(sections[i], sections[j])`` for ``i < j``. It must
        be callable from numba-compiled code -- presumably a jitted function
        such as ``jaccard_similarity``; confirm against the caller.

    Returns
    -------
    numpy.ndarray
        ``(n, n)`` float64 array. Only the strict upper triangle (``j > i``)
        is written; the diagonal and the lower triangle stay at zero.
    """
    n = len(sections)
    m = np.zeros((n, n), dtype=np.float64)
    for i in range(n):
        # Fetch row i's section once; it is reused for every j in this row.
        sect1 = sections[i]
        # Each j writes its own cell m[i, j], so the iterations are independent.
        for j in numba.prange(i + 1, n):
            m[i, j] = metric(sect1, sections[j])
    return m

# De Jong attractor

Build the grid outside Numba, since this step is plain NumPy and already fast (though there are probably faster ways to do it):

In [None]:
def grid(N):
    """Build an ``(N, N, 2)`` float64 array of integer lattice coordinates.

    Entry ``[i, j]`` holds the pair ``(i, j)``: channel 0 is the row index
    and channel 1 is the column index.
    """
    axis = np.arange(N)
    coords = np.zeros((N, N, 2), dtype=np.float64)
    # Broadcast the axis down the rows for channel 0 and across the columns
    # for channel 1.
    coords[..., 0] = axis[:, np.newaxis]
    coords[..., 1] = axis[np.newaxis, :]
    return coords

In [None]:
@numba.njit(parallel=True)
def dejong(m, a, b, c, d, it=8):
    """Iterate a De Jong-style map ``it`` times on every point of ``m``.

    ``m`` is indexed as ``m[:, :, 0]`` (x) and ``m[:, :, 1]`` (y). Each
    iteration computes, from the previous state,

        x_new = sin(a * y) - cos(b * x) / 2
        y_new = sin(c * x) - cos(d * y) / 2

    The input array is not modified; a new array with the final state is
    returned (a plain copy of ``m`` when ``it`` is 0).
    """
    state = m.copy()
    snapshot = state.copy()
    for _ in range(it):
        # Freeze the current state so both channels are updated from the same
        # previous values.
        snapshot[:] = state[:]
        x_old = snapshot[:, :, 0]
        y_old = snapshot[:, :, 1]
        state[:, :, 0] = sin(a * y_old) - cos(b * x_old) / 2
        state[:, :, 1] = sin(c * x_old) - cos(d * y_old) / 2
    return state