In [10]:
import numpy as np
np.random.seed(19)
def generate_arrays(scale, size):
    """Return two random integer arrays whose sums along axis 0 are equal.

    Both arrays are drawn with ``np.random.randint(0, scale, size=size)``
    (first ``arr1``, then ``arr2``, so results for a given global seed are
    stable).  The last entry along the first axis of ``arr2`` is then
    overwritten with whatever value makes the two sums match; because of that
    adjustment this entry may be negative or ``>= scale``.

    Parameters
    ----------
    scale : int
        Exclusive upper bound forwarded to ``np.random.randint``.
    size : int or tuple of int
        Output shape forwarded to ``np.random.randint``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(arr1, arr2)``.  When the first axis is empty there is nothing to
        balance and the two (empty) arrays are returned unchanged.
    """
    arr1 = np.random.randint(0, scale, size=size)
    arr2 = np.random.randint(0, scale, size=size)

    # An empty first axis has no last entry to adjust; indexing arr2[-1]
    # would raise IndexError, and the sums are trivially equal already.
    if len(arr2) == 0:
        return arr1, arr2

    # Vectorised sums over the first axis (what the builtin sum() iterates
    # over as well), so 1-D input yields scalars and 2-D input yields rows.
    arr2[-1] = arr1.sum(axis=0) - arr2[:-1].sum(axis=0)
    assert np.array_equal(arr1.sum(axis=0), arr2.sum(axis=0))
    return arr1, arr2

In [11]:
import sys, math
def _format_time(timespan, precision=3):
    """Format a duration given in seconds as a short human-readable string.

    Parameters
    ----------
    timespan : float
        Duration in seconds.
    precision : int, optional
        Number of significant digits used for durations below one minute.

    Returns
    -------
    str
        * ``timespan >= 60``: a space-separated breakdown into whole days,
          hours, minutes and seconds (e.g. ``"1min 1s"``); any sub-second
          remainder is dropped.
        * ``0 < timespan < 60``: the value scaled to s, ms, µs (``us`` when
          stdout cannot encode the micro sign) or ns, e.g. ``"1.5 ms"``.
        * ``timespan <= 0``: the value expressed in ns.
    """

    if timespan >= 60.0:
        # we have more than a minute, format that in a human readable form
        # Idea from http://snipplr.com/view/5713/
        parts = [("d", 60*60*24), ("h", 60*60), ("min", 60), ("s", 1)]
        pieces = []
        leftover = timespan
        for suffix, length in parts:
            value = int(leftover / length)
            if value > 0:
                leftover = leftover % length
                pieces.append(u'%s%s' % (str(value), suffix))
            # Less than one second left: nothing more worth printing.
            if leftover < 1:
                break
        return " ".join(pieces)

    # Unfortunately the unicode 'micro' symbol can cause problems in
    # certain terminals.
    # See bug: https://bugs.launchpad.net/ipython/+bug/348466
    # Try to prevent crashes by being more secure than it needs to
    # E.g. eclipse is able to print a µ, but has no sys.stdout.encoding set.
    units = [u"s", u"ms", u'us', u"ns"]  # the safe value
    if hasattr(sys.stdout, 'encoding') and sys.stdout.encoding:
        try:
            u'\xb5'.encode(sys.stdout.encoding)
            units = [u"s", u"ms", u'\xb5s', u"ns"]
        except Exception:
            # Best effort: any encoding problem keeps the ASCII fallback, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            pass
    scaling = [1, 1e3, 1e6, 1e9]

    if timespan > 0.0:
        # One unit step per three decimal orders of magnitude, capped at ns.
        order = min(-int(math.floor(math.log10(timespan)) // 3), 3)
    else:
        order = 3

    text = u"%.*g" % (precision, timespan * scaling[order])
    # Rounding to `precision` digits can push the value up to 1000 of the
    # chosen unit (e.g. 0.9999 s -> "1e+03 ms"); promote it to the next
    # larger unit so it reads "1 s" instead.
    if order > 0 and float(text) >= 1000.0:
        order -= 1
        text = u"%.*g" % (precision, float(text) / 1000.0)
    return u"%s %s" % (text, units[order])

In [12]:
!pip uninstall -q -y dpss
!pip install -q --disable-pip-version-check --no-index --find-links ../target/wheels dpss
import dpss
result_dict = {"array_size": [], "n_candidates": [], "array_scale": [], "time_elapsed": []}
for arr_s in [10, 20, 30]:
    for scale in [10, 100]:
        for n_candidate in [5, 10]:
            print(f"array_size: {arr_s}, n_candidates: {n_candidate}, array_scale: {scale}")
            arr1, arr2 = generate_arrays(scale, arr_s)
            result_dict["array_size"].append(arr_s)
            result_dict["n_candidates"].append(n_candidate)
            result_dict["array_scale"].append(scale)
            result = %timeit -o dpss.sequence_matcher_m2m(arr1, arr2, n_candidate)
            result_dict["time_elapsed"].append(_format_time(result.best))
            print("\n")

array_size: 10, n_candidates: 5, array_scale: 10
198 µs ± 82.3 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


array_size: 10, n_candidates: 10, array_scale: 10
379 µs ± 149 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


array_size: 10, n_candidates: 5, array_scale: 100
1.27 ms ± 327 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


array_size: 10, n_candidates: 10, array_scale: 100
1.1 ms ± 54.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


array_size: 20, n_candidates: 5, array_scale: 10
71.5 ms ± 6.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


array_size: 20, n_candidates: 10, array_scale: 10
35.1 ms ± 3.07 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


array_size: 20, n_candidates: 5, array_scale: 100
20 ms ± 2.41 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


array_size: 20, n_candidates: 10, array_scale: 100
11.1 ms ± 795 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


array_size: