In [1]:
#Prints **all** console output, not just last item in cell 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

**Eric Meinhardt / emeinhardt@ucsd.edu**

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Overview" data-toc-modified-id="Overview-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Overview</a></span></li><li><span><a href="#Comparison-of-basic-representations-and-operations" data-toc-modified-id="Comparison-of-basic-representations-and-operations-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Comparison of basic representations and operations</a></span><ul class="toc-item"><li><span><a href="#Baseline-representation-of-partial-feature-vectors" data-toc-modified-id="Baseline-representation-of-partial-feature-vectors-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Baseline representation of partial feature vectors</a></span><ul class="toc-item"><li><span><a href="#Overhead---generation,-well-formedness,-uniquification" data-toc-modified-id="Overhead---generation,-well-formedness,-uniquification-2.1.1"><span class="toc-item-num">2.1.1&nbsp;&nbsp;</span>Overhead - generation, well-formedness, uniquification</a></span></li><li><span><a href="#Agreement" data-toc-modified-id="Agreement-2.1.2"><span class="toc-item-num">2.1.2&nbsp;&nbsp;</span>Agreement</a></span></li><li><span><a href="#Union" data-toc-modified-id="Union-2.1.3"><span class="toc-item-num">2.1.3&nbsp;&nbsp;</span>Union</a></span></li><li><span><a href="#Intersection" data-toc-modified-id="Intersection-2.1.4"><span class="toc-item-num">2.1.4&nbsp;&nbsp;</span>Intersection</a></span></li><li><span><a href="#Extension" data-toc-modified-id="Extension-2.1.5"><span class="toc-item-num">2.1.5&nbsp;&nbsp;</span>Extension</a></span></li></ul></li><li><span><a href="#Specification-array-+-value-array" data-toc-modified-id="Specification-array-+-value-array-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Specification array + value array</a></span><ul class="toc-item"><li><span><a href="#Converting-between-representations" data-toc-modified-id="Converting-between-representations-2.2.1"><span class="toc-item-num">2.2.1&nbsp;&nbsp;</span>Converting between representations</a></span></li><li><span><a href="#Operations" data-toc-modified-id="Operations-2.2.2"><span class="toc-item-num">2.2.2&nbsp;&nbsp;</span>Operations</a></span></li><li><span><a href="#Performance-evaluation" data-toc-modified-id="Performance-evaluation-2.2.3"><span class="toc-item-num">2.2.3&nbsp;&nbsp;</span>Performance evaluation</a></span><ul class="toc-item"><li><span><a href="#Conclusion" data-toc-modified-id="Conclusion-2.2.3.1"><span class="toc-item-num">2.2.3.1&nbsp;&nbsp;</span>Conclusion</a></span></li></ul></li></ul></li><li><span><a href="#Matrix-extension-of-the-baseline-representation" data-toc-modified-id="Matrix-extension-of-the-baseline-representation-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Matrix extension of the baseline representation</a></span><ul class="toc-item"><li><span><a href="#Agreement-testing" data-toc-modified-id="Agreement-testing-2.3.1"><span class="toc-item-num">2.3.1&nbsp;&nbsp;</span>Agreement testing</a></span></li><li><span><a href="#Union" data-toc-modified-id="Union-2.3.2"><span class="toc-item-num">2.3.2&nbsp;&nbsp;</span>Union</a></span></li><li><span><a href="#Intersection" data-toc-modified-id="Intersection-2.3.3"><span class="toc-item-num">2.3.3&nbsp;&nbsp;</span>Intersection</a></span></li><li><span><a href="#Extension" data-toc-modified-id="Extension-2.3.4"><span class="toc-item-num">2.3.4&nbsp;&nbsp;</span>Extension</a></span></li><li><span><a href="#Conclusion" data-toc-modified-id="Conclusion-2.3.5"><span class="toc-item-num">2.3.5&nbsp;&nbsp;</span>Conclusion</a></span></li></ul></li><li><span><a href="#pytorch-and-gpus" data-toc-modified-id="pytorch-and-gpus-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span><code>pytorch</code> and gpus</a></span><ul class="toc-item"><li><span><a href="#Agreement" data-toc-modified-id="Agreement-2.4.1"><span class="toc-item-num">2.4.1&nbsp;&nbsp;</span>Agreement</a></span></li><li><span><a href="#Union" data-toc-modified-id="Union-2.4.2"><span class="toc-item-num">2.4.2&nbsp;&nbsp;</span>Union</a></span></li><li><span><a href="#Intersection" data-toc-modified-id="Intersection-2.4.3"><span class="toc-item-num">2.4.3&nbsp;&nbsp;</span>Intersection</a></span></li><li><span><a href="#Extension" data-toc-modified-id="Extension-2.4.4"><span class="toc-item-num">2.4.4&nbsp;&nbsp;</span>Extension</a></span></li><li><span><a href="#Conclusion" data-toc-modified-id="Conclusion-2.4.5"><span class="toc-item-num">2.4.5&nbsp;&nbsp;</span>Conclusion</a></span></li></ul></li></ul></li></ul></div>

In [2]:
import numpy as np
myint = np.int8

from vg import normalize

In [3]:
from bitarray import bitarray

In [4]:
from itertools import starmap, product

In [5]:
# from more_itertools import unique_everseen

In [6]:
from tqdm import tqdm

from joblib import Parallel, delayed, Memory

J = 30
BACKEND = 'multiprocessing'
# BACKEND = 'loky'
V = 10
PREFER = 'processes'
# PREFER = 'threads'

def par(gen_expr, j=None, backend=None, verbose=None, prefer=None):
    if j is None:
        j = J
    if backend is None:
        backend = BACKEND
    if verbose is None:
        verbose = V
    if prefer is None:
        prefer = PREFER
    return Parallel(n_jobs=j, backend=backend, verbose=verbose, prefer=prefer)(gen_expr)

def identity(x):
    return x

In [7]:
from random import choice

In [8]:
CAREFUL = False

# Overview

Goal of this notebook: find / document representations of and operations on partial feature vectors with an eye towards efficient calculation.

# Comparison of basic representations and operations

## Baseline representation of partial feature vectors

A partial feature vector $p$ on $m$ features is an element of $\{-1,0,1\}^m$, where
 - $p_i = 0$ iff feature $i$ is unspecified
 - $p_i = -1$ iff feature $i$ is specified $-$
 - $p_i = 1$ iff feature $i$ is specified $+$

Below this representation is implemented using `numpy` `int8` arrays and (usually) vectorized operations on them.

### Overhead - generation, well-formedness, uniquification

In [9]:
m = 9

In [10]:
def make_generator_vectors(num_features):
    basis_vectors = [np.zeros(num_features, dtype=myint) for each in range(num_features)]
    basis_vectors_neg = [np.zeros(num_features, dtype=myint) for each in range(num_features)]
    for i,v in enumerate(basis_vectors):
        v[i] = 1
    for i,v in enumerate(basis_vectors_neg):
        v[i] = -1
    generators = basis_vectors + basis_vectors_neg
    return generators

In [11]:
generators = make_generator_vectors(m)
generators

[array([1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
 array([0, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
 array([0, 0, 1, 0, 0, 0, 0, 0, 0], dtype=int8),
 array([0, 0, 0, 1, 0, 0, 0, 0, 0], dtype=int8),
 array([0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int8),
 array([0, 0, 0, 0, 0, 1, 0, 0, 0], dtype=int8),
 array([0, 0, 0, 0, 0, 0, 1, 0, 0], dtype=int8),
 array([0, 0, 0, 0, 0, 0, 0, 1, 0], dtype=int8),
 array([0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int8),
 array([-1,  0,  0,  0,  0,  0,  0,  0,  0], dtype=int8),
 array([ 0, -1,  0,  0,  0,  0,  0,  0,  0], dtype=int8),
 array([ 0,  0, -1,  0,  0,  0,  0,  0,  0], dtype=int8),
 array([ 0,  0,  0, -1,  0,  0,  0,  0,  0], dtype=int8),
 array([ 0,  0,  0,  0, -1,  0,  0,  0,  0], dtype=int8),
 array([ 0,  0,  0,  0,  0, -1,  0,  0,  0], dtype=int8),
 array([ 0,  0,  0,  0,  0,  0, -1,  0,  0], dtype=int8),
 array([ 0,  0,  0,  0,  0,  0,  0, -1,  0], dtype=int8),
 array([ 0,  0,  0,  0,  0,  0,  0,  0, -1], dtype=int8)]

In [12]:
max_num_objects = 2 ** m
max_num_objects

max_num_partial_fvs = (2 + 1) ** m
max_num_partial_fvs

512

19683

In [13]:
def wf_pfv(v):
    allowedValues = {-1,0,1}
    return all([x in allowedValues for x in v])

In [14]:
def make_random_pfv():
    return np.random.randint(3, size=m, dtype=myint) - 1

In [15]:
def uniquify(ndarray_iterable):
    tuples = [tuple(a) for a in ndarray_iterable]
    s = set(tuples)
    arrays = [np.array(t) for t in s]
    return arrays

In [16]:
def wf_tfv(v):
    allowedValues = {-1,1}
    return all([x in allowedValues for x in v])

In [17]:
max_num_objects
actual_num_objects = np.random.randint(max_num_objects)
# actual_num_objects = 40
actual_num_objects

assert actual_num_objects < max_num_objects

512

282

In [18]:
objects = tuple(set([tuple(np.random.randint(2, size=m)) for each in range(actual_num_objects)]))
objects = tuple(map(np.array, objects))
l = len(objects)

def zeroToMinusOne(u):
    return np.array([x if x == 1 else -1 for x in u])

objects = tuple([zeroToMinusOne(o) for o in objects])


actual_num_objects = len(objects)
actual_num_objects
objects

216

(array([-1, -1, -1, -1,  1,  1,  1,  1,  1]),
 array([ 1,  1,  1,  1,  1, -1,  1,  1, -1]),
 array([ 1,  1,  1,  1,  1, -1, -1,  1, -1]),
 array([-1,  1,  1,  1, -1,  1, -1, -1, -1]),
 array([-1,  1, -1, -1,  1,  1, -1, -1,  1]),
 array([ 1, -1,  1,  1, -1, -1, -1,  1,  1]),
 array([ 1, -1, -1,  1,  1, -1, -1,  1,  1]),
 array([ 1,  1,  1, -1,  1,  1, -1, -1, -1]),
 array([-1, -1,  1,  1, -1,  1, -1,  1,  1]),
 array([-1, -1, -1,  1, -1, -1, -1, -1, -1]),
 array([-1,  1, -1, -1,  1, -1, -1,  1, -1]),
 array([ 1,  1, -1, -1, -1,  1, -1,  1, -1]),
 array([ 1,  1, -1, -1, -1, -1,  1, -1, -1]),
 array([ 1, -1,  1,  1,  1, -1, -1,  1,  1]),
 array([-1,  1, -1, -1, -1,  1,  1, -1,  1]),
 array([-1,  1,  1, -1,  1,  1, -1,  1,  1]),
 array([ 1,  1,  1, -1,  1, -1,  1,  1,  1]),
 array([ 1, -1,  1, -1,  1, -1, -1, -1, -1]),
 array([ 1,  1,  1, -1, -1, -1, -1,  1, -1]),
 array([-1, -1,  1,  1,  1, -1,  1, -1,  1]),
 array([ 1,  1,  1, -1, -1,  1,  1, -1, -1]),
 array([ 1,  1, -1, -1, -1, -1, -1

In [19]:
objectMap = np.array([objects[i] for i in range(l)])
objectMap.shape
objectMap
objectMap[0]

(216, 9)

array([[-1, -1, -1, ...,  1,  1,  1],
       [ 1,  1,  1, ...,  1,  1, -1],
       [ 1,  1,  1, ..., -1,  1, -1],
       ..., 
       [ 1,  1, -1, ...,  1, -1,  1],
       [ 1,  1,  1, ...,  1, -1,  1],
       [ 1,  1, -1, ...,  1, -1, -1]])

array([-1, -1, -1, -1,  1,  1,  1,  1,  1])

In [20]:
def getIndex(o):
    matches = [i for i,v in enumerate(objectMap) if np.array_equal(v,o)]
    if len(matches) == 0:
        return -1
    if CAREFUL:
        assert len(matches) == 1
    return matches[0]

In [21]:
def makeExtensionVector(positive_Indices):
    return np.array([1 if i in positive_Indices else 0 for i in np.arange(objectMap.shape[0])], dtype=myint)

In [22]:
makeExtensionVector([0, 4, 8])

array([1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)

### Agreement

In [23]:
def ag(x,y):
    '''
    Formula:
    (x == 0 or y == 0) or ((x != 0 and y != 0) and (x == y)), where T = 1 and F = 0
    
    Pattern:
    x = x ⟶ 1
    0 = _ ⟶ 1
    _ = 0 ⟶ 1
    _ = _ ⟶ 0
    '''
    if x == y:
        return True
    elif x == 0:
        return True
    elif y == 0:
        return True
    else:
        return False

In [24]:
def agree(u,v):
    '''
    Given two vectors u and v, returns a binary vector indicating,
    elementwise, whether u and v 'agree'.
    
    agree(u[i], v[i]) iff (u[i] == 0 or v[i] == 0) or (u[i] == v[i])
    '''
#     return np.array([True if (u[i] == 0 or v[i] == 0) or (u[i] == v[i]) else False 
#                      for i in range(len(u))])
    return np.array([1 if (u[i] == 0 or v[i] == 0) or (u[i] == v[i]) else 0 
                     for i in range(len(u))], dtype=myint)

def agree_(u,v):
    '''
    Given two vectors u and v, return 1 iff u and v agree at all indices
    and 0 otherwise.
    '''
    ag = agree(u,v)
    return int(ag.all())

def agree_alt(u,v):
    '''
    Given two vectors u and v, return 1 iff u and v agree at all indices
    and 0 otherwise.
    '''
    ag = agree(u,v)
    total_agreement = np.linalg.norm(agree(u,v), 1) == m
    return int(total_agreement)
#     if total_agreement:
#         return 1.0
#     return 0.0

In [25]:
def make_agreeing_vector_pair(pred=None):
    u = make_random_pfv()
    v = make_random_pfv()
    if pred is None:
        while not agree_(u,v):
            u = make_random_pfv()
            v = make_random_pfv()
        return u,v
    while not agree_(u,v) and not pred(u,v):
        u = make_random_pfv()
        v = make_random_pfv()
    return u,v

In [26]:
num_test_pairs = int(1e5)
random_vector_pairs = [(make_random_pfv(), make_random_pfv()) for each in range(num_test_pairs)]
len(random_vector_pairs)

100000

In [27]:
for pair in random_vector_pairs:
    assert agree_(*pair) == agree_alt(*pair)

In [28]:
%%timeit

list(starmap(agree, random_vector_pairs))

2.55 s ± 20.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [29]:
%%timeit

list(starmap(agree_, random_vector_pairs))

2.91 s ± 14.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [30]:
%%timeit

list(starmap(agree_alt, random_vector_pairs))

6.12 s ± 104 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
num_test_pairs = int(1e5)
agreeing_vector_pairs = [make_agreeing_vector_pair() for each in range(num_test_pairs)]
len(agreeing_vector_pairs)

100000

### Union

The union of two partial feature vectors $u,v$ that agree should result in a partial feature vector that has every specified value in $u$, every specified value in $v$, and no other specified values.

In general, the result is at least as specified as either $u$ or $v$: when $u=v$ $u \cup v = u = v$ and $u \cup v$ is no more specified, but otherwise $u \cup v$ will be strictly more specified than either $u$ or $v$.

In [32]:
XYs = tuple(product((-1,0,1), (-1,0,1)))
XYs

def cup(x,y):
    '''
    Formula:
    x or y, where 1 = T, -1 = T, 0 = F
    
    Algebra:
    0 is the identity ∀x ∈ {-1,0,+1}
    x is its own identity ∀x ∈ {-1,0,+1}
    (-1 and +1 are mutual inverses, but this case shouldn't occur when agree(x,y) holds)
    
    Pattern:
    x ∪ x = x
    
    0 ∪ y = y
    x ∪ 0 = x
    
    _ ∪ _ = 0  \\ <- shouldn't occur in two pfvs that agree
    '''
    if x == 0:  #if x is unspecified, return y
        return y
    elif y == 0: #if y is unspecified, return x
        return x
    elif x == y: #if both are specified and the same, return their common value
        return x
    else: #otherwise return 0
        return 0

for x,y in XYs:
    ((x,y), cup(x,y))

((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 0), (0, 1), (1, -1), (1, 0), (1, 1))

((-1, -1), -1)

((-1, 0), -1)

((-1, 1), 0)

((0, -1), -1)

((0, 0), 0)

((0, 1), 1)

((1, -1), 0)

((1, 0), 1)

((1, 1), 1)

In [33]:
def union(u, v):
    if CAREFUL:
        assert agree_(u,v)
    return np.sign(u + v)

def twoToOne(x):
    if x != 2 and x != -2:
        return x
    elif x == 2:
        return 1
    else:
        return -1

twoToOne_v = np.vectorize(twoToOne)

def union_alt(u, v):
    if CAREFUL:
        assert agree_(u,v)
    return np.array(twoToOne_v(u + v), dtype=myint)

def union_alt2(u, v):
    if CAREFUL:
        assert agree_(u,v)
    s = u + v
    return np.trunc( np.sqrt(np.abs(s)) ) * np.sign(s,dtype=myint)

def union_alt3(u, v):
    if CAREFUL:
        assert agree_(u,v)
    w = u.copy()
    for i,x in enumerate(v):
        if x != 0:
            w[i] = x
    return w

def union_alt4(u, v):
    if CAREFUL:
        assert agree_(u,v)
    return np.array([cup(u[i],v[i]) for i in range(m)], dtype=myint)

cup_v = np.vectorize(cup)

def union_alt5(u, v):
    if CAREFUL:
        assert agree_(u,v)
    return np.array(cup_v(u,v), dtype=myint)

In [34]:
test_pair = choice(agreeing_vector_pairs)
test_pair

union(*test_pair)
union_alt(*test_pair)
union_alt2(*test_pair)
union_alt3(*test_pair)
union_alt4(*test_pair)
union_alt5(*test_pair)

(array([-1,  0,  1, -1,  0,  0,  1,  1,  0], dtype=int8),
 array([-1, -1,  1,  0,  1,  0,  0,  1,  1], dtype=int8))

array([-1, -1,  1, -1,  1,  0,  1,  1,  1], dtype=int8)

array([-1, -1,  1, -1,  1,  0,  1,  1,  1], dtype=int8)

array([-1., -1.,  1., -1.,  1.,  0.,  1.,  1.,  1.], dtype=float16)

array([-1, -1,  1, -1,  1,  0,  1,  1,  1], dtype=int8)

array([-1, -1,  1, -1,  1,  0,  1,  1,  1], dtype=int8)

array([-1, -1,  1, -1,  1,  0,  1,  1,  1], dtype=int8)

In [35]:
%%timeit

list(starmap(union, agreeing_vector_pairs));

108 ms ± 1.36 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [36]:
%%timeit

list(starmap(union_alt, agreeing_vector_pairs));

2.04 s ± 125 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [37]:
%%timeit

list(starmap(union_alt2, agreeing_vector_pairs));

455 ms ± 2.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [38]:
%%timeit

list(starmap(union_alt3, agreeing_vector_pairs));

1.6 s ± 43.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [39]:
%%timeit

list(starmap(union_alt4, agreeing_vector_pairs));

2.46 s ± 12.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [55]:
%%timeit

list(starmap(union_alt5, agreeing_vector_pairs));

2.19 s ± 83.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Intersection

The intersection of two partial feature vectors $u,v$ should result in a partial feature vector that has every specified value that is specified in both $u$ and $v$ and where $u$ and $v$ agree, and no other specified values.

In general, the result is no more specified than either $u$ or $v$: when $u=v$ $u \cap v = u = v$ and $u \cap v$ is no less specified, but otherwise $u \cap v$ will be strictly less specified than either $u$ or $v$.

In [56]:
XYs = tuple(product((-1,0,1), (-1,0,1)))
XYs 
    
def cap(x,y):
    '''
    Algebra:
    0 is the annihilating element ∀x ∈ {-1,0,+1}
    x is its own identity ∀x ∈ {-1,0,+1}
    -1 and +1 annihilate each other
    
    Pattern:
    x ∩ x = x
    
    0 ∩ _ = 0
    _ ∩ 0 = 0
    
    _ ∩ _ = 0
    '''
    if x == 0: #if x is unspecified, return 0
        return 0
    elif y == 0: #if y is unspecified, return 0
        return 0
    elif x == y: #if both are specified and the same, return their common value
        return x
    else: #otherwise return 0
        return 0

def foo(x,y):
    return np.sign( (x == y) * (x + y) )

def bar(x,y):
    return (x == y) * (x + y) * 0.5

def baz(x,y):
    return (x == y) * int((x + y) / 2)

for x,y in XYs:
#     ((x,y), cap(x,y))
    ((x,y), cap(x,y), foo(x,y), bar(x,y), baz(x,y))

((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 0), (0, 1), (1, -1), (1, 0), (1, 1))

((-1, -1), -1, -1, -1.0, -1)

((-1, 0), 0, 0, 0.0, 0)

((-1, 1), 0, 0, 0.0, 0)

((0, -1), 0, 0, 0.0, 0)

((0, 0), 0, 0, 0.0, 0)

((0, 1), 0, 0, 0.0, 0)

((1, -1), 0, 0, 0.0, 0)

((1, 0), 0, 0, 0.0, 0)

((1, 1), 1, 1, 1.0, 1)

In [57]:
def intersection(u, v):
    return np.sign(  np.equal(u, v) * (u + v) )

def intersection_alt(u, v):
    return np.array([cap(u[i],v[i]) for i in range(m)], dtype=myint)

def intersection_alt2(u, v):
    return np.array(np.equal(u, v) * (u + v) * 0.5, dtype=myint)

def intersection_alt3(u, v):
    return np.array([bar(u[i], v[i]) for i in range(m)], dtype=myint)

In [58]:
test_pair = choice(random_vector_pairs)
test_pair

intersection(*test_pair)
intersection_alt(*test_pair)
intersection_alt2(*test_pair)
intersection_alt3(*test_pair)

(array([ 1,  0,  1, -1,  0,  0, -1,  1,  1], dtype=int8),
 array([ 1,  1, -1, -1,  0,  0, -1,  1,  1], dtype=int8))

array([ 1,  0,  0, -1,  0,  0, -1,  1,  1], dtype=int8)

array([ 1,  0,  0, -1,  0,  0, -1,  1,  1], dtype=int8)

array([ 1,  0,  0, -1,  0,  0, -1,  1,  1], dtype=int8)

array([ 1,  0,  0, -1,  0,  0, -1,  1,  1], dtype=int8)

In [59]:
for pair in random_vector_pairs:
    assert np.array_equal(intersection(*pair), intersection_alt(*pair)), 'Agreement failure on {0}'.format(pair)
    assert np.array_equal(intersection_alt2(*pair), intersection_alt(*pair)), 'Agreement failure on {0}'.format(pair)
    assert np.array_equal(intersection_alt3(*pair), intersection_alt(*pair)), 'Agreement failure on {0}'.format(pair)

In [60]:
%%timeit

list(starmap(intersection, random_vector_pairs));

234 ms ± 829 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [61]:
%%timeit

list(starmap(intersection_alt, random_vector_pairs));

2.58 s ± 18.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [62]:
%%timeit

list(starmap(intersection_alt2, random_vector_pairs));

410 ms ± 7.47 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [63]:
%%timeit

list(starmap(intersection_alt3, random_vector_pairs));

3.92 s ± 13.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Extension

In [64]:
def extension(v, asIndexVector=True):
    '''
    The extension of a partial feature vector v is the set of object vectors
    (= fully specified, or 'total' feature vectors) that 'agree' with it.
    '''
    matches = tuple([o for o in objects if agree_(v,o)])
#     matches = tuple([o for o in objects if agree(v,o).all()])
#     matches = np.array([1.0 if np.linalg.norm(agree(v,o), 1) == num_features else 0.0 for o in objects])
    if asIndexVector:
        return makeExtensionVector([getIndex(o) for o in matches])
    return matches

In [71]:
def in_extension(s, o):
    '''
    Given a partial feature vector s and a fully specified object vector o,
    returns True iff o ∈ ⟦s⟧ and False otherwise.
    '''
    if np.array_equal( s, np.zeros(s.shape) ):
        return True
    
    s_ = np.abs(s)
    o_ = s_ * o
    s_normed = normalize(s)
    o_normed = normalize(o_)
    pr = np.dot(s_normed, o_normed)
    return np.isclose(1.0, pr)

def extension_alt(s):
    extensionVector = np.array([1 if in_extension(s, o) else 0 for o in objectMap], dtype=myint)
    return extensionVector

In [72]:
p = make_random_pfv()
extension(p)
extension_alt(p)

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)

In [73]:
objectMap

array([[-1, -1, -1, ...,  1,  1,  1],
       [ 1,  1,  1, ...,  1,  1, -1],
       [ 1,  1,  1, ..., -1,  1, -1],
       ..., 
       [ 1,  1, -1, ...,  1, -1,  1],
       [ 1,  1,  1, ...,  1, -1,  1],
       [ 1,  1, -1, ...,  1, -1, -1]])

In [74]:
num_test_pairs = int(1e5)
random_vectors = [make_random_pfv() for each in range(num_test_pairs)]
len(random_vectors)

100000

In [84]:
def extension_test(v):
    foo = extension(v)
    bar = extension_alt(v)
    if not np.array_equal(foo, bar):
        raise Exception('Disagreement on {0}: {1} vs. {2}'.format(v, extension(v), extension_alt(v)))
    return True

In [96]:
par(delayed(extension_test)(v) for v in random_vectors)

# for p in tqdm(random_vectors):
#     assert np.array_equal(extension(p), extension_alt(p)), 'Disagreement on {0}: {1} vs. {2}'.format(p, extension(p), extension_alt(p))

[Parallel(n_jobs=30)]: Using backend MultiprocessingBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   1 tasks      | elapsed:    0.0s
[Parallel(n_jobs=30)]: Batch computation too fast (0.0365s.) Setting batch_size=10.
[Parallel(n_jobs=30)]: Done  12 tasks      | elapsed:    0.1s
[Parallel(n_jobs=30)]: Done  25 tasks      | elapsed:    0.1s
[Parallel(n_jobs=30)]: Done  38 tasks      | elapsed:    0.1s
[Parallel(n_jobs=30)]: Done  53 tasks      | elapsed:    0.1s
[Parallel(n_jobs=30)]: Done 140 tasks      | elapsed:    0.5s
[Parallel(n_jobs=30)]: Done 310 tasks      | elapsed:    0.7s
[Parallel(n_jobs=30)]: Done 480 tasks      | elapsed:    0.9s
[Parallel(n_jobs=30)]: Done 670 tasks      | elapsed:    1.2s
[Parallel(n_jobs=30)]: Done 860 tasks      | elapsed:    1.4s
[Parallel(n_jobs=30)]: Done 1070 tasks      | elapsed:    1.7s
[Parallel(n_jobs=30)]: Done 1280 tasks      | elapsed:    2.0s
[Parallel(n_jobs=30)]: Done 1510 tasks      | elapsed:    2.2s
[Parallel(n_jobs=30

[Parallel(n_jobs=30)]: Done 92710 tasks      | elapsed:  1.9min
[Parallel(n_jobs=30)]: Done 94080 tasks      | elapsed:  2.0min
[Parallel(n_jobs=30)]: Done 95470 tasks      | elapsed:  2.0min
[Parallel(n_jobs=30)]: Done 96860 tasks      | elapsed:  2.0min
[Parallel(n_jobs=30)]: Done 98270 tasks      | elapsed:  2.0min
[Parallel(n_jobs=30)]: Done 100000 out of 100000 | elapsed:  2.1min finished


[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,

In [97]:
%%timeit

list(map(extension, random_vectors))

17min 58s ± 2.64 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [98]:
%%timeit

list(map(extension_alt, random_vectors))

16min 6s ± 3.22 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [99]:
# agree_mat(p, objectMap)

## Specification array + value array

This representation of a partial feature vector $p$ uses two bit sequences, $s$ and $v$
 - $s_i = 0$ iff $p_i = 0$ and is otherwise $1$
 - $v_i = 0$ if $p_i = -1$
 - $v_i = 1$ if $p_i = 1$

Note that the value of $v_i$ is unspecified if $p_i = 0$.

### Converting between representations

In [100]:
spec_cb = {-1:bitarray('1'),
            0:bitarray('0'),
            1:bitarray('1')}
val_cb = {-1:bitarray('0'),
           0:bitarray('0'),
           1:bitarray('1')}

In [101]:
def pfv_to_sv(pfv):
    s = bitarray()
    s.encode(spec_cb, list(pfv))
    v = bitarray()
    v.encode(val_cb, list(pfv))
    return s,v

def sv_to_pfv(s,v):
#     print('s,v = {0}, {1}'.format(s.to01(), v.to01()))
    specified = np.array(s.tolist(), dtype=myint)
    values = np.array(v.tolist(), dtype=myint)
#     print('\ts,v = {0}, {1}'.format(specified, values))
    for i in range(m):
        if specified[i] != 0:
            specified[i] = 1 if values[i] else -1
#     print('\ts = {0}'.format(specified))
    return specified

In [102]:
num_test_pairs = int(1e5)
random_vectors = [make_random_pfv() for each in tqdm(range(num_test_pairs))]
len(random_vectors)



  0%|          | 0/100000 [00:00<?, ?it/s][A[A

 23%|██▎       | 22745/100000 [00:00<00:00, 227444.79it/s][A[A

 46%|████▌     | 45741/100000 [00:00<00:00, 228193.14it/s][A[A

 69%|██████▉   | 69040/100000 [00:00<00:00, 229609.29it/s][A[A

 92%|█████████▏| 92338/100000 [00:00<00:00, 230608.65it/s][A[A

100%|██████████| 100000/100000 [00:00<00:00, 230439.80it/s][A[A

100000

In [103]:
for p in tqdm(random_vectors):
    s,v = pfv_to_sv(p)
    p_prime = sv_to_pfv(s,v)
    assert np.array_equal(p, p_prime), 'Conversion failure on {0}'.format(pair)



  0%|          | 0/100000 [00:00<?, ?it/s][A[A

  2%|▏         | 1872/100000 [00:00<00:05, 18714.26it/s][A[A

  3%|▎         | 3369/100000 [00:00<00:05, 17407.37it/s][A[A

  5%|▍         | 4949/100000 [00:00<00:05, 16889.57it/s][A[A

  7%|▋         | 6653/100000 [00:00<00:05, 16931.94it/s][A[A

  8%|▊         | 8478/100000 [00:00<00:05, 17304.41it/s][A[A

 10%|█         | 10338/100000 [00:00<00:05, 17671.55it/s][A[A

 12%|█▏        | 12153/100000 [00:00<00:04, 17810.07it/s][A[A

 14%|█▍        | 13978/100000 [00:00<00:04, 17937.73it/s][A[A

 16%|█▌        | 15837/100000 [00:00<00:04, 18126.90it/s][A[A

 18%|█▊        | 17679/100000 [00:01<00:04, 18211.50it/s][A[A

 20%|█▉        | 19511/100000 [00:01<00:04, 18241.11it/s][A[A

 21%|██▏       | 21347/100000 [00:01<00:04, 18274.99it/s][A[A

 23%|██▎       | 23165/100000 [00:01<00:04, 18245.15it/s][A[A

 25%|██▌       | 25009/100000 [00:01<00:04, 18302.58it/s][A[A

 27%|██▋       | 26835/100000 [00:01<00:04,

### Operations

If $p,q$ are two partial feature vectors and $(s^p, v^p), (s^q, v^q)$ are their associated specification and value bitvectors, then:

We can define an element-wise agree operation by pattern matching:
```
agree((s^p_i,v^p_i), (s^q_i,v^q_i)):
    (s_w, s_x) = (s_w, s_x) ⟶ 1
    (0,0)      = (_, _)     ⟶ 1
    (_, _)     = (0,0)      ⟶ 1
    _          = _          ⟶ 0
```
Or perhaps more clearly by Boolean formula:
 - $\text{agree}(s^p_i,v^p_i,s^q_i,v^q_i) = (\neg s^p_i \lor \neg s^q_i) \lor ((s^p_i \land s^q_i) \land (v^p_i \iff v^q_i)) $

We can define an element-wise union operation (assuming agreement holds) by pattern matching:
```
Assuming agree(p,q) holds:

cup((s^p_i,v^p_i), (s^q_i,v^q_i)):
    (s_w, s_x) ∪ (s_w, s_x) = (s_w, s_x)
    (0,0)      ∪ (s_y, s_z) = (s_y, s_z)
    (s_w, s_x) ∪ (0,0)      = (s_w, s_x)
    _          ∪ _          = (0,0)
```
Or again, more clearly by Boolean formula:
 - $\text{cup}(s^p_i,v^p_i,s^q_i,v^q_i) = (s^p_i \lor s^q_i, v^p_i \lor v^q_i)$

I.e. we can take the `bitwise or` of respective specification vectors and value vectors to get the specification and value vector of the union of two partial feature vectors.

We can define an element-wise intersection operation by pattern matching:
```
cap((s^p_i,v^p_i), (s^q_i,v^q_i)):
    (s_w, s_x) ∩ (s_w, s_x) = (s_w, s_x)
    (0,0)      ∩ (_, _)     = (0,0)
    (_, _)     ∩ (0,0)      = (0,0)
    _          ∩ _          = (0,0)
```
...or by Boolean formula
 - $\text{cap}(s^p_i,v^p_i,s^q_i,v^q_i) = ((s^p_i \land s^q_i) \land (v^p_i \iff v^q_i), v^p_i \land v^q_i)$

In [104]:
def xor(p,q):
    return (p & ~q) | (~p & q)

def ifthen(p,q):
    return ~p | q

def iff(p,q):
    return ifthen(p,q) & ifthen(q,p)

assert xor(bitarray('0011'), bitarray('0101')) == bitarray('0110')
assert ifthen(bitarray('0011'), bitarray('0101')) == bitarray('1101')
assert iff(bitarray('0011'), bitarray('0101')) == bitarray('1001')

In [105]:
def agree_ba(s_p, v_p, s_q, v_q):
    return (~s_p | ~s_q) | ((s_p & s_q) & iff(v_p, v_q))

def agree_ba_(s_p, v_p, s_q, v_q):
    return agree_ba(s_p, v_p, s_q, v_q).all()

def union_ba(s_p, v_p, s_q, v_q):
    return s_p | s_q, v_p | v_q

def intersection_ba(s_p, v_p, s_q, v_q):
    return (s_p & s_q) & iff(v_p, v_q), v_p & v_q

Below we test that they have the same behavior as the baseline representation and operations:

In [107]:
for p,q in tqdm(agreeing_vector_pairs):
    s_p, v_p = pfv_to_sv(p)
    s_q, v_q = pfv_to_sv(q)
    assert np.array_equal(agree(p,q), np.array(list(agree_ba(s_p, v_p, s_q, v_q)), dtype=myint))
    assert np.array_equal(union(p,q), sv_to_pfv(*union_ba(s_p, v_p, s_q, v_q)))



  0%|          | 0/100000 [00:00<?, ?it/s][A[A

  1%|          | 825/100000 [00:00<00:12, 8249.22it/s][A[A

  2%|▏         | 1645/100000 [00:00<00:11, 8233.19it/s][A[A

  2%|▏         | 2471/100000 [00:00<00:11, 8238.89it/s][A[A

  3%|▎         | 3300/100000 [00:00<00:11, 8251.90it/s][A[A

  4%|▍         | 4113/100000 [00:00<00:11, 8214.49it/s][A[A

  5%|▍         | 4934/100000 [00:00<00:11, 8212.47it/s][A[A

  6%|▌         | 5756/100000 [00:00<00:11, 8212.00it/s][A[A

  7%|▋         | 6574/100000 [00:00<00:11, 8201.22it/s][A[A

  7%|▋         | 7397/100000 [00:00<00:11, 8209.49it/s][A[A

  8%|▊         | 8211/100000 [00:01<00:11, 8187.23it/s][A[A

  9%|▉         | 9034/100000 [00:01<00:11, 8198.89it/s][A[A

 10%|▉         | 9855/100000 [00:01<00:10, 8199.70it/s][A[A

 11%|█         | 10664/100000 [00:01<00:11, 7641.49it/s][A[A

 11%|█▏        | 11429/100000 [00:01<00:11, 7643.09it/s][A[A

 12%|█▏        | 12250/100000 [00:01<00:11, 7802.84it/s][A[A

 

In [108]:
for p,q in tqdm(random_vector_pairs):
    s_p, v_p = pfv_to_sv(p)
    s_q, v_q = pfv_to_sv(q)
    assert np.array_equal(agree(p,q), np.array(list(agree_ba(s_p, v_p, s_q, v_q)), dtype=myint))
    assert np.array_equal(intersection(p,q), sv_to_pfv(*intersection_ba(s_p, v_p, s_q, v_q)))



  0%|          | 0/100000 [00:00<?, ?it/s][A[A

  1%|          | 754/100000 [00:00<00:13, 7531.37it/s][A[A

  2%|▏         | 1537/100000 [00:00<00:12, 7616.22it/s][A[A

  2%|▏         | 2327/100000 [00:00<00:12, 7699.15it/s][A[A

  3%|▎         | 3122/100000 [00:00<00:12, 7770.80it/s][A[A

  4%|▍         | 3903/100000 [00:00<00:12, 7781.55it/s][A[A

  5%|▍         | 4698/100000 [00:00<00:12, 7828.82it/s][A[A

  5%|▌         | 5496/100000 [00:00<00:12, 7871.62it/s][A[A

  6%|▋         | 6297/100000 [00:00<00:11, 7910.22it/s][A[A

  7%|▋         | 7097/100000 [00:00<00:11, 7934.40it/s][A[A

  8%|▊         | 7891/100000 [00:01<00:11, 7935.85it/s][A[A

  9%|▊         | 8693/100000 [00:01<00:11, 7958.60it/s][A[A

  9%|▉         | 9491/100000 [00:01<00:11, 7964.84it/s][A[A

 10%|█         | 10283/100000 [00:01<00:11, 7950.87it/s][A[A

 11%|█         | 11072/100000 [00:01<00:11, 7931.11it/s][A[A

 12%|█▏        | 11861/100000 [00:01<00:11, 7784.71it/s][A[A

 

In [109]:
def in_extension_ba(s_p, v_p, s_o, v_o):
    return agree_ba_(s_p, v_p, s_o, v_o)

### Performance evaluation

Now we compare timing:

In [110]:
agreeing_vector_pairs_ba = [(pfv_to_sv(u)[0], pfv_to_sv(u)[1], pfv_to_sv(v)[0], pfv_to_sv(v)[1]) for u,v in agreeing_vector_pairs]

In [111]:
random_vector_pairs_ba = [(pfv_to_sv(u)[0], pfv_to_sv(u)[1], pfv_to_sv(v)[0], pfv_to_sv(v)[1]) for u,v in random_vector_pairs]

In [112]:
def unpack_pfv_pair(pair):
    p = pair[0]
    q = pair[1]
    s_p, v_p = pfv_to_sv(p)
    s_q, v_q = pfv_to_sv(q)
    return (s_p, v_p, s_q, v_q)

# def unpack_sv_pair_pair(sv_pair_pair):
#     s_p, v_p = sv_pair_pair[0][0], sv_pair_pair[0][1]
#     s_q, v_q = sv_pair_pair[1][2], sv_pair_pair[1][1]
#     return (s_p, v_p, s_q, v_q)

In [113]:
%%timeit

list(starmap(agree, random_vector_pairs))

2.48 s ± 16.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [114]:
%%timeit

list(starmap(agree_ba, random_vector_pairs_ba))

207 ms ± 5.67 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [115]:
%%timeit

list(starmap(union, agreeing_vector_pairs))

102 ms ± 654 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [116]:
%%timeit

list(starmap(union_ba, agreeing_vector_pairs_ba))

55.4 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [117]:
%%timeit

list(starmap(intersection, random_vector_pairs))

232 ms ± 4.88 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [118]:
%%timeit

list(starmap(intersection_ba, random_vector_pairs_ba))

185 ms ± 2.15 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### Conclusion

In the baseline representation:
 - `agreement` checking is very expensive, taking ≈10x longer than `union`ing and ≈5x longer than `intersect`ing

In the bitarray representation:
 - `agreement` checking and `intersection` take comparably long, and both take about 5x longer than `union`

`agreement` checking is about 5x faster with bitarrays than with the baseline representation, `intersection` is about 1.25x faster, and `union` is about 2x faster.

## Matrix extension of the baseline representation

In [119]:
random_stack_list = random_vector_pairs[:3]; random_stack_list

[(array([ 1,  0,  1,  0,  1,  0,  1, -1,  0], dtype=int8),
  array([ 1,  0,  0,  0,  0,  0,  0,  0, -1], dtype=int8)),
 (array([ 0,  1,  0, -1, -1,  0, -1, -1, -1], dtype=int8),
  array([-1,  0,  0,  0,  0,  0,  0,  1,  0], dtype=int8)),
 (array([ 0,  1,  1,  1, -1, -1,  0, -1,  0], dtype=int8),
  array([ 0,  1, -1, -1, -1,  1,  1,  0,  1], dtype=int8))]

In [120]:
random_vector_pairs[:10]

[(array([ 1,  0,  1,  0,  1,  0,  1, -1,  0], dtype=int8),
  array([ 1,  0,  0,  0,  0,  0,  0,  0, -1], dtype=int8)),
 (array([ 0,  1,  0, -1, -1,  0, -1, -1, -1], dtype=int8),
  array([-1,  0,  0,  0,  0,  0,  0,  1,  0], dtype=int8)),
 (array([ 0,  1,  1,  1, -1, -1,  0, -1,  0], dtype=int8),
  array([ 0,  1, -1, -1, -1,  1,  1,  0,  1], dtype=int8)),
 (array([ 1,  0,  0, -1,  1, -1, -1,  1, -1], dtype=int8),
  array([-1, -1,  1, -1,  1,  1, -1,  0,  0], dtype=int8)),
 (array([-1, -1,  1,  1,  0,  1, -1,  1,  0], dtype=int8),
  array([ 1,  1, -1, -1,  1,  1,  1, -1,  1], dtype=int8)),
 (array([ 0, -1, -1, -1,  0, -1,  0, -1, -1], dtype=int8),
  array([ 0,  1,  0,  1, -1, -1, -1, -1,  0], dtype=int8)),
 (array([ 0,  0,  0,  0, -1,  0, -1,  0, -1], dtype=int8),
  array([ 0, -1,  0,  0,  1,  1,  1, -1,  0], dtype=int8)),
 (array([ 0,  1,  0, -1, -1, -1,  0, -1,  0], dtype=int8),
  array([-1, -1,  0, -1,  0,  1,  0, -1, -1], dtype=int8)),
 (array([-1,  0,  1, -1, -1,  0,  0, -1,  0], dt

In [121]:
first = lambda seq: seq[0]
second = lambda seq: seq[1]

stack_a, stack_b = list(map(first, random_vector_pairs)), list(map(second, random_vector_pairs))
random_pair_stack_a, random_pair_stack_b = np.array(stack_a), np.array(stack_b)
random_pair_stack_a.dtype
random_pair_stack_b.dtype

dtype('int8')

dtype('int8')

In [122]:
stack_a, stack_b = list(map(first, agreeing_vector_pairs)), list(map(second, agreeing_vector_pairs))
agreeing_pair_stack_a, agreeing_pair_stack_b = np.array(stack_a), np.array(stack_b)
agreeing_pair_stack_a.dtype
agreeing_pair_stack_b.dtype

dtype('int8')

dtype('int8')

### Agreement testing

In [123]:
random_pair_stack_a.shape
n = random_pair_stack_a.shape[0]

(100000, 9)

In [124]:
list(starmap(agree_, random_vector_pairs));

In [125]:
vector_agree__results = np.array([agree_(random_pair_stack_a[i],random_pair_stack_b[i]) for i in range(n)])
vector_agree__results.shape

(100000,)

In [126]:
def agree_mat(A,B):
    '''
    Given two matrices A::(n,m) and B::(n,m), 
    return C::(n,1) where 
    C[i] = 1 iff A[i] and B[i] agree at all indices
    and 0 otherwise.
    '''
    # (x == 0 or y == 0) or ((x != 0 and y != 0) and (x == y))
    A_unspecified = A == 0
    B_unspecified = B == 0
    A_or_B_unspecified = A_unspecified | B_unspecified
    
    A_specified = A != 0
    B_specified = B != 0
    A_and_B_specified = A_specified & B_specified
    A_equal_B = np.equal(A,B)
    A_B_both_specified_and_equal = A_and_B_specified & A_equal_B

    ag = A_or_B_unspecified | A_B_both_specified_and_equal
#     return ag
    result = np.prod(ag, axis=-1, dtype=myint)
    return result

In [127]:
matrix_agree_result = agree_mat(random_pair_stack_a, random_pair_stack_b)
matrix_agree_result.shape

(100000,)

In [128]:
np.array_equal(matrix_agree_result, vector_agree__results)

True

In [129]:
for i in range(n):
    u = random_pair_stack_a[i]
    v = random_pair_stack_b[i]
    assert agree_(u,v) == agree_mat(u,v), '{0}, {1} -> {2} vs. {3}'.format(u,v, agree_(u,v), agree_mat(u,v, True))

### Union

In [130]:
vector_union_results = np.array([union(agreeing_pair_stack_a[i],agreeing_pair_stack_b[i]) for i in range(n)])
vector_union_results.shape

(100000, 9)

In [131]:
union(agreeing_pair_stack_a, agreeing_pair_stack_b)

array([[ 1, -1,  1, ...,  1,  1, -1],
       [-1, -1,  0, ..., -1,  1, -1],
       [ 1,  1,  1, ..., -1,  1,  1],
       ..., 
       [ 1, -1,  1, ...,  1,  1, -1],
       [ 1,  0,  1, ..., -1,  1, -1],
       [-1,  1,  1, ..., -1,  1, -1]], dtype=int8)

In [132]:
np.array_equal(vector_union_results, union(agreeing_pair_stack_a, agreeing_pair_stack_b) )

True

### Intersection

In [133]:
vector_intersection_results = np.array([intersection(random_pair_stack_a[i],random_pair_stack_b[i]) for i in range(n)])
vector_intersection_results.shape

(100000, 9)

In [134]:
intersection(agreeing_pair_stack_a, agreeing_pair_stack_b)

array([[ 0, -1,  0, ...,  0,  0, -1],
       [ 0,  0,  0, ...,  0,  1,  0],
       [ 1,  1,  1, ...,  0,  0,  1],
       ..., 
       [ 1,  0,  0, ...,  1,  0, -1],
       [ 0,  0,  1, ..., -1,  0,  0],
       [ 0,  0,  1, ...,  0,  1,  0]], dtype=int8)

In [135]:
np.array_equal(vector_intersection_results, intersection(random_pair_stack_a, random_pair_stack_b) )

True

### Extension

Using `agree_mat` we can define a faster method for calculating extensions:

In [136]:
def extension_(pfv):
    return agree_mat(pfv, objectMap)

In [137]:
p = make_random_pfv()
p
' '
extension(p)
extension_alt(p)
extension_(p)

array([ 1,  0, -1,  1,  1,  0,  0,  0,  0], dtype=int8)

' '

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 0], dtype=int8)

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 0], dtype=int8)

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 0], dtype=int8)

In [139]:
for p in tqdm(random_vectors):
    assert np.array_equal(extension(p), extension_(p)), 'Disagreement on {0}: {1} vs. {2}'.format(p, extension(p), extension_alt(p))



  0%|          | 0/100000 [00:00<?, ?it/s][A[A

  0%|          | 11/100000 [00:00<17:54, 93.05it/s][A[A

  0%|          | 22/100000 [00:00<17:11, 96.88it/s][A[A

  0%|          | 31/100000 [00:00<17:47, 93.61it/s][A[A

  0%|          | 42/100000 [00:00<17:22, 95.84it/s][A[A

  0%|          | 53/100000 [00:00<17:05, 97.42it/s][A[A

  0%|          | 65/100000 [00:00<16:56, 98.32it/s][A[A

  0%|          | 74/100000 [00:00<18:31, 89.89it/s][A[A

  0%|          | 85/100000 [00:00<17:47, 93.59it/s][A[A

  0%|          | 94/100000 [00:00<18:37, 89.38it/s][A[A

  0%|          | 103/100000 [00:01<18:36, 89.46it/s][A[A

  0%|          | 112/100000 [00:01<19:01, 87.49it/s][A[A

  0%|          | 122/100000 [00:01<18:58, 87.71it/s][A[A

  0%|          | 131/100000 [00:01<19:05, 87.17it/s][A[A

  0%|          | 142/100000 [00:01<17:59, 92.52it/s][A[A

  0%|          | 152/100000 [00:01<18:15, 91.17it/s][A[A

  0%|          | 163/100000 [00:01<17:30, 95.06it/s][A[

  1%|▏         | 1374/100000 [00:15<18:05, 90.87it/s][A[A

  1%|▏         | 1384/100000 [00:15<21:05, 77.90it/s][A[A

  1%|▏         | 1394/100000 [00:15<19:42, 83.40it/s][A[A

  1%|▏         | 1405/100000 [00:15<18:23, 89.38it/s][A[A

  1%|▏         | 1415/100000 [00:15<19:54, 82.50it/s][A[A

  1%|▏         | 1426/100000 [00:15<18:47, 87.46it/s][A[A

  1%|▏         | 1436/100000 [00:16<18:49, 87.27it/s][A[A

  1%|▏         | 1445/100000 [00:16<23:14, 70.68it/s][A[A

  1%|▏         | 1455/100000 [00:16<21:13, 77.40it/s][A[A

  1%|▏         | 1464/100000 [00:16<20:36, 79.67it/s][A[A

  1%|▏         | 1474/100000 [00:16<19:30, 84.20it/s][A[A

  1%|▏         | 1483/100000 [00:16<19:38, 83.56it/s][A[A

  1%|▏         | 1494/100000 [00:16<19:12, 85.43it/s][A[A

  2%|▏         | 1504/100000 [00:16<18:22, 89.31it/s][A[A

  2%|▏         | 1515/100000 [00:17<17:32, 93.61it/s][A[A

  2%|▏         | 1527/100000 [00:17<16:32, 99.20it/s][A[A

  2%|▏         | 1538/10

  3%|▎         | 2724/100000 [00:30<19:08, 84.73it/s][A[A

  3%|▎         | 2735/100000 [00:30<18:35, 87.20it/s][A[A

  3%|▎         | 2744/100000 [00:30<20:19, 79.75it/s][A[A

  3%|▎         | 2754/100000 [00:30<19:21, 83.75it/s][A[A

  3%|▎         | 2763/100000 [00:30<19:02, 85.14it/s][A[A

  3%|▎         | 2772/100000 [00:31<20:43, 78.20it/s][A[A

  3%|▎         | 2783/100000 [00:31<18:59, 85.29it/s][A[A

  3%|▎         | 2793/100000 [00:31<18:16, 88.65it/s][A[A

  3%|▎         | 2804/100000 [00:31<17:32, 92.35it/s][A[A

  3%|▎         | 2815/100000 [00:31<17:10, 94.30it/s][A[A

  3%|▎         | 2825/100000 [00:31<17:14, 93.92it/s][A[A

  3%|▎         | 2835/100000 [00:31<17:26, 92.86it/s][A[A

  3%|▎         | 2845/100000 [00:31<17:36, 91.97it/s][A[A

  3%|▎         | 2856/100000 [00:31<17:12, 94.10it/s][A[A

  3%|▎         | 2866/100000 [00:32<18:55, 85.54it/s][A[A

  3%|▎         | 2877/100000 [00:32<18:00, 89.85it/s][A[A

  3%|▎         | 2888/10

  4%|▍         | 4076/100000 [00:45<17:20, 92.23it/s][A[A

  4%|▍         | 4086/100000 [00:45<17:20, 92.17it/s][A[A

  4%|▍         | 4096/100000 [00:45<17:53, 89.33it/s][A[A

  4%|▍         | 4105/100000 [00:45<18:05, 88.34it/s][A[A

  4%|▍         | 4115/100000 [00:46<18:05, 88.31it/s][A[A

  4%|▍         | 4124/100000 [00:46<19:00, 84.08it/s][A[A

  4%|▍         | 4134/100000 [00:46<18:19, 87.16it/s][A[A

  4%|▍         | 4144/100000 [00:46<17:39, 90.49it/s][A[A

  4%|▍         | 4154/100000 [00:46<18:27, 86.50it/s][A[A

  4%|▍         | 4163/100000 [00:46<18:53, 84.52it/s][A[A

  4%|▍         | 4173/100000 [00:46<18:20, 87.08it/s][A[A

  4%|▍         | 4183/100000 [00:46<17:50, 89.50it/s][A[A

  4%|▍         | 4193/100000 [00:46<17:27, 91.47it/s][A[A

  4%|▍         | 4203/100000 [00:47<17:14, 92.58it/s][A[A

  4%|▍         | 4213/100000 [00:47<17:08, 93.12it/s][A[A

  4%|▍         | 4224/100000 [00:47<16:31, 96.58it/s][A[A

  4%|▍         | 4234/10

  5%|▌         | 5419/100000 [01:00<19:10, 82.23it/s][A[A

  5%|▌         | 5428/100000 [01:00<19:45, 79.78it/s][A[A

  5%|▌         | 5438/100000 [01:00<18:47, 83.83it/s][A[A

  5%|▌         | 5448/100000 [01:00<18:16, 86.23it/s][A[A

  5%|▌         | 5457/100000 [01:00<18:34, 84.81it/s][A[A

  5%|▌         | 5467/100000 [01:01<17:46, 88.64it/s][A[A

  5%|▌         | 5476/100000 [01:01<18:14, 86.35it/s][A[A

  5%|▌         | 5487/100000 [01:01<17:12, 91.54it/s][A[A

  5%|▌         | 5497/100000 [01:01<17:09, 91.82it/s][A[A

  6%|▌         | 5508/100000 [01:01<16:46, 93.86it/s][A[A

  6%|▌         | 5518/100000 [01:01<16:55, 93.02it/s][A[A

  6%|▌         | 5528/100000 [01:01<16:54, 93.09it/s][A[A

  6%|▌         | 5538/100000 [01:01<19:33, 80.48it/s][A[A

  6%|▌         | 5547/100000 [01:02<20:08, 78.14it/s][A[A

  6%|▌         | 5557/100000 [01:02<19:03, 82.61it/s][A[A

  6%|▌         | 5567/100000 [01:02<18:42, 84.11it/s][A[A

  6%|▌         | 5576/10

  7%|▋         | 6776/100000 [01:15<17:15, 90.00it/s][A[A

  7%|▋         | 6787/100000 [01:15<16:31, 94.01it/s][A[A

  7%|▋         | 6797/100000 [01:15<16:31, 93.99it/s][A[A

  7%|▋         | 6807/100000 [01:15<16:25, 94.58it/s][A[A

  7%|▋         | 6817/100000 [01:16<17:07, 90.68it/s][A[A

  7%|▋         | 6828/100000 [01:16<16:33, 93.83it/s][A[A

  7%|▋         | 6838/100000 [01:16<16:58, 91.45it/s][A[A

  7%|▋         | 6848/100000 [01:16<17:29, 88.79it/s][A[A

  7%|▋         | 6858/100000 [01:16<16:59, 91.37it/s][A[A

  7%|▋         | 6868/100000 [01:16<18:11, 85.32it/s][A[A

  7%|▋         | 6878/100000 [01:16<17:28, 88.79it/s][A[A

  7%|▋         | 6888/100000 [01:16<17:02, 91.09it/s][A[A

  7%|▋         | 6899/100000 [01:17<16:20, 94.92it/s][A[A

  7%|▋         | 6909/100000 [01:17<17:00, 91.27it/s][A[A

  7%|▋         | 6919/100000 [01:17<16:56, 91.61it/s][A[A

  7%|▋         | 6929/100000 [01:17<18:35, 83.47it/s][A[A

  7%|▋         | 6938/10

  8%|▊         | 8126/100000 [01:30<16:59, 90.15it/s][A[A

  8%|▊         | 8136/100000 [01:30<16:39, 91.94it/s][A[A

  8%|▊         | 8146/100000 [01:31<17:17, 88.56it/s][A[A

  8%|▊         | 8156/100000 [01:31<17:00, 90.03it/s][A[A

  8%|▊         | 8167/100000 [01:31<16:28, 92.92it/s][A[A

  8%|▊         | 8177/100000 [01:31<16:53, 90.64it/s][A[A

  8%|▊         | 8187/100000 [01:31<17:16, 88.61it/s][A[A

  8%|▊         | 8196/100000 [01:31<17:15, 88.67it/s][A[A

  8%|▊         | 8206/100000 [01:31<16:47, 91.12it/s][A[A

  8%|▊         | 8216/100000 [01:31<16:39, 91.84it/s][A[A

  8%|▊         | 8226/100000 [01:31<16:27, 92.92it/s][A[A

  8%|▊         | 8236/100000 [01:31<16:17, 93.88it/s][A[A

  8%|▊         | 8246/100000 [01:32<16:25, 93.08it/s][A[A

  8%|▊         | 8257/100000 [01:32<15:42, 97.38it/s][A[A

  8%|▊         | 8267/100000 [01:32<16:21, 93.47it/s][A[A

  8%|▊         | 8277/100000 [01:32<17:26, 87.64it/s][A[A

  8%|▊         | 8286/10

  9%|▉         | 9492/100000 [01:45<15:08, 99.60it/s] [A[A

 10%|▉         | 9503/100000 [01:45<15:37, 96.49it/s][A[A

 10%|▉         | 9515/100000 [01:46<14:58, 100.76it/s][A[A

 10%|▉         | 9526/100000 [01:46<15:18, 98.53it/s] [A[A

 10%|▉         | 9537/100000 [01:46<15:04, 99.98it/s][A[A

 10%|▉         | 9549/100000 [01:46<14:36, 103.25it/s][A[A

 10%|▉         | 9560/100000 [01:46<14:47, 101.93it/s][A[A

 10%|▉         | 9571/100000 [01:46<15:05, 99.83it/s] [A[A

 10%|▉         | 9582/100000 [01:46<14:51, 101.45it/s][A[A

 10%|▉         | 9593/100000 [01:46<15:29, 97.23it/s] [A[A

 10%|▉         | 9604/100000 [01:46<15:04, 99.96it/s][A[A

 10%|▉         | 9615/100000 [01:47<15:15, 98.72it/s][A[A

 10%|▉         | 9625/100000 [01:47<15:51, 94.97it/s][A[A

 10%|▉         | 9635/100000 [01:47<16:12, 92.87it/s][A[A

 10%|▉         | 9645/100000 [01:47<18:01, 83.52it/s][A[A

 10%|▉         | 9654/100000 [01:47<19:42, 76.42it/s][A[A

 10%|▉         |

 11%|█         | 10825/100000 [02:00<15:41, 94.69it/s][A[A

 11%|█         | 10836/100000 [02:00<15:16, 97.29it/s][A[A

 11%|█         | 10846/100000 [02:00<15:27, 96.08it/s][A[A

 11%|█         | 10856/100000 [02:00<15:48, 94.00it/s][A[A

 11%|█         | 10866/100000 [02:00<16:02, 92.62it/s][A[A

 11%|█         | 10877/100000 [02:01<15:29, 95.91it/s][A[A

 11%|█         | 10887/100000 [02:01<15:43, 94.48it/s][A[A

 11%|█         | 10897/100000 [02:01<15:47, 94.06it/s][A[A

 11%|█         | 10907/100000 [02:01<15:53, 93.47it/s][A[A

 11%|█         | 10917/100000 [02:01<17:04, 86.98it/s][A[A

 11%|█         | 10926/100000 [02:01<17:45, 83.63it/s][A[A

 11%|█         | 10937/100000 [02:01<16:32, 89.75it/s][A[A

 11%|█         | 10947/100000 [02:01<18:30, 80.19it/s][A[A

 11%|█         | 10958/100000 [02:02<17:31, 84.68it/s][A[A

 11%|█         | 10970/100000 [02:02<16:14, 91.37it/s][A[A

 11%|█         | 10980/100000 [02:02<17:01, 87.14it/s][A[A

 11%|█  

 12%|█▏        | 12154/100000 [02:15<17:23, 84.16it/s][A[A

 12%|█▏        | 12164/100000 [02:15<17:00, 86.08it/s][A[A

 12%|█▏        | 12174/100000 [02:15<16:27, 88.94it/s][A[A

 12%|█▏        | 12183/100000 [02:15<17:25, 83.97it/s][A[A

 12%|█▏        | 12194/100000 [02:15<16:16, 89.95it/s][A[A

 12%|█▏        | 12204/100000 [02:16<16:42, 87.54it/s][A[A

 12%|█▏        | 12213/100000 [02:16<17:41, 82.70it/s][A[A

 12%|█▏        | 12224/100000 [02:16<16:45, 87.30it/s][A[A

 12%|█▏        | 12234/100000 [02:16<16:25, 89.03it/s][A[A

 12%|█▏        | 12245/100000 [02:16<15:41, 93.16it/s][A[A

 12%|█▏        | 12255/100000 [02:16<16:03, 91.09it/s][A[A

 12%|█▏        | 12266/100000 [02:16<15:39, 93.34it/s][A[A

 12%|█▏        | 12276/100000 [02:16<16:00, 91.29it/s][A[A

 12%|█▏        | 12286/100000 [02:16<17:59, 81.23it/s][A[A

 12%|█▏        | 12295/100000 [02:17<19:09, 76.29it/s][A[A

 12%|█▏        | 12305/100000 [02:17<17:58, 81.28it/s][A[A

 12%|█▏ 

 13%|█▎        | 13496/100000 [02:30<16:51, 85.53it/s][A[A

 14%|█▎        | 13507/100000 [02:30<15:56, 90.42it/s][A[A

 14%|█▎        | 13517/100000 [02:30<16:27, 87.54it/s][A[A

 14%|█▎        | 13526/100000 [02:30<16:26, 87.65it/s][A[A

 14%|█▎        | 13538/100000 [02:30<15:25, 93.44it/s][A[A

 14%|█▎        | 13549/100000 [02:30<15:03, 95.63it/s][A[A

 14%|█▎        | 13559/100000 [02:31<14:57, 96.32it/s][A[A

 14%|█▎        | 13569/100000 [02:31<14:51, 96.92it/s][A[A

 14%|█▎        | 13579/100000 [02:31<14:59, 96.03it/s][A[A

 14%|█▎        | 13589/100000 [02:31<17:43, 81.24it/s][A[A

 14%|█▎        | 13598/100000 [02:31<17:23, 82.83it/s][A[A

 14%|█▎        | 13608/100000 [02:31<16:36, 86.70it/s][A[A

 14%|█▎        | 13617/100000 [02:31<16:30, 87.18it/s][A[A

 14%|█▎        | 13627/100000 [02:31<16:06, 89.34it/s][A[A

 14%|█▎        | 13637/100000 [02:31<15:43, 91.56it/s][A[A

 14%|█▎        | 13648/100000 [02:32<15:04, 95.50it/s][A[A

 14%|█▎ 

 15%|█▍        | 14829/100000 [02:45<15:52, 89.39it/s][A[A

 15%|█▍        | 14839/100000 [02:45<16:02, 88.44it/s][A[A

 15%|█▍        | 14850/100000 [02:45<15:27, 91.85it/s][A[A

 15%|█▍        | 14861/100000 [02:45<14:50, 95.60it/s][A[A

 15%|█▍        | 14873/100000 [02:45<14:02, 101.06it/s][A[A

 15%|█▍        | 14884/100000 [02:45<15:33, 91.19it/s] [A[A

 15%|█▍        | 14894/100000 [02:45<15:48, 89.71it/s][A[A

 15%|█▍        | 14904/100000 [02:45<16:03, 88.28it/s][A[A

 15%|█▍        | 14915/100000 [02:46<15:12, 93.20it/s][A[A

 15%|█▍        | 14926/100000 [02:46<14:51, 95.38it/s][A[A

 15%|█▍        | 14937/100000 [02:46<14:48, 95.79it/s][A[A

 15%|█▍        | 14947/100000 [02:46<14:40, 96.58it/s][A[A

 15%|█▍        | 14958/100000 [02:46<14:23, 98.51it/s][A[A

 15%|█▍        | 14969/100000 [02:46<14:27, 98.04it/s][A[A

 15%|█▍        | 14979/100000 [02:46<14:30, 97.64it/s][A[A

 15%|█▍        | 14990/100000 [02:46<14:16, 99.26it/s][A[A

 15%|█

 16%|█▌        | 16179/100000 [02:59<14:48, 94.34it/s][A[A

 16%|█▌        | 16189/100000 [02:59<14:48, 94.35it/s][A[A

 16%|█▌        | 16200/100000 [03:00<14:22, 97.17it/s][A[A

 16%|█▌        | 16210/100000 [03:00<14:45, 94.66it/s][A[A

 16%|█▌       

In [None]:
%%timeit

list(map(extension, random_vectors))

In [None]:
%%timeit

list(map(extension_, random_vectors))

### Conclusion

The `union` and `intersection` operations extend naturally to vector stacks (matrices), and vectorized versions of the boolean formulas discovered during exploration of bitarrays make a vectorized version of `agreement` easily definable.

All three operations are enormously faster than either the baseline representation or the bitarray methods.

The vectorized version of agreement naturally permits a moderately (≈5-6x) faster method of calculating the extension of a partial feature vector.

## `pytorch` and gpus

The next logical question is whether gpus can usefully accelerate computation...

In [None]:
import torch

In [None]:
torch.set_default_tensor_type('torch.cuda.FloatTensor')
gpu_int8 = torch.cuda.CharTensor
gpu_int16 = torch.cuda.ShortTensor
my_torch_type = gpu_int8
def t(ndarray):
    if ndarray.dtype == myint:
        return torch.tensor(ndarray.astype(np.int16)).type(my_torch_type)
    return torch.tensor(ndarray).type(my_torch_type)

In [None]:
objects_t = t(np.array(objects))
objectMap_t = t(objectMap)

### Agreement

In [252]:
p

array([1, 0, 1], dtype=int8)

In [253]:
q

array([ 0,  1, -1], dtype=int8)

In [254]:
np.equal(p,q)
torch.eq(t(p), t(q))

array([False, False, False], dtype=bool)

tensor([0, 0, 0], dtype=torch.uint8)

In [255]:
tens = torch.tensor([1,1,1])
tens = tens.type(torch.cuda.ByteTensor)
tens
tens.type()
tens.all()

tensor([1, 1, 1], dtype=torch.uint8)

'torch.cuda.ByteTensor'

tensor(1, dtype=torch.uint8)

In [256]:
quux = torch.tensor([[1,1,1],[1,0,1]])
quux = quux.type(gpu_int8)
quux
torch.split(quux, 1, dim=0)
tuple(map(torch.prod, torch.split(quux, 1, dim=0) ))

tensor([[1, 1, 1],
        [1, 0, 1]], dtype=torch.int8)

(tensor([[1, 1, 1]], dtype=torch.int8), tensor([[1, 0, 1]], dtype=torch.int8))

(tensor(1), tensor(0))

In [257]:
def agree_mat_t(A,B):
    '''
    Given two matrices (torch tensors) A::(n,m) and B::(n,m), 
    return C::(n,1) where 
    C[i] = 1 iff A[i] and B[i] agree at all indices
    and 0 otherwise.
    '''
    # (x == 0 or y == 0) or ((x != 0 and y != 0) and (x == y))
    A_unspecified = A == 0
    B_unspecified = B == 0
    A_or_B_unspecified = A_unspecified | B_unspecified
    
    A_specified = A != 0
    B_specified = B != 0
    A_and_B_specified = A_specified & B_specified
    A_equal_B = torch.eq(A,B)
    A_B_both_specified_and_equal = A_and_B_specified & A_equal_B

    ag = A_or_B_unspecified | A_B_both_specified_and_equal
#     return ag
#     result = np.prod(ag, axis=-1, dtype=myint)
    result = torch.prod(ag, dim=1,dtype=my_torch_type)#, dtype=gpu_int8)
#     result = ag.type(torch.cuda.ByteTensor).all()
    return result#.type(my_torch_type)

In [285]:
%%timeit

agree_mat(random_pair_stack_a, random_pair_stack_b)

1.65 ms ± 24.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [281]:
random_pair_stack_a_t, random_pair_stack_b_t = t(random_pair_stack_a), t(random_pair_stack_b)

In [284]:
%%timeit

agree_mat_t(random_pair_stack_a_t, random_pair_stack_b_t)

197 µs ± 125 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [259]:
np.array_equal(
    matrix_agree_result,
    agree_mat_t(t(random_pair_stack_a), t(random_pair_stack_b)).cpu().type(torch.int16).numpy()
)

True

### Union

In [260]:
def union_t(u, v):
#     if CAREFUL:
#         assert agree_(u,v)
    return torch.sign(u + v)

In [286]:
%%timeit

union(agreeing_pair_stack_a, agreeing_pair_stack_b)

25.2 µs ± 506 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [287]:
agreeing_pair_stack_a_t, agreeing_pair_stack_b_t = t(agreeing_pair_stack_a), t(agreeing_pair_stack_b)

In [288]:
%%timeit

union_t(agreeing_pair_stack_a_t, agreeing_pair_stack_b_t)

19.1 µs ± 642 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [261]:
np.array_equal(
    union(agreeing_pair_stack_a, agreeing_pair_stack_b),
    union_t(t(agreeing_pair_stack_a), t(agreeing_pair_stack_b)).cpu().type(torch.int16).numpy()
)

True

### Intersection

In [262]:
p_t = t(p); p_t
q_t = t(q); q_t

tensor([1, 0, 1], dtype=torch.int8)

tensor([ 0,  1, -1], dtype=torch.int8)

In [267]:
def intersection_t(u, v):
    s = u + v
    e = torch.eq(u,v).type(torch.cuda.ShortTensor)
#     p = e * s #<- use this if default int tensor is 16 bit
    p = e * s.type(torch.cuda.ShortTensor)
    result = torch.sign(p)
    return result
#     return torch.sign(  torch.eq(u, v) * (u + v) )

In [290]:
%%timeit

intersection(random_pair_stack_a, random_pair_stack_b)

67.2 µs ± 3.23 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [291]:
%%timeit

intersection_t(random_pair_stack_a_t, random_pair_stack_b_t)

68.4 µs ± 1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [268]:
np.array_equal(
    intersection(agreeing_pair_stack_a, agreeing_pair_stack_b),
    intersection_t(t(agreeing_pair_stack_a), t(agreeing_pair_stack_b)).cpu().type(torch.int16).numpy()
)

True

### Extension

In [328]:
# objectMap_t = objectMap_t.type(torch.cuda.ShortTensor)

In [329]:
def extension_t(pfv):
    return agree_mat_t(pfv, objectMap_t)

In [330]:
%%timeit

list(map(extension_, random_vectors))

1.35 s ± 28.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [331]:
np.array(list(map(extension_, random_vectors))).shape

(100000, 4)

In [332]:
np.apply_along_axis(extension_, axis=1, arr=random_vectors).shape

(100000, 4)

In [333]:
%%timeit

np.apply_along_axis(extension_, axis=1, arr=random_vectors).shape

1.77 s ± 44.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [334]:
random_vectors_t = t(np.array(random_vectors))

In [335]:
%%timeit

list(map(extension_t, random_vectors_t))

RuntimeError: Expected object of scalar type Char but got scalar type Short for argument #2 'other'

In [None]:
%%timeit

torch.stack([extension_t(v) for v in torch.unbind(random_vectors_t, dim=0)])

The `pytorch` cell time is probably terribly slow because of the `list` and `map` (or list comprehension) operations happening in Python and on/involving the CPU...

In [None]:
%%timeit

extension_(choice(random_vectors))

In [None]:
%%timeit

extension_t(choice(random_vectors_t))

In [None]:
%%timeit

for v in random_vectors:
    extension_(v)

In [None]:
%%timeit

for v in random_vectors_t:
    extension_t(v)

In [None]:
random_v = choice(random_vectors)

In [None]:
%%timeit

extension_(random_v)

In [None]:
random_v_t = t(random_v)

In [None]:
%%timeit

extension_t(random_v_t)

In [270]:
for p in tqdm(random_vectors):
    assert np.array_equal(extension_(p), extension_t(t(p)).cpu().type(torch.int16).numpy()), 'Disagreement on {0}: {1} vs. {2}'.format(p, extension_(p), extension_t(t(p)))

### Conclusion

At a relatively low number of features (`m = 3`)...
 - `agreement` checking of entire matrices is ≈10x faster using `pytorch`+a GPU than using `numpy`.
 - `union` and `intersection` are about as fast in both implementations
 - `extension` seems abominably slower in pytorch than in numpy for some reason...