In [1]:
#Prints **all** console output, not just last item in cell 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

**Eric Meinhardt / emeinhardt@ucsd.edu**

In [2]:
import numpy as np
myint = np.int8

In [92]:
from itertools import starmap, product, combinations, chain

In [93]:
from functools import reduce

In [4]:
from tqdm import tqdm

from joblib import Parallel, delayed, Memory

J = -1
BACKEND = 'multiprocessing'
# BACKEND = 'loky'
V = 10
PREFER = 'processes'
# PREFER = 'threads'

def par(gen_expr, j=None, backend=None, verbose=None, prefer=None):
    '''
    Run the delayed calls in gen_expr through joblib's Parallel and
    return whatever Parallel returns.

    Any argument left as None falls back to the matching module-level
    default: j -> J, backend -> BACKEND, verbose -> V, prefer -> PREFER.
    '''
    settings = {
        'n_jobs': J if j is None else j,
        'backend': BACKEND if backend is None else backend,
        'verbose': V if verbose is None else verbose,
        'prefer': PREFER if prefer is None else prefer,
    }
    runner = Parallel(**settings)
    return runner(gen_expr)

def identity(x):
    '''Return the argument itself, unchanged.'''
    same = x
    return same

In [5]:
from random import choice

In [6]:
CAREFUL = False

In [94]:
import sparse

# Read in (or make) object vectors

## Make

In [18]:
m = 5

In [19]:
max_num_objects = 2 ** m
max_num_objects

max_num_partial_fvs = (2 + 1) ** m
max_num_partial_fvs

32

243

In [20]:
def make_random_pfv():
    '''
    Draw a random partial feature vector of length m (module-level)
    whose entries are each -1, 0 or +1, with dtype myint.
    '''
    # randint(3, ...) yields values in {0, 1, 2}; shifting by one gives {-1, 0, 1}.
    draws = np.random.randint(3, size=m, dtype=myint)
    return draws - 1

In [21]:
max_num_objects
# Draw at least one object: a draw of 0 would leave `objects` empty and the
# later `objectMap[0]` lookup would fail.
actual_num_objects = np.random.randint(1, max_num_objects)
# actual_num_objects = 40
actual_num_objects

assert 0 < actual_num_objects < max_num_objects

32

23

In [22]:
objects = tuple(set([tuple(np.random.randint(2, size=m)) for each in range(actual_num_objects)]))
objects = tuple(map(np.array, objects))
l = len(objects)

def zeroToMinusOne(u):
    '''
    Return an array built from u in which every element equal to 1 is
    kept and every other element is replaced by -1.
    '''
    recoded = []
    for x in u:
        recoded.append(-1 if x != 1 else x)
    return np.array(recoded)

objects = tuple([zeroToMinusOne(o) for o in objects])


actual_num_objects = len(objects)
actual_num_objects
objects

19

(array([ 1,  1,  1, -1, -1]),
 array([ 1,  1, -1,  1, -1]),
 array([-1, -1,  1,  1, -1]),
 array([ 1,  1, -1, -1, -1]),
 array([ 1,  1,  1,  1, -1]),
 array([-1, -1,  1, -1,  1]),
 array([ 1,  1,  1, -1,  1]),
 array([-1,  1,  1, -1,  1]),
 array([ 1, -1, -1,  1,  1]),
 array([-1,  1,  1,  1, -1]),
 array([-1,  1, -1, -1, -1]),
 array([-1, -1, -1, -1,  1]),
 array([-1,  1,  1, -1, -1]),
 array([-1, -1,  1,  1,  1]),
 array([ 1, -1, -1,  1, -1]),
 array([1, 1, 1, 1, 1]),
 array([-1,  1, -1, -1,  1]),
 array([-1, -1,  1, -1, -1]),
 array([ 1, -1, -1, -1,  1]))

In [23]:
objectMap = np.array([objects[i] for i in range(l)])
objectMap.shape
objectMap
objectMap[0]

O = objectMap

(19, 5)

array([[ 1,  1,  1, -1, -1],
       [ 1,  1, -1,  1, -1],
       [-1, -1,  1,  1, -1],
       [ 1,  1, -1, -1, -1],
       [ 1,  1,  1,  1, -1],
       [-1, -1,  1, -1,  1],
       [ 1,  1,  1, -1,  1],
       [-1,  1,  1, -1,  1],
       [ 1, -1, -1,  1,  1],
       [-1,  1,  1,  1, -1],
       [-1,  1, -1, -1, -1],
       [-1, -1, -1, -1,  1],
       [-1,  1,  1, -1, -1],
       [-1, -1,  1,  1,  1],
       [ 1, -1, -1,  1, -1],
       [ 1,  1,  1,  1,  1],
       [-1,  1, -1, -1,  1],
       [-1, -1,  1, -1, -1],
       [ 1, -1, -1, -1,  1]])

array([ 1,  1,  1, -1, -1])

## Read-in

In [None]:
# m = 5

In [None]:
# O = 

# Operations 

## Make generator vectors

In [8]:
def make_generator_vectors(num_features):
    '''
    Return a list of 2 * num_features vectors of dtype myint: first the
    num_features positive unit vectors (a single +1 at index i), then the
    num_features negative unit vectors (a single -1 at index i).
    '''
    unit_rows = np.eye(num_features, dtype=myint)
    # Copy / negate row by row so every generator is its own array.
    positives = [row.copy() for row in unit_rows]
    negatives = [-row for row in unit_rows]
    return positives + negatives

In [9]:
generators = make_generator_vectors(m)
generators

[array([1, 0, 0, 0, 0], dtype=int8),
 array([0, 1, 0, 0, 0], dtype=int8),
 array([0, 0, 1, 0, 0], dtype=int8),
 array([0, 0, 0, 1, 0], dtype=int8),
 array([0, 0, 0, 0, 1], dtype=int8),
 array([-1,  0,  0,  0,  0], dtype=int8),
 array([ 0, -1,  0,  0,  0], dtype=int8),
 array([ 0,  0, -1,  0,  0], dtype=int8),
 array([ 0,  0,  0, -1,  0], dtype=int8),
 array([ 0,  0,  0,  0, -1], dtype=int8)]

In [10]:
# max_num_objects = 2 ** m
# max_num_objects

# max_num_partial_fvs = (2 + 1) ** m
# max_num_partial_fvs

32

243

## Boilerplate

In [11]:
def wf_pfv(v):
    '''
    Well-formedness check for a partial feature vector:
    True iff every element of v is -1, 0 or +1.
    '''
    allowed = {-1, 0, 1}
    well_formed = True
    for x in v:
        # Deliberately no early exit: every element is inspected.
        if x not in allowed:
            well_formed = False
    return well_formed

In [13]:
def wf_tfv(v):
    '''
    Well-formedness check for a total feature vector:
    True iff every element of v is -1 or +1.
    '''
    allowed = {-1, 1}
    well_formed = True
    for x in v:
        # Deliberately no early exit: every element is inspected.
        if x not in allowed:
            well_formed = False
    return well_formed

In [12]:
def uniquify(ndarray_iterable):
    '''
    Return a list with one array per distinct vector in ndarray_iterable.

    Vectors are compared by converting each one to a tuple; the order of
    the returned list is whatever order the intermediate set iterates in.
    '''
    distinct = {tuple(a) for a in ndarray_iterable}
    return [np.array(t) for t in distinct]

## Agreement

In [27]:
def ag(x,y):
    '''
    Scalar agreement of two feature values.

    Returns True when the values are equal or when at least one of them
    is 0 (unspecified); returns False otherwise.

    Pattern:
    x = x ⟶ True
    0 = _ ⟶ True
    _ = 0 ⟶ True
    _ = _ ⟶ False
    '''
    return bool(x == y or x == 0 or y == 0)

In [26]:
def agree(u,v):
    '''
    Elementwise agreement of two vectors.

    Returns an array of dtype myint with one entry per index of u:
    1 where u[i] == 0, v[i] == 0, or u[i] == v[i], and 0 elsewhere.
    '''
    flags = []
    for i in range(len(u)):
        compatible = u[i] == 0 or v[i] == 0 or u[i] == v[i]
        flags.append(1 if compatible else 0)
    return np.array(flags, dtype=myint)

In [31]:
def agree_(u,v):
    '''
    Whole-vector agreement: returns 1 when agree(u, v) is non-zero at
    every index, and 0 otherwise.
    '''
    return int(agree(u,v).all())

In [42]:
def agree_mat(A,B):
    '''
    Vectorized agreement test.

    A and B are compared cell by cell with numpy's elementwise operators.
    A cell agrees when either value is 0, or when both are non-zero and
    equal. The per-cell results are then multiplied together along the
    last axis (dtype myint), so each entry of the result is 1 iff every
    cell along that axis agrees, and 0 otherwise.
    '''
    # (x == 0 or y == 0) or ((x != 0 and y != 0) and (x == y))
    either_unspecified = (A == 0) | (B == 0)
    both_specified = (A != 0) & (B != 0)
    same_value = np.equal(A,B)
    cellwise = either_unspecified | (both_specified & same_value)
    return np.prod(cellwise, axis=-1, dtype=myint)

In [28]:
def make_agreeing_vector_pair(pred=None):
    '''
    Rejection-sample a pair (u, v) of random partial feature vectors.

    Fresh pairs are drawn with make_random_pfv() until one is found for
    which agree_(u, v) holds and, when pred is given, pred(u, v) is truthy
    as well. The accepted pair is returned as a tuple (u, v).

    NB: the loop does not terminate if no agreeing pair satisfies pred.
    '''
    while True:
        u = make_random_pfv()
        v = make_random_pfv()
        # Both conditions must hold. Looping on
        # `not agree_(u,v) and not pred(u,v)` would accept a pair as soon
        # as either one of them held.
        if agree_(u,v) and (pred is None or pred(u,v)):
            return u,v

In [29]:
num_test_pairs = int(1e5)
random_vector_pairs = [(make_random_pfv(), make_random_pfv()) for each in range(num_test_pairs)]
len(random_vector_pairs)

100000

In [32]:
num_test_pairs = int(1e5)
agreeing_vector_pairs = [make_agreeing_vector_pair() for each in range(num_test_pairs)]
len(agreeing_vector_pairs)

100000

In [44]:
def first(seq):
    '''Return the element at index 0 of seq.'''
    return seq[0]

def second(seq):
    '''Return the element at index 1 of seq.'''
    return seq[1]

stack_a, stack_b = list(map(first, random_vector_pairs)), list(map(second, random_vector_pairs))
random_pair_stack_a, random_pair_stack_b = np.array(stack_a), np.array(stack_b)
random_pair_stack_a.dtype
random_pair_stack_b.dtype

dtype('int8')

dtype('int8')

In [45]:
stack_a, stack_b = list(map(first, agreeing_vector_pairs)), list(map(second, agreeing_vector_pairs))
agreeing_pair_stack_a, agreeing_pair_stack_b = np.array(stack_a), np.array(stack_b)
agreeing_pair_stack_a.dtype
agreeing_pair_stack_b.dtype

dtype('int8')

dtype('int8')

In [46]:
%%timeit

list(starmap(agree_, random_vector_pairs));

1.43 s ± 10.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [47]:
%%timeit

agree_mat(random_pair_stack_a, random_pair_stack_b)

2.77 ms ± 3.11 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [48]:
np.array_equal(agree_mat(random_pair_stack_a, random_pair_stack_b), 
               list(starmap(agree_, random_vector_pairs)))

True

In [50]:
# Cross-check the scalar-loop implementation (agree_) against the
# vectorized one (agree_mat) on every random pair.
n = num_test_pairs
for i in range(n):
    u = random_pair_stack_a[i]
    v = random_pair_stack_b[i]
    # agree_mat takes exactly two arguments; a third one in the message
    # would turn a real mismatch into a TypeError instead of this assertion.
    assert agree_(u,v) == agree_mat(u,v), '{0}, {1} -> {2} vs. {3}'.format(u,v, agree_(u,v), agree_mat(u,v))

In [57]:
agreement = agree_mat

## Union

In [33]:
XYs = tuple(product((-1,0,1), (-1,0,1)))
XYs

def cup(x,y):
    '''
    Scalar union of two feature values.

    Pattern:
    0 ∪ y = y
    x ∪ 0 = x
    x ∪ x = x
    _ ∪ _ = 0  \\ <- conflicting specified values; shouldn't occur in two pfvs that agree
    '''
    if x == 0:
        # x is unspecified, so y decides
        return y
    if y == 0 or x == y:
        # y is unspecified, or both carry the same value
        return x
    # both specified but different
    return 0

for x,y in XYs:
    ((x,y), cup(x,y))

((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 0), (0, 1), (1, -1), (1, 0), (1, 1))

((-1, -1), -1)

((-1, 0), -1)

((-1, 1), 0)

((0, -1), -1)

((0, 0), 0)

((0, 1), 1)

((1, -1), 0)

((1, 0), 1)

((1, 1), 1)

In [34]:
def union(u, v):
    '''
    Union of two vectors, computed as the elementwise sign of u + v.

    When the module-level CAREFUL flag is truthy, first asserts that
    agree_(u, v) holds.
    '''
    if CAREFUL:
        assert agree_(u,v)
    total = u + v
    return np.sign(total)

## Intersection

In [37]:
XYs = tuple(product((-1,0,1), (-1,0,1)))
XYs 
    
def cap(x,y):
    '''
    Scalar intersection of two feature values.

    Pattern:
    x ∩ x = x
    0 ∩ _ = 0
    _ ∩ 0 = 0
    _ ∩ _ = 0

    That is: the common value when both are specified and equal,
    and 0 in every other case.
    '''
    both_specified_and_equal = x != 0 and y != 0 and x == y
    return x if both_specified_and_equal else 0

def foo(x,y):
    '''
    Arithmetic form of the scalar intersection: the sign of
    (x == y) * (x + y).
    '''
    same = (x == y)
    total = x + y
    return np.sign( same * total )

# def bar(x,y):
#     return (x == y) * (x + y) * 0.5

# def baz(x,y):
#     return (x == y) * int((x + y) / 2)

for x,y in XYs:
#     ((x,y), cap(x,y))
#     ((x,y), cap(x,y), foo(x,y), bar(x,y), baz(x,y))
    ((x,y), cap(x,y), foo(x,y))

((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 0), (0, 1), (1, -1), (1, 0), (1, 1))

((-1, -1), -1, -1)

((-1, 0), 0, 0)

((-1, 1), 0, 0)

((0, -1), 0, 0)

((0, 0), 0, 0)

((0, 1), 0, 0)

((1, -1), 0, 0)

((1, 0), 0, 0)

((1, 1), 1, 1)

In [36]:
def intersection(u, v):
    '''
    Intersection of two vectors: the elementwise sign of
    np.equal(u, v) * (u + v).
    '''
    same = np.equal(u, v)
    total = u + v
    return np.sign( same * total )

## Extension

In [24]:
def getIndex(o, O):
    '''
    Return the position of the first element of O that np.array_equal
    considers equal to o, or -1 when there is none.

    When the module-level CAREFUL flag is truthy, asserts that exactly
    one element matched.
    '''
    hits = []
    for position, row in enumerate(O):
        if np.array_equal(row, o):
            hits.append(position)
    if not hits:
        return -1
    if CAREFUL:
        assert len(hits) == 1
    return hits[0]

In [25]:
def makeExtensionVector(positive_Indices, O):
    '''
    Return an indicator vector of length O.shape[0] and dtype myint,
    with 1 at every position i for which `i in positive_Indices` holds
    and 0 elsewhere.
    '''
    num_objects = O.shape[0]
    indicator = np.zeros(num_objects, dtype=myint)
    for i in np.arange(num_objects):
        if i in positive_Indices:
            indicator[i] = 1
    return indicator

In [38]:
def extension(v, O, asIndexVector=True):
    '''
    The extension of a partial feature vector v is the set of object vectors
    (= fully specified, or 'total' feature vectors) that 'agree' with it.

    v: the partial feature vector.
    O: iterable of object vectors (rows).
    asIndexVector: when True (default), return an indicator vector of
        dtype myint with one entry per row of O -- 1 where agree_(v, row)
        holds, 0 elsewhere. When False, return the agreeing rows
        themselves as a tuple.

    Agreement is evaluated exactly once per row and the indicator is built
    directly from those results, so there is no second search through O and
    every agreeing row is marked -- including repeated rows.
    '''
    agrees = [bool(agree_(v, o)) for o in O]
    if asIndexVector:
        return np.array(agrees, dtype=myint)
    return tuple(o for o, hit in zip(O, agrees) if hit)

In [41]:
def ramp(M):
    '''
    Threshold M at 1: np.heaviside(M - 1, 1) cast to myint, i.e.
    1 where M >= 1 and 0 where M < 1.
    '''
    shifted = M-1
    step = np.heaviside(shifted, 1)
    return step.astype(myint)

def primed(p):
    '''
    Scale p by the sum of the absolute values of its entries
    (its L1 norm) and return the result.
    '''
    l1_norm = np.sum(np.abs(p))
    scaled = p / l1_norm
    return scaled

def extension_alt3(s, O):
    '''
    Dot-product formulation of the extension of a partial feature vector.

    s: partial feature vector with entries in {-1, 0, +1}.
    O: object matrix, one total feature vector (entries in {-1, +1}) per row.

    Returns an indicator vector of dtype myint with one entry per row of O:
    1 where the row agrees with s on every specified index, 0 elsewhere.

    For such inputs, the dot product of a row with s equals the number of
    specified entries of s exactly when every specified entry matches, and
    is smaller otherwise. The comparison is done in integer arithmetic, so
    no floating-point rounding is involved, and the all-zero vector
    (0 specified entries) matches every row of O without a special case.
    '''
    # Widen to int64 so narrow integer dtypes cannot overflow in the sums.
    p = np.asarray(s, dtype=np.int64)
    num_specified = np.sum(np.abs(p))
    matches = np.dot(np.asarray(O, dtype=np.int64), p)
    return (matches >= num_specified).astype(myint)

In [43]:
def extension_(pfv, O):
    '''
    Extension of pfv with respect to O, computed by handing both
    straight to agree_mat.
    '''
    indicator = agree_mat(pfv, O)
    return indicator

In [52]:
num_test_pairs = int(1e5)
random_vectors = [make_random_pfv() for each in tqdm(range(num_test_pairs))]
len(random_vectors)

100%|██████████| 100000/100000 [00:00<00:00, 234266.57it/s]


100000

In [53]:
%%timeit

list(map(lambda v: extension(v, O), random_vectors))

47 s ± 61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [54]:
%%timeit

list(map(lambda v: extension_alt3(v, O), random_vectors))

1.43 s ± 6.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [55]:
%%timeit

list(map(lambda v: extension_(v,O), random_vectors))

1.27 s ± 9.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [56]:
interpretation = extension_

## Entailed pfvs

In [40]:
def specifiable_zero_indices(p, ext_p):
    '''
    Given p and A::(n,m) = ⟦p⟧:

    If p_j = 0 and ∀i A_{i,j} = k≠0, then
    p_j is unspecified (i.e. p_j = 0) but
    can be set to k and yield a co-extensive
    and more specific pfv p'. (NB: p' entails
    p.)

    This function returns a set of (index, value) pairs
    indicating the 0-valued indices of p that can be
    specified, plus what the common value at that index is.
    An empty set is returned when the extension has no rows.

    Correctly specifying any one or any combination
    of the indices in this set will result in a more
    specific vector than p that is coextensive.

    From this set, you can construct (or count) all of the
    more specified pfvs that are coextensive with p.

    The column test compares the column sum against ±n, which assumes
    the entries of ext_p are all -1 or +1.
    '''
    A = np.asarray(ext_p)
    n = A.shape[0]
    if n == 0:
        return set()
    specifiable_indices = set()
    # flatnonzero yields platform-sized integer indices, so vectors with
    # more than 127 features are indexed correctly.
    for j in np.flatnonzero(np.asarray(p) == 0):
        j = int(j)
        j_col_sum = int(np.sum(A[:, j], dtype=np.int64))
        if j_col_sum == n:
            specifiable_indices.add((j, 1))
        elif j_col_sum == -n:
            specifiable_indices.add((j, -1))
    return specifiable_indices

def specify(p, specs):
    '''
    Return a copy of the partial feature vector p in which, for every
    (index, value) pair in specs, the entry at that index has been set
    to that value. p itself is left untouched.

    Intended use: the indices are currently unspecified positions of p
    (p_i = 0) and the values are non-zero, so the result is a more
    specific vector than p.
    '''
    updated = p.copy()
    for index, value in specs:
        updated[index] = value
    return updated

def entailed_pfvs(p, O, no_total_fvs = True):
    '''
    Given a partial feature vector p and a set of objects
    (total feature vectors) O, this returns the set of
    partial feature vectors that are strictly more specific
    than p that have the same extension in O.

    p: the partial feature vector.
    O: the object vectors.
    no_total_fvs: when True (default), vectors with every index specified
        are dropped from the result; when False they are kept.

    Returns an array with one row per entailed vector, ordered by how many
    extra indices were specified (empty when nothing can be specified).
    '''
    x_p = np.array(extension(p, O, False))
    # Sorted so the output order does not depend on set iteration order.
    candidates = sorted(specifiable_zero_indices(p, x_p))
    entailed_vectors = np.array([specify(p, spec)
                                 for r in range(1, len(candidates) + 1)
                                 for spec in combinations(candidates, r)], dtype=myint)
    if not no_total_fvs:
        return entailed_vectors
    # "Total" is judged against the length of p itself, not a module-level
    # feature count.
    num_features = len(p)
    return np.array([v for v in entailed_vectors
                     if np.count_nonzero(v) < num_features])

# Generation of $S_i$: all pfvs with exactly $i$ specified values

In [59]:
# from functools import reduce

In [60]:
def grand_union(pfvs):
    '''
    Fold `union` over pfvs from left to right and return the result.
    '''
    merged = reduce(union, pfvs)
    return merged

In [61]:
def one_hot_stack(indices):
    '''
    Select rows of the m x m identity matrix (dtype myint, m taken from
    module scope) by `indices`: row k of the result is the one-hot
    vector for indices[k].
    '''
    identity_rows = np.eye(m, dtype=myint)
    return identity_rows[indices]

In [80]:
def indexChoicesToComponentOptions(index_choices):
    '''
    For each index in index_choices, lazily yield the pair
    (one-hot vector for that index, its negation).

    Returns a generator of such pairs.
    '''
    one_hots = one_hot_stack(list(index_choices))
    return ((row, -1 * row) for row in one_hots)

def componentOptionsToChoices(component_options):
    '''
    Take the Cartesian product of the given option groups (one pick per
    group) and lazily map grand_union over each combination.

    Returns a map iterator with one merged vector per combination.
    '''
    picks = product(*component_options)
    return map(grand_union, picks)

def make_Si_naive(i):
    '''
    Build S_i: every partial feature vector of length m (module-level)
    with exactly i specified (non-zero) entries.

    Returns an array with one vector per row.
      - i == 0 gives the single all-zero vector, shape (1, m).
      - i > m gives an empty array of shape (0, m).
    '''
    if i == 0:
        # Nothing to choose: the only pfv with no specified value.
        return np.zeros((1, m), dtype=myint)
    index_choices = combinations(range(m), i)
    per_index_set = (componentOptionsToChoices(indexChoicesToComponentOptions(c))
                     for c in index_choices)
    # One flat chain over all index sets; this also copes with there being
    # zero or one index set.
    choices_flattened = tuple(chain.from_iterable(per_index_set))
    if not choices_flattened:
        return np.zeros((0, m), dtype=myint)
    return np.array(choices_flattened)

In [81]:
construct_Si = make_Si_naive

In [104]:
#FIXME this can/should be parallelized and memory mapped
def calculate_Xi(Si, O):
    '''
    Compute the extension of every vector in Si with respect to O.

    Returns an array of dtype myint whose k-th entry is
    interpretation(Si[k], O).
    '''
    # This def is the single definition of calculate_Xi; binding the name
    # to `interpretation` first would be dead code, since the def rebinds it.
    return np.array([interpretation(p, O) for p in Si], dtype=myint)

# Generate $\overline{S}_i$ (compress $S_i$) by removing vectors with empty extension in $S_i$

In [83]:
EMPTY = np.zeros((l,), dtype=myint)

In [84]:
#FIXME this can/should be parallelized and memory mapped
def make_Si_bar_naive(Si, Xi):
    '''
    Drop from Si every vector whose extension is empty.

    Si: array with one partial feature vector per row.
    Xi: array with one extension indicator vector per row; Xi[k] belongs
        to Si[k].

    Returns the rows of Si, in their original order, whose extension has
    at least one non-zero entry. Emptiness is read off each row of Xi
    directly, so the result does not depend on any module-level
    "empty extension" constant or on the number of objects it was built for.
    '''
    Si = np.asarray(Si)
    Xi = np.asarray(Xi)
    if Si.shape[0] == 0:
        return Si.copy()
    has_members = Xi.reshape(Xi.shape[0], -1).any(axis=1)
    return Si[has_members]

In [85]:
construct_Si_bar = make_Si_bar_naive

# Convert $\overline{X}_i$ to a sparse representation

In [86]:
# import sparse

In [87]:
def density(a):
    '''
    Fraction of the cells of a that are non-zero: the number of non-zero
    entries divided by the product of a.shape.
    '''
    total_cells = reduce(lambda acc, dim: acc * dim, a.shape)
    occupied = len(np.nonzero(a)[0])
    return occupied / total_cells

def sparsity(a):
    '''Fraction of the cells of a that are zero, i.e. 1 - density(a).'''
    filled = density(a)
    return 1 - filled

In [88]:
def to_sparse(v):
    '''Wrap v in a sparse.COO array and return it.'''
    as_coo = sparse.COO(v)
    return as_coo

# Local processing pipeline to generate non-sparse $\overline{S}_i, \overline{X}_i$, $\forall i$ 

In [107]:
def construct_Si_bar_Xi_bar(i, O):
    '''
    Build the compressed pair (Si_bar, Xi_bar) for level i:

      1. construct S_i with construct_Si(i),
      2. compute its extensions with calculate_Xi,
      3. compress S_i with construct_Si_bar,
      4. compute the extensions of the compressed set.

    Returns (Si_bar, Xi_bar).
    '''
    Si = construct_Si(i)
    Si_bar = construct_Si_bar(Si, calculate_Xi(Si, O))
    # The full S_i is no longer needed once the compressed set exists.
    del Si
    #FIXME you shouldn't have to recalculate the extensions of everything in Si_bar!
    #these (or at least Xi_bar) should be sparse (and memory mapped) representations
    return Si_bar, calculate_Xi(Si_bar, O)

In [103]:
construct_Si(1)

array([[ 1,  0,  0,  0,  0],
       [-1,  0,  0,  0,  0],
       [ 0,  1,  0,  0,  0],
       [ 0, -1,  0,  0,  0],
       [ 0,  0,  1,  0,  0],
       [ 0,  0, -1,  0,  0],
       [ 0,  0,  0,  1,  0],
       [ 0,  0,  0, -1,  0],
       [ 0,  0,  0,  0,  1],
       [ 0,  0,  0,  0, -1]], dtype=int8)

In [109]:
S1_bar, X1_bar = construct_Si_bar_Xi_bar(1, O)
sparsity(S1_bar)
sparsity(X1_bar)

0.8

0.5

In [110]:
S1_bar

array([[ 1,  0,  0,  0,  0],
       [-1,  0,  0,  0,  0],
       [ 0,  1,  0,  0,  0],
       [ 0, -1,  0,  0,  0],
       [ 0,  0,  1,  0,  0],
       [ 0,  0, -1,  0,  0],
       [ 0,  0,  0,  1,  0],
       [ 0,  0,  0, -1,  0],
       [ 0,  0,  0,  0,  1],
       [ 0,  0,  0,  0, -1]], dtype=int8)

In [111]:
S2_bar, X2_bar = construct_Si_bar_Xi_bar(2, O)
sparsity(S2_bar)
sparsity(X2_bar)

0.6

0.75

In [112]:
S3_bar, X3_bar = construct_Si_bar_Xi_bar(3, O)
sparsity(S3_bar)
sparsity(X3_bar)

0.4

0.8717948717948718

In [113]:
S4_bar, X4_bar = construct_Si_bar_Xi_bar(4, O)
sparsity(S4_bar)
sparsity(X4_bar)

0.19999999999999996

0.9242424242424242

In [114]:
S5_bar, X5_bar = construct_Si_bar_Xi_bar(5, O)
sparsity(S5_bar)
sparsity(X5_bar)

0.0

0.9473684210526316