In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import gc

In [164]:
# Toy 5x5 frame of random 0/1 bits, small enough to check results by eye.
# The generator is not seeded, so the values change on every run.
patterns = pd.DataFrame(data=np.random.randint(low=0, high=2, size=(5, 5)))
patterns

Unnamed: 0,0,1,2,3,4
0,1,1,1,0,1
1,1,1,0,0,0
2,1,1,0,0,1
3,1,0,0,1,1
4,0,1,0,0,1


In [165]:
def flip_pattern(patterns, eta):
    """Yield variants of ``patterns`` with ``eta`` adjacent bits forced.

    A window of ``eta`` consecutive columns slides across the bit matrix.
    For every window position, only rows whose Hamming distance to the
    original is exactly ``eta`` after forcing are kept.  For 0/1 data that
    means:

    * rows whose window was all 0 are emitted with the window set to 1;
    * rows whose window was all 1 are emitted with the window set to 0.

    Each group is de-duplicated with ``np.unique(..., axis=0)`` (which also
    sorts the rows) and the two groups are stacked, forced-to-1 rows first.

    Parameters
    ----------
    patterns : array-like, 2-D
        Bit matrix, one pattern per row.  Anything ``np.asarray`` accepts
        (including a DataFrame) works; the input is never modified.
    eta : int
        Number of adjacent bits to force.  ``eta == 0`` yields the
        de-duplicated patterns unchanged, exactly once.

    Yields
    ------
    numpy.ndarray
        One 2-D array per window position that has at least one qualifying
        row.  Positions without any qualifying row are skipped, so the k-th
        yielded array does not necessarily belong to window position k.
        Nothing is yielded when ``eta`` exceeds the number of columns.
    """
    patterns = np.asarray(patterns)
    n_bits = patterns.shape[1]

    if eta == 0:
        # Nothing to flip: hand back the patterns as they are, de-duplicated,
        # once -- not once per window position with every row doubled.
        yield np.unique(patterns, axis=0)
        return

    for nth in range(n_bits - eta + 1):
        window = patterns[:, nth:nth + eta]

        # Outside the window nothing changes, so the Hamming distance to the
        # original is fully determined by the window columns.
        idx1 = (window ^ 1).sum(1) == eta  # distance after forcing to 1
        idx0 = window.sum(1) == eta        # distance after forcing to 0

        if not (idx1.any() or idx0.any()):
            continue

        # Boolean-mask indexing returns a copy, so the caller's array is
        # untouched and only the selected rows are duplicated.
        forced_on = patterns[idx1]
        forced_on[:, nth:nth + eta] = 1
        forced_off = patterns[idx0]
        forced_off[:, nth:nth + eta] = 0

        yield np.vstack([np.unique(forced_on, axis=0),
                         np.unique(forced_off, axis=0)])

In [166]:
# Manual eta=1 reference: force bit column m to 1 (t1) and to 0 (t0)
# on two independent copies of the pattern matrix.
m = 1

t1, t0 = (patterns.to_numpy().copy() for _ in range(2))
t1[:, m] = 1
t0[:, m] = 0

In [168]:
# Materialise every yielded batch for eta=1 and look at the one at index m.
[*flip_pattern(patterns.to_numpy(), eta=1)][m]

array([[1, 1, 0, 1, 1],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 1],
       [1, 0, 1, 0, 1]])

In [169]:
# Rows of t1 that differ from the original in exactly one bit.
t1[np.sum(patterns.to_numpy() ^ t1, axis=1) == 1]

array([[1, 1, 0, 1, 1]])

In [170]:
# Rows of t0 that differ from the original in exactly one bit.
t0[np.sum(patterns.to_numpy() ^ t0, axis=1) == 1]

array([[1, 0, 1, 0, 1],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 1],
       [0, 0, 0, 0, 1]])

In [160]:
# Benchmark input: 60,000 random patterns of 30 bits each (unseeded).
patterns = pd.DataFrame(np.random.randint(2, size=(60_000, 30)))
patterns.shape

(60000, 30)

In [161]:
%%timeit

# Time a full pass with a 1-bit window.
eta = 1
flipped_batches = flip_pattern(patterns.to_numpy(), eta)

# The generator is lazy: iterate to the end so every window is computed.
for _ in flipped_batches:
    pass

# Drop the generator and reclaim memory (this is part of the timed region).
del flipped_batches
gc.collect()

6.74 s ± 19.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [162]:
%%timeit

# Time a full pass with a 5-bit window.
eta = 5
flipped_batches = flip_pattern(patterns.to_numpy(), eta)

# The generator is lazy: iterate to the end so every window is computed.
for _ in flipped_batches:
    pass

# Drop the generator and reclaim memory (this is part of the timed region).
del flipped_batches
gc.collect()

680 ms ± 7.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### XOR swap test

In [21]:
# Two small integers with different bit patterns (0b1 and 0b10).
a, b = 1, 2

In [22]:
# Binary representations before the swap.
tuple(map(bin, (a, b)))

('0b1', '0b10')

In [23]:
# XOR swap: exchange a and b without a temporary, printing each step.
a ^= b  # a now holds (original a) ^ (original b)
print(a, bin(a))

b ^= a  # cancels original b, leaving original a in b
print(b, bin(b))

a ^= b  # cancels original a, leaving original b in a
print(a, bin(a))

3 0b11
1 0b1
2 0b10


In [24]:
# Binary representations after the swap.
tuple(bin(value) for value in (a, b))

('0b10', '0b1')

In [25]:
# Decimal values after the swap.
(a, b)

(2, 1)