In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import gc

In [6]:
# Toy data: a 5 x 10 frame of random 0/1 values (no seed is set in this cell, so the values differ between runs)
patterns = pd.DataFrame(np.random.randint(0, 2, (5, 10)))
patterns

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,1,1,1,1,0,1,0,0,1
1,1,0,0,1,0,0,1,1,1,1
2,0,1,0,1,1,0,1,1,0,1
3,1,1,0,1,1,0,1,1,0,0
4,1,0,0,1,0,0,1,0,0,0


In [12]:
def generate_flipped_pattern(row, eta):
    """Build every variant of ``row`` in which one window of ``eta`` adjacent bits is forced to 1.

    The window slides over all ``len(row) - eta + 1`` start positions. Bits that
    are already 1 stay 1, so several windows can produce the same variant (or the
    unchanged row); duplicates are removed before returning.

    With ``eta = 0`` the window is empty and the result is just the row itself.

    Parameters
    ----------
    row : 1-D array-like with a ``shape`` attribute
        The base bit pattern. It is copied, never modified.
    eta : int
        Number of consecutive bits forced to 1 in each variant.

    Returns
    -------
    numpy.ndarray
        2-D array of the distinct variants, in the sorted order produced by
        ``np.unique(..., axis=0)``.
    """
    n_windows = row.shape[0] - eta + 1

    # One private copy of the row per window position.
    candidates = np.tile(row, (n_windows, 1))

    # Iterating a 2-D array gives row views, so the assignment edits `candidates` in place.
    for start, candidate in enumerate(candidates):
        candidate[start:start + eta] = 1

    return np.unique(candidates, axis=0)


def flip_pattern(patterns, eta):
    """Yield, for each base pattern, its variants at Hamming distance ``eta``.

    Candidates for a row come from ``generate_flipped_pattern`` (each window of
    ``eta`` adjacent bits forced to 1). Only candidates that differ from the
    base row in exactly ``eta`` positions are kept, i.e. windows in which every
    bit was originally 0. Rows with no such candidate yield nothing.

    Parameters
    ----------
    patterns : pandas.DataFrame
        One bit pattern per row (rows are visited with ``iterrows``).
    eta : int
        Window width and required Hamming distance.

    Yields
    ------
    numpy.ndarray
        2-D array holding the distinct qualifying variants of one base row.
    """
    for _, base in patterns.iterrows():
        base_bits = base.to_numpy()

        # Candidates are already de-duplicated and sorted by the helper,
        # so filtering them needs no second np.unique pass.
        candidates = generate_flipped_pattern(base_bits, eta)

        # Hamming distance of every candidate to the base row, in one vectorised step.
        distances = (candidates != base_bits).sum(axis=1)

        flipped = candidates[distances == eta]
        if len(flipped) == 0:
            continue

        yield flipped

In [14]:
# Spot-check on row 1: every window of 2 adjacent bits forced to 1, duplicates removed
generate_flipped_pattern(patterns.loc[1], 2)

array([[1, 0, 0, 1, 0, 0, 1, 1, 1, 1],
       [1, 0, 0, 1, 0, 1, 1, 1, 1, 1],
       [1, 0, 0, 1, 1, 0, 1, 1, 1, 1],
       [1, 0, 0, 1, 1, 1, 1, 1, 1, 1],
       [1, 0, 1, 1, 0, 0, 1, 1, 1, 1],
       [1, 1, 0, 1, 0, 0, 1, 1, 1, 1],
       [1, 1, 1, 1, 0, 0, 1, 1, 1, 1]])

In [15]:
# Print the distance-`eta` variants of each base pattern that has any,
# one group per pattern, separated by a dashed rule.
eta = 2
df_eta = flip_pattern(patterns, eta)  # the generator is consumed directly by the loop

for p in df_eta:
    print(p, '-' * 30, sep='\n')

[[1 1 1 1 1 0 1 1 1 1]]
------------------------------
[[1 0 0 1 1 1 1 1 1 1]
 [1 1 1 1 0 0 1 1 1 1]]
------------------------------
[[1 1 0 1 1 0 1 1 1 1]]
------------------------------
[[1 0 0 1 0 0 1 0 1 1]
 [1 0 0 1 0 0 1 1 1 0]
 [1 0 0 1 1 1 1 0 0 0]
 [1 1 1 1 0 0 1 0 0 0]]
------------------------------


### Optimize for large dataframe

In [16]:
# Data directory, resolved relative to the parent of the current working directory.
path = Path.cwd().parent / 'MNIST' / 'lastHiddenLayer' / 'raw' / 'MNIST_Adam-256-60'
path

PosixPath('/home/ah19/runtime-monitoring/MNIST/lastHiddenLayer/raw/MNIST_Adam-256-60')

In [17]:
# Read "<directory name>_train.csv" from the data directory.
df = pd.read_csv(path / f"{path.name}_train.csv")

# Keep only the rows whose "true" column equals True, then discard that column.
df_true = (
    df[df["true"] == True]
    .drop(columns="true")
    .reset_index(drop=True)
)

# Labels are taken before de-duplication, so `y` has one entry per kept row.
y = df_true["y"]

# Binarise the remaining columns (value > 0 -> 1, otherwise 0) and drop repeated patterns.
df_true = df_true.drop(columns="y").gt(0).astype(np.int8).drop_duplicates()

In [18]:
# Preview the first rows of the binarised, de-duplicated patterns
df_true.head()

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x50,x51,x52,x53,x54,x55,x56,x57,x58,x59
0,1,1,0,1,1,1,0,1,0,0,...,0,0,1,1,1,0,0,0,0,1
1,1,0,0,0,0,1,1,0,0,1,...,0,0,1,0,1,0,1,0,1,0
2,1,0,0,1,0,0,1,1,1,0,...,1,0,1,0,0,0,0,0,0,0
3,0,1,0,1,1,1,1,1,1,0,...,1,0,1,1,0,0,0,0,0,1
4,0,0,0,1,0,0,0,0,0,1,...,1,0,0,0,1,1,0,0,0,0


In [19]:
def flip_pattern(patterns, eta):
    """Yield variants of ``patterns`` with one window of ``eta`` adjacent bits forced to 1.

    The window slides over every start position of the bit columns. For each
    position, only rows whose variant differs from the original in exactly
    ``eta`` bits are kept (rows where the whole window was 0); the distinct
    kept rows are yielded as one array. Positions with no qualifying row yield
    nothing.

    With ``eta = 0`` no bit changes, so every row qualifies and the
    de-duplicated input is yielded once per window position.

    Parameters
    ----------
    patterns : 2-D array-like of 0/1 integers or booleans
        One bit pattern per row. The input is never modified.
    eta : int
        Window width and required Hamming distance.

    Yields
    ------
    numpy.ndarray
        2-D array of distinct variants for one window position, in the sorted
        order produced by ``np.unique(..., axis=0)``.
    """
    patterns = np.asarray(patterns)
    temp = patterns.copy()

    # The window count must come from the argument itself, not from a notebook global.
    n_bits = patterns.shape[1]

    for nth in range(n_bits - eta + 1):
        window = slice(nth, nth + eta)
        temp[:, window] = 1

        # Rows whose Hamming distance to the original equals eta.
        idx = (patterns ^ temp).sum(1) == eta

        # Boolean indexing copies the rows, so the result survives the reset below.
        flipped = np.unique(temp[idx], axis=0) if idx.any() else None

        # Always restore the window, including when nothing matched; otherwise
        # the forced bits would leak into the distance of every later window.
        temp[:, window] = patterns[:, window]

        if flipped is not None:
            yield flipped

In [20]:
%%timeit

eta = 1
df_eta = flip_pattern(patterns.to_numpy(), eta)

for d in df_eta: _

del df_eta
gc.collect()

The slowest run took 9.38 times longer than the fastest. This could mean that an intermediate result is being cached.
212 ms ± 154 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### XOR test

In [21]:
# Two integers used to demonstrate swapping values with XOR
a = 1
b = 2

In [22]:
# Binary representations of the two starting values
bin(a),bin(b)

('0b1', '0b10')

In [23]:
# Step 1: a now holds (original a) XOR (original b)
a = a ^ b
print(a, bin(a))

# Step 2: XOR-ing that with the original b cancels b out, leaving the original a in b
b = a ^ b
print(b, bin(b))

# Step 3: XOR-ing with b (now the original a) cancels a out, leaving the original b in a
a = a ^ b
print(a, bin(a))

3 0b11
1 0b1
2 0b10


In [24]:
# After the three XOR assignments the two values have traded places
bin(a),bin(b)

('0b10', '0b1')

In [25]:
# The swapped values in decimal
a, b

(2, 1)