In [1]:
# PyDDLib
# https://github.com/thiagopbueno/pyddlib/blob/master/pyddlib/bdd.py

# !pip install pyddlib

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyddlib.bdd import BDD

ModuleNotFoundError: No module named 'pyddlib'

# Logic

In [None]:
vars = [BDD.variable(i) for i in range(3)]

In [None]:
vars[0], ~vars[0]

In [6]:
dd = vars[0]

In [47]:
img1 = vars[0] & ~vars[1] & vars[2]
img2 = ~vars[0] & vars[1] & vars[2]
dd = img1
dd |= img2

In [48]:
img3 = ~vars[0] & vars[1] & ~vars[2]
(dd & img3).is_zero()

True

In [49]:
img3 = ~vars[0] & vars[1] & vars[2]
(dd & img3).is_zero()

False

# Numpy Syntax

In [50]:
del BDD
from pyddlib.bdd import BDD

vars = [BDD.variable(i) for i in range(4)]
vars_not = [~v for v in vars]

fn = lambda row: np.where( row == 1, vars, vars_not )

In [51]:
# important pass pattern as numpy array
# otherwise will not work

dd = BDD.zero() # initiate

img1 = np.bitwise_and.reduce( fn(np.array([1, 0, 1, 1])) )
dd = img1

img2 = np.bitwise_and.reduce( fn(np.array([0, 1, 1, 1])) )
dd |= img2

In [52]:
img3 = np.bitwise_and.reduce( fn(np.array([0, 1, 0, 1])) )
(dd & img3).is_zero()

True

In [53]:
img3 = np.bitwise_and.reduce( fn(np.array([0, 1, 1, 1])) )
(dd & img3).is_zero()

False

# Monitor Class

In [54]:
del BDD

In [12]:
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from pympler import asizeof
import time

class MonitorBDD:
    def __init__(self, num_neurons, thld_1, thld_2=None, thld_3=None, neurons=None, verbose=False):
        
        from pyddlib.bdd import BDD

        self.bdd = BDD.zero()
        self.num_neurons = num_neurons
        self.verbose = verbose

        self.thld_1 = thld_1
        self.thld_2 = thld_2
        self.thld_3 = thld_3
        self.num_bits = 2 if thld_2 is not None or thld_3 is not None else 1
        self.num_vars = num_neurons * self.num_bits

        self.neurons = neurons
        if self.neurons is not None:
            self.neurons = np.array([int(n[1:]) for n in neurons])
            self.thld_1 = thld_1[self.neurons]
            self.thld_2 = thld_2[self.neurons] if self.thld_2 is not None else None
            self.thld_3 = thld_3[self.neurons] if self.thld_3 is not None else None

        self.vars, self.vars_not = self.__declare_vars()

        self.stats = pd.DataFrame({
            'thld': [],
            'df': [],
            'build_time': [],
            'size_before_reorder_mb': [],
            'reorder_time': [],
            'size_after_reorder_mb': []
        })


    def __declare_vars(self):
        """TODO"""
        # generate vars either x0_0 or x0_0 and x0_1 per neuron
        if self.neurons is not None: vars_range = range(self.neurons.shape[0])
        else: vars_range = range(self.num_neurons)

        vars = np.array([self.bdd.variable(i) for i in vars_range])

        # generate negative vars
        vars_not = np.array([ ~v for v in vars ])

        return vars, vars_not


    def __multi_thlds(self, x):
        """TODO"""
        x = x.reshape(x.shape[0], 1)
        x = np.where(x == [0], [0, 0], # 0
             np.where(x == [1], [1, 0], # 1
             np.where(x == [2], [0, 1], # 2
             [1, 1] ) ) )# 3
        return np.reshape(x, x.shape[0] * 2)


    def __applying_thlds(self, df):
        """TODO"""
        df_thld = (df >=  self.thld_1).astype('int8')

        if self.thld_2 is not None:
            df_thld += (df >=  self.thld_2).astype('int8')

        if self.thld_3 is not None:
            df_thld += (df >=  self.thld_3).astype('int8')

        return df_thld.to_numpy()


    def check_pattern_length(self, row):
        """TODO"""
        if self.num_bits == 2:
            assert len(self.vars)/2 == row.shape[0], "ERROR: VARS and ROW do not match!"
        else:
            assert len(self.vars) == row.shape[0], "ERROR: VARS and ROW do not match!"


    def construct_pattern(self, row):
        """TODO"""
        # replace 1 with vars and 0 with vars_not
        expr = np.where( row == 1, self.vars, self.vars_not )
        return np.bitwise_and.reduce( expr )


    def __add_one_pattern(self, row):
        """TODO"""
        self.bdd |= self.construct_pattern(row)


    def flip_bit(self, patterns, eta):
        """flip n-th bit to allow more freedom(false positive)
           if et = 0 then pattern as is
           if et = 1 then loop over each bit and force it to one
           et = 2 loop over 2 bits and flip them ... etc
           drop any duplicate patterns"""

        for nth in range(patterns.shape[1]-eta+1):
            temp = patterns.copy()
            temp[:, nth:nth+eta] = 1
            temp = np.unique(temp, axis=0)
            yield temp


    def add_dataframe(self, df, eta=0, eval_dfs=None):
        """TODO"""
        start = time.perf_counter()
        if self.neurons is not None:
            df = df[df.columns[self.neurons]].drop_duplicates()
        else:
            df = df[df.columns[:self.num_neurons]].drop_duplicates()

        patterns = self.__applying_thlds(df)

        if self.num_bits == 2:
            patterns = np.apply_along_axis(self.__multi_thlds, 1, patterns)


        for i in range(df.shape[0]):
            self.__add_one_pattern(patterns[i])
        build_time = round(time.perf_counter() - start, 3)


        row = self.stats.shape[0]+1
        self.stats.loc[row, 'df'] = 0
        self.stats.loc[row, 'build_time'] = build_time
        self.stats.loc[row, 'size_before_reorder_mb'] = round( asizeof.asizeof(self) * 1e-6, 3)

        # add column for scoring
        if eval_dfs is not None:
            for eval_df in eval_dfs:
                self.evaluate_dataframe(eval_df, 0)


        start = time.perf_counter()
#         BDD.reorder(self.bdd)
        bdd_reorder_time = round(time.perf_counter() - start, 3)

        self.stats.loc[row, 'reorder_time'] = bdd_reorder_time
        self.stats.loc[row, 'size_after_reorder_mb'] = round( asizeof.asizeof(self) * 1e-6, 3)


        # flip bit
        if eta > 0:
            # evaluate starting from 1 degree of freedom
            for et in range(1, eta+1):
                row = self.stats.shape[0]+1
                start = time.perf_counter()

                # flip each neuron to 1 then add to BDD
                for flipped_patterns in self.flip_bit(patterns, et):
                    for i in range(flipped_patterns.shape[0]):
                        self.__add_one_pattern(flipped_patterns[i])
                    # end loop flipped_patterns

                build_time = round(time.perf_counter() - start, 3)
                self.stats.loc[row, 'df'] = et
                self.stats.loc[row, 'build_time'] = build_time
                self.stats.loc[row, 'size_before_reorder_mb'] = round( asizeof.asizeof(self) * 1e-6, 3)

                # reorder
                start = time.perf_counter()
#                 BDD.reorder(self.bdd)
                bdd_reorder_time = round(time.perf_counter() - start, 3)

                self.stats.loc[row, 'reorder_time'] = bdd_reorder_time
                self.stats.loc[row, 'size_after_reorder_mb'] = round( asizeof.asizeof(self) * 1e-6, 3)

                # add column for scoring
                if eval_dfs is not None:
                    for eval_df in eval_dfs:
                        self.evaluate_dataframe(eval_df, et)

        # return evaluated dataframes
        if eval_dfs is not None:
            return eval_dfs

        self.stats = self.stats.loc[self.stats['df'] == eta]
        return


    def check_one_pattern(self, row):
        """TODO"""
        if (self.bdd & self.construct_pattern(row) ).is_zero():
            return 0 # means not found
        else:
            return 1 # found it


    def evaluate_dataframe(self, df, eta=None):
        """TODO"""
        bdd_results = np.zeros(df.shape[0], dtype=np.int8)

        if self.neurons is not None:
            patterns = self.__applying_thlds(df[df.columns[self.neurons]])
        else:
            patterns = self.__applying_thlds(df[df.columns[:self.num_neurons]])

        if self.num_bits == 2:
            patterns = np.apply_along_axis(self.__multi_thlds, 1, patterns)

        for i in range(df.shape[0]):
            bdd_results[i] = self.check_one_pattern(patterns[i])

        if eta is not None:
            df[f'bdd_{eta}'] = bdd_results
            return

        # if the function called specifically, return scored df after evaluating
        df['bdd'] = bdd_results

        return self.score_dataframe(df)


    def score_dataframe(self, df, bdd_col='bdd'):
        """TODO"""
        df_out_of_pattern_images = df.loc[df[bdd_col] == 0, ['y', bdd_col]].groupby('y').count().sort_index()
        df_out_of_pattern_images.columns = [bdd_col + '_false']

        df_out_of_pattern_misclassified_images = df.loc[(df[bdd_col] == 0) & (df['true'] == False), ['y', bdd_col]].groupby('y').count().sort_index()
        df_out_of_pattern_misclassified_images.columns = [bdd_col + '_false_miss_classified']

        df_scores = df_out_of_pattern_images.join(df_out_of_pattern_misclassified_images).join(df['y'].value_counts())
        df_scores.rename({'y': 'count'}, axis=1, inplace=True)

        del df_out_of_pattern_images, df_out_of_pattern_misclassified_images

        total_images = df.shape[0]
        out_of_pattern_images = (df[bdd_col] == 0).sum()
        out_of_pattern_misclassified_images = ((df['true'] == False) & (df[bdd_col] == 0)).sum()
        df_scores.loc['all', :] = [out_of_pattern_images, out_of_pattern_misclassified_images, total_images]
        # if data frame return 0 rows, a nan will be placed
        df_scores.fillna(0, inplace=True)
        # calculate metrics
        df_scores['outOfPattern'] = df_scores[bdd_col + '_false'] / df_scores['count']
        df_scores['outOfPatternMissClassified'] = df_scores[bdd_col + '_false_miss_classified'] / df_scores[bdd_col + '_false']
        # if class is never missclassified and bdd recognize all of his patterns
        # both outOfPattern and outOfPatternMissClassified will be 0
        # so the division will result in NaN
        df_scores['outOfPatternMissClassified'].replace({np.nan:1.0, 0.0: 1.0}, inplace=True)
        df_scores['outOfPattern'].replace({np.nan:0.0}, inplace=True)
        # no missclassification for a class
        df_scores[bdd_col + '_false'].replace({np.nan:0.0}, inplace=True)
        df_scores[bdd_col + '_false_miss_classified'].replace({np.nan:0.0}, inplace=True)
        # add mean of all classes
        a1 = df_scores.loc[df_scores.index != 'all', 'outOfPattern'].mean()
        a2 = df_scores.loc[df_scores.index != 'all', 'outOfPatternMissClassified'].mean()
        df_scores.loc['all_mean', :] = [0, 0, total_images, a1, a2]

        if bdd_col=='bdd':
            return df_scores.reset_index()
        return df_scores


    def score_dataframe_multi_eta(self, df, eta):
        """TODO"""
        df_scores = pd.DataFrame()

        for et in range(eta+1):
            temp = self.score_dataframe(df, f'bdd_{et}')
            temp['eta'] = et
            temp.columns = [*map(lambda x: x.replace(f'bdd_{et}', ''), temp.columns)]
            df_scores = pd.concat([df_scores, temp])
            del temp

        return df_scores.reset_index()


    def plot_stats(self, df, stage, true=True, save_folder=None, prefix=None):
        """TODO"""
        df = df.loc[df['true'] == true].set_index('y')
        mean_ = df['outOfPattern'].mean().round(3)

        neurons = f' Number of neuron: {len(self.neurons)}' if self.neurons is not None else None
        title = f'{stage} - {true} - #out of pattern: {mean_}{neurons}\n'
        filename = f'{stage.lower()}_{true}_outOfPattern'
        color = 'teal' if true else 'orange'

        df['outOfPattern'].plot(
            kind='bar', title=title, legend=False, color=color, xlabel='', hatch='x', edgecolor='black')
        plt.yticks(np.arange(0, 1.1, 0.1))
        plt.axhline(mean_, color='red', linewidth=2, linestyle='--')


        if save_folder is not None:
            plt.savefig(save_folder / f'bdd_scores_{filename}_{prefix}.jpg', dpi=150, transparent=False)

        plt.show()



def build_bdd_multi_etas(args):
    df_train, df_test, df_true, neurons, thld_name, thld, eta, save_path = args

    from dd.autoref import BDD

    # construcr MonitorBDD
    patterns = MonitorBDD( df_true.shape[1]-1, thld, neurons=neurons )
    print(f'{thld_name} - eta: {eta}')

    # build
    patterns.add_dataframe( df_true, eta, eval_dfs=[df_train, df_test] )

    # collect scores
    df_bdd_info = patterns.stats.copy()
    df_bdd_info['thld'] = thld_name

    df_train_scores = patterns.score_dataframe_multi_eta(df_train, eta)
    df_test_scores = patterns.score_dataframe_multi_eta(df_test, eta)
    df_train_scores['stage'] = 'train'
    df_test_scores['stage'] = 'test'

    # combine scores
    df_bdd_scores = pd.concat([df_train_scores, df_test_scores]).reset_index(drop=True)
    df_bdd_scores['thld'] = thld_name

    # delete variables
    if save_path is not None:
        temp_name = f'multi-etas-{thld_name}{"-neurons" if neurons else ""}'

        with open(save_path / f'{temp_name}.pkl', "wb") as f:
            pickle.dump(patterns, f, pickle.HIGHEST_PROTOCOL)
        df_bdd_info.to_csv(save_path / f'{temp_name}-info.csv', index=False)
        df_bdd_scores.to_csv(save_path / f'{temp_name}-scores.csv', index=False)

    del BDD, patterns
    del df_train_scores, df_test_scores

    print(f'> Done! [ {thld_name} - eta: {eta} ]')

    return df_bdd_info, df_bdd_scores



def build_bdd(args):
    df_train, df_test, df_true, neurons, thld_name, thld, eta, save_path = args

    from dd.autoref import BDD

    # construcr MonitorBDD
    patterns = MonitorBDD( df_true.shape[1]-1, thld, neurons=neurons )
    print(f'{thld_name} - eta: {eta}')

    # build
    patterns.add_dataframe( df_true, eta)

    # collect scores
    df_bdd_info = patterns.stats.copy()
    df_bdd_info['thld'] = thld_name

    df_train_scores = patterns.evaluate_dataframe(df_train)
    df_test_scores = patterns.evaluate_dataframe(df_test)

    df_train_scores['stage'] = 'train'
    df_train_scores['eta'] = eta

    df_test_scores['stage'] = 'test'
    df_test_scores['eta'] = eta

    # combine scores
    df_bdd_scores = pd.concat([df_train_scores, df_test_scores]).reset_index(drop=True)
    df_bdd_scores['thld'] = thld_name

    if save_path is not None:
        temp_name = f'single-{thld_name}{"-neurons" if neurons else ""}'

        with open(save_path / f'{temp_name}.pkl', "wb") as f:
            pickle.dump(patterns, f, pickle.HIGHEST_PROTOCOL)
        df_bdd_info.to_csv(save_path / f'{temp_name}-info.csv', index=False)
        df_bdd_scores.to_csv(save_path / f'{temp_name}-scores.csv', index=False)

    # delete variables
    del BDD, patterns
    del df_train_scores, df_test_scores

    print(f'> Done! [ {thld_name} - eta: {eta} ]')

    return df_bdd_info, df_bdd_scores

# TEST Class

In [6]:
import pandas as pd
import numpy as np
import sys
from pathlib import Path
from itertools import product

In [7]:
# env variables
REPO_PATH = '/home/ah19/runtime-monitoring'
DATASET = 'MNIST'
PREFIX = 'Adam-256-30'
FILENAME_POSTFIX = f'{DATASET}_{PREFIX}'
DATA_FALVOR = 'raw'
LOAD_NEURONS = True
POSTFIX = 'PyDDLib'
NUM_NEURONS = int(PREFIX.split('-')[-1])

sys.path.append(f'{REPO_PATH}/utilities')
from utils import load_json
from pathManager import fetchPaths

In [8]:
# paths
base = Path(REPO_PATH)
paths = fetchPaths(base, DATASET)

path = paths[DATASET.lower()]
path_bdd_testingThresholds_raw = paths['bdd_testingThresholds_raw'] / FILENAME_POSTFIX


# path_lastHiddenLayer = paths['lastHiddenLayer']
# path_lastHiddenLayer_raw = paths['lastHiddenLayer_raw']
path_lastHiddenLayer_pca = paths['lastHiddenLayer_pca']
path_lastHiddenLayer_pca_single = path_lastHiddenLayer_pca / FILENAME_POSTFIX / 'Single'
# path_lastHiddenLayer_pca_classes = path_lastHiddenLayer_pca / FILENAME_POSTFIX / 'Classes'

path_lastHiddenLayer = paths['lastHiddenLayer_raw'] / FILENAME_POSTFIX

In [9]:
# import Data
print('Loading train Data ...')
df = pd.read_csv(path_lastHiddenLayer / f'{FILENAME_POSTFIX}_train.csv')

# split train data
df_true = df[df['true'] == True].copy()
df_true = df_true.drop('true', axis=1).reset_index(drop=True)

print('Loading test Data ...')
df_test = pd.read_csv(path_lastHiddenLayer / f'{FILENAME_POSTFIX}_test.csv')


print('Loading Neurons ...')
neurons = None
if LOAD_NEURONS:
    neurons = load_json(path_lastHiddenLayer_pca_single / f'{FILENAME_POSTFIX}_neurons.json')


Loading train Data ...
Loading test Data ...
Loading Neurons ...


In [10]:
# define threshold

p = 0.9

thld = np.quantile(df_true.drop('y', axis=1), p, axis=0)
thld_name = f'qth_{p}'

# degree of freedom
eta = 0

In [13]:
# monitor = MonitorBDD( NUM_NEURONS, thld, neurons=neurons )
monitor = MonitorBDD( NUM_NEURONS, thld )

In [14]:
monitor.vars.shape

(30,)

In [15]:
monitor.add_dataframe( df_true, eta )

In [16]:
df_bdd_test = monitor.evaluate_dataframe( df_test, 0 )

In [17]:
df_bdd_test_score = monitor.score_dataframe(df_bdd_test)

AttributeError: 'NoneType' object has no attribute 'loc'

In [24]:
df_bdd_test