# ProtoNN in Tensorflow

This is a simple notebook that illustrates the usage of Tensorflow implementation of ProtoNN. We are using the USPS dataset. Please refer to `fetch_usps.py` for more details on downloading the dataset.

In [1]:
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

from __future__ import print_function
import sys
import os
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

#sys.path.insert(0, '../../')
# from edgeml.trainer.protoNNTrainer import ProtoNNTrainer
# from edgeml.graph.protoNN import ProtoNN
# import edgeml.utils as utils
# import helpermethods as helper
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV


sys.path.append(r"E:\programming\practice\research\optimized code\EdgeML\examples\tf\ProtoNN")
import helpermethods as helper

Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
#HYPERPARAMETER
hyper = {'REG_W': 2.0012967740413277e-06,
 'REG_B': 2.0861588167171992e-05,
 'REG_Z': 2.0478935633536105e-05,
 'SPAR_W': 0.8367487762320901,
 'SPAR_B': 0.9791350117492328,
 'SPAR_Z': 0.9505125522850648,
 'LEARNING_RATE': 0.00012654644856451654,
 'NUM_EPOCHS': 355}

# Helper Methods

In [4]:
#helper methods
sys.path.insert(0, '../')
import argparse


def getModelSize(matrixList, sparcityList, expected=True, bytesPerVar=4):
    '''
    expected: Expected size according to the parameters set. The number of
        zeros could actually be more than that is required to satisfy the
        sparsity constraint.
    '''
    nnzList, sizeList, isSparseList = [], [], []
    hasSparse = False
    for i in range(len(matrixList)):
        A, s = matrixList[i], sparcityList[i]
        assert A.ndim == 2
        assert s >= 0
        assert s <= 1
        nnz, size, sparse = countnnZ(A, s, bytesPerVar=bytesPerVar)
        nnzList.append(nnz)
        sizeList.append(size)
        hasSparse = (hasSparse or sparse)

    totalnnZ = np.sum(nnzList)
    totalSize = np.sum(sizeList)
    if expected:
        return totalnnZ, totalSize, hasSparse
    numNonZero = 0
    totalSize = 0
    hasSparse = False
    for i in range(len(matrixList)):
        A, s = matrixList[i], sparcityList[i]
        numNonZero_ = np.count_nonzero(A)
        numNonZero += numNonZero_
        hasSparse = (hasSparse or (s < 0.5))
        if s <= 0.5:
            totalSize += numNonZero_ * 2 * bytesPerVar
        else:
            totalSize += A.size * bytesPerVar
    return numNonZero, totalSize, hasSparse


def getGamma(gammaInit, projectionDim, dataDim, numPrototypes, x_train):
    if gammaInit is None:
        print("Using median heuristic to estimate gamma.")
        gamma, W, B = medianHeuristic(x_train, projectionDim,
                                            numPrototypes)
        print("Gamma estimate is: %f" % gamma)
        return W, B, gamma
    return None, None, gammaInit


def preprocessData(dataDir,w):
    '''
    Loads data from the dataDir and does some initial preprocessing
    steps. Data is assumed to be contained in two files,
    train.npy and test.npy. Each containing a 2D numpy array of dimension
    [numberOfExamples, numberOfFeatures + 1]. The first column of each
    matrix is assumed to contain label information.

    For an N-Class problem, we assume the labels are integers from 0 through
    N-1.
    '''
    # Uncomment for usual training data
    # train = np.load(dataDir + '/train_'+str(w)+'.npy')
    # test = np.load(dataDir + '/test_'+str(w)+'.npy')
    # Uncomment for time domain training data
    train = np.load(dataDir + '/ttrain_'+str(w)+'.npy')
    test = np.load(dataDir + '/ttest_'+str(w)+'.npy')
    # Uncomment for 1 sensordrop training data
    # train = np.load(dataDir + '/train_'+str(w)+'.npy')
    # test = np.load(dataDir + '/test_'+str(w)+'.npy')

    dataDimension = int(train.shape[1]) - 1
    x_train = train[:, 1:dataDimension + 1]
    y_train_ = train[:, 0]
    x_test = test[:, 1:dataDimension + 1]
    y_test_ = test[:, 0]

    numClasses = max(y_train_) - min(y_train_) + 1
    numClasses = max(numClasses, max(y_test_) - min(y_test_) + 1)
    numClasses = int(numClasses)

    # mean-var
    mean = np.mean(x_train, 0)
    std = np.std(x_train, 0)
    std[std[:] < 0.000001] = 1
    x_train = (x_train - mean) / std
    x_test = (x_test - mean) / std

    # one hot y-train
    lab = y_train_.astype('uint8')
    lab = np.array(lab) - min(lab)
    lab_ = np.zeros((x_train.shape[0], numClasses))
    lab_[np.arange(x_train.shape[0]), lab] = 1
    y_train = lab_

    # one hot y-test
    lab = y_test_.astype('uint8')
    lab = np.array(lab) - min(lab)
    lab_ = np.zeros((x_test.shape[0], numClasses))
    lab_[np.arange(x_test.shape[0]), lab] = 1
    y_test = lab_

    return dataDimension, numClasses, x_train, y_train, x_test, y_test



def getProtoNNArgs():
    def checkIntPos(value):
        ivalue = int(value)
        if ivalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive int value" % value)
        return ivalue

    def checkIntNneg(value):
        ivalue = int(value)
        if ivalue < 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid non-neg int value" % value)
        return ivalue

    def checkFloatNneg(value):
        fvalue = float(value)
        if fvalue < 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid non-neg float value" % value)
        return fvalue

    def checkFloatPos(value):
        fvalue = float(value)
        if fvalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive float value" % value)
        return fvalue

    '''
    Parse protoNN commandline arguments
    '''
    parser = argparse.ArgumentParser(
        description='Hyperparameters for ProtoNN Algorithm')

    msg = 'Data directory containing train and test data. The '
    msg += 'data is assumed to be saved as 2-D numpy matrices with '
    msg += 'names `train.npy` and `test.npy`, of dimensions\n'
    msg += '\t[numberOfInstances, numberOfFeatures + 1].\n'
    msg += 'The first column of each file is assumed to contain label information.'
    msg += ' For a N-class problem, labels are assumed to be integers from 0 to'
    msg += ' N-1 (inclusive).'
    parser.add_argument('-d', '--data-dir', required=True, help=msg)
    parser.add_argument('-l', '--projection-dim', type=checkIntPos, default=10,
                        help='Projection Dimension.')
    parser.add_argument('-p', '--num-prototypes', type=checkIntPos, default=20,
                        help='Number of prototypes.')
    parser.add_argument('-g', '--gamma', type=checkFloatPos, default=None,
                        help='Gamma for Gaussian kernel. If not provided, ' +
                        'median heuristic will be used to estimate gamma.')

    parser.add_argument('-e', '--epochs', type=checkIntPos, default=100,
                        help='Total training epochs.')
    parser.add_argument('-b', '--batch-size', type=checkIntPos, default=32,
                        help='Batch size for each pass.')
    parser.add_argument('-r', '--learning-rate', type=checkFloatPos,
                        default=0.001,
                        help='Initial Learning rate for ADAM Optimizer.')

    parser.add_argument('-rW', type=float, default=0.000,
                        help='Coefficient for l2 regularizer for predictor' +
                        ' parameter W ' + '(default = 0.0).')
    parser.add_argument('-rB', type=float, default=0.00,
                        help='Coefficient for l2 regularizer for predictor' +
                        ' parameter B ' + '(default = 0.0).')
    parser.add_argument('-rZ', type=float, default=0.00,
                        help='Coefficient for l2 regularizer for predictor' +
                        'parameter Z ' +
                        '(default = 0.0).')

    parser.add_argument('-sW', type=float, default=1.000,
                        help='Sparsity constraint for predictor parameter W ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-sB', type=float, default=1.00,
                        help='Sparsity constraint for predictor parameter B ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-sZ', type=float, default=1.00,
                        help='Sparsity constraint for predictor parameter Z ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-pS', '--print-step', type=int, default=200,
                        help='The number of update steps between print ' +
                        'calls to console.')
    parser.add_argument('-vS', '--val-step', type=int, default=3,
                        help='The number of epochs between validation' +
                        'performance evaluation')
    return parser.parse_args()

# Utils 

In [5]:
#utils
import scipy.cluster
import scipy.spatial
import os


def medianHeuristic(data, projectionDimension, numPrototypes, W_init=None):
    '''
    This method can be used to estimate gamma for ProtoNN. An approximation to
    median heuristic is used here.
    1. First the data is collapsed into the projectionDimension by W_init. If
    W_init is not provided, it is initialized from a random normal(0, 1). Hence
    data normalization is essential.
    2. Prototype are computed by running a  k-means clustering on the projected
    data.
    3. The median distance is then estimated by calculating median distance
    between prototypes and projected data points.

    data needs to be [-1, numFeats]
    If using this method to initialize gamma, please use the W and B as well.

    TODO: Return estimate of Z (prototype labels) based on cluster centroids
    andand labels

    TODO: Clustering fails due to singularity error if projecting upwards

    W [dxd_cap]
    B [d_cap, m]
    returns gamma, W, B
    '''
    assert data.ndim == 2
    X = data
    featDim = data.shape[1]
    if projectionDimension > featDim:
        print("Warning: Projection dimension > feature dimension. Gamma")
        print("\t estimation due to median heuristic could fail.")
        print("\tTo retain the projection dataDimension, provide")
        print("\ta value for gamma.")

    if W_init is None:
        W_init = np.random.normal(size=[featDim, projectionDimension])
    W = W_init
    XW = np.matmul(X, W)
    assert XW.shape[1] == projectionDimension
    assert XW.shape[0] == len(X)
    # Requires [N x d_cap] data matrix of N observations of d_cap-dimension and
    # the number of centroids m. Returns, [n x d_cap] centroids and
    # elementwise center information.
    B, centers = scipy.cluster.vq.kmeans2(XW, numPrototypes)
    # Requires two matrices. Number of observations x dimension of observation
    # space. Distances[i,j] is the distance between XW[i] and B[j]
    distances = scipy.spatial.distance.cdist(XW, B, metric='euclidean')
    distances = np.reshape(distances, [-1])
    gamma = np.median(distances)
    gamma = 1 / (2.5 * gamma)
    return gamma.astype('float32'), W.astype('float32'), B.T.astype('float32')


def multiClassHingeLoss(logits, label, batch_th):
    '''
    MultiClassHingeLoss to match C++ Version - No TF internal version
    '''
    flatLogits = tf.reshape(logits, [-1, ])
    label_ = tf.argmax(label, 1)

    correctId = tf.range(0, batch_th) * label.shape[1] + label_
    correctLogit = tf.gather(flatLogits, correctId)

    maxLabel = tf.argmax(logits, 1)
    top2, _ = tf.nn.top_k(logits, k=2, sorted=True)

    wrongMaxLogit = tf.where(
        tf.equal(maxLabel, label_), top2[:, 1], top2[:, 0])

    return tf.reduce_mean(tf.nn.relu(1. + wrongMaxLogit - correctLogit))


def crossEntropyLoss(logits, label):
    '''
    Cross Entropy loss for MultiClass case in joint training for
    faster convergence
    '''
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                   labels=tf.stop_gradient(label)))


def mean_absolute_error(logits, label):
    '''
    Function to compute the mean absolute error.
    '''
    return tf.reduce_mean(tf.abs(tf.subtract(logits, label)))


def hardThreshold(A, s):
    '''
    Hard thresholding function on Tensor A with sparsity s
    '''
    A_ = np.copy(A)
    A_ = A_.ravel()
    if len(A_) > 0:
        th = np.percentile(np.abs(A_), (1 - s) * 100.0, interpolation='higher')
        A_[np.abs(A_) < th] = 0.0
    A_ = A_.reshape(A.shape)
    return A_


def copySupport(src, dest):
    '''
    copy support of src tensor to dest tensor
    '''
    support = np.nonzero(src)
    dest_ = dest
    dest = np.zeros(dest_.shape)
    dest[support] = dest_[support]
    return dest


def countnnZ(A, s, bytesPerVar=4):
    '''
    Returns # of non-zeros and representative size of the tensor
    Uses dense for s >= 0.5 - 4 byte
    Else uses sparse - 8 byte
    '''
    params = 1
    hasSparse = False
    for i in range(0, len(A.shape)):
        params *= int(A.shape[i])
    if s < 0.5:
        nnZ = np.ceil(params * s)
        hasSparse = True
        return nnZ, nnZ * 2 * bytesPerVar, hasSparse
    else:
        nnZ = params
        return nnZ, nnZ * bytesPerVar, hasSparse


def getConfusionMatrix(predicted, target, numClasses):
    '''
    Returns a confusion matrix for a multiclass classification
    problem. `predicted` is a 1-D array of integers representing
    the predicted classes and `target` is the target classes.

    confusion[i][j]: Number of elements of class j
        predicted as class i
    Labels are assumed to be in range(0, numClasses)
    Use`printFormattedConfusionMatrix` to echo the confusion matrix
    in a user friendly form.
    '''
    assert(predicted.ndim == 1)
    assert(target.ndim == 1)
    arr = np.zeros([numClasses, numClasses])

    for i in range(len(predicted)):
        arr[predicted[i]][target[i]] += 1
    return arr


def printFormattedConfusionMatrix(matrix):
    '''
    Given a 2D confusion matrix, prints it in a human readable way.
    The confusion matrix is expected to be a 2D numpy array with
    square dimensions
    '''
    assert(matrix.ndim == 2)
    assert(matrix.shape[0] == matrix.shape[1])
    RECALL = 'Recall'
    PRECISION = 'PRECISION'
    print("|%s|" % ('True->'), end='')
    for i in range(matrix.shape[0]):
        print("%7d|" % i, end='')
    print("%s|" % 'Precision')

    print("|%s|" % ('-' * len(RECALL)), end='')
    for i in range(matrix.shape[0]):
        print("%s|" % ('-' * 7), end='')
    print("%s|" % ('-' * len(PRECISION)))

    precisionlist = np.sum(matrix, axis=1)
    recalllist = np.sum(matrix, axis=0)
    precisionlist = [matrix[i][i] / x if x !=
                     0 else -1 for i, x in enumerate(precisionlist)]
    recalllist = [matrix[i][i] / x if x !=
                  0 else -1 for i, x in enumerate(recalllist)]
    for i in range(matrix.shape[0]):
        # len recall = 6
        print("|%6d|" % (i), end='')
        for j in range(matrix.shape[0]):
            print("%7d|" % (matrix[i][j]), end='')
        print("%s" % (" " * (len(PRECISION) - 7)), end='')
        if precisionlist[i] != -1:
            print("%1.5f|" % precisionlist[i])
        else:
            print("%7s|" % "nan")

    print("|%s|" % ('-' * len(RECALL)), end='')
    for i in range(matrix.shape[0]):
        print("%s|" % ('-' * 7), end='')
    print("%s|" % ('-' * len(PRECISION)))
    print("|%s|" % ('Recall'), end='')

    for i in range(matrix.shape[0]):
        if recalllist[i] != -1:
            print("%1.5f|" % (recalllist[i]), end='')
        else:
            print("%7s|" % "nan", end='')

    print('%s|' % (' ' * len(PRECISION)))


def getPrecisionRecall(cmatrix, label=1):
    trueP = cmatrix[label][label]
    denom = np.sum(cmatrix, axis=0)[label]
    if denom == 0:
        denom = 1
    recall = trueP / denom
    denom = np.sum(cmatrix, axis=1)[label]
    if denom == 0:
        denom = 1
    precision = trueP / denom
    return precision, recall


def getMacroPrecisionRecall(cmatrix):
    # TP + FP
    precisionlist = np.sum(cmatrix, axis=1)
    # TP + FN
    recalllist = np.sum(cmatrix, axis=0)
    precisionlist__ = [cmatrix[i][i] / x if x !=
                       0 else 0 for i, x in enumerate(precisionlist)]
    recalllist__ = [cmatrix[i][i] / x if x !=
                    0 else 0 for i, x in enumerate(recalllist)]
    precision = np.sum(precisionlist__)
    precision /= len(precisionlist__)
    recall = np.sum(recalllist__)
    recall /= len(recalllist__)
    return precision, recall


def getMicroPrecisionRecall(cmatrix):
    # TP + FP
    precisionlist = np.sum(cmatrix, axis=1)
    # TP + FN
    recalllist = np.sum(cmatrix, axis=0)
    num = 0.0
    for i in range(len(cmatrix)):
        num += cmatrix[i][i]

    precision = num / np.sum(precisionlist)
    recall = num / np.sum(recalllist)
    return precision, recall


def getMacroMicroFScore(cmatrix):
    '''
    Returns macro and micro f-scores.
    Refer: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.8244&rep=rep1&type=pdf
    '''
    precisionlist = np.sum(cmatrix, axis=1)
    recalllist = np.sum(cmatrix, axis=0)
    precisionlist__ = [cmatrix[i][i] / x if x !=
                       0 else 0 for i, x in enumerate(precisionlist)]
    recalllist__ = [cmatrix[i][i] / x if x !=
                    0 else 0 for i, x in enumerate(recalllist)]
    macro = 0.0
    for i in range(len(precisionlist)):
        denom = precisionlist__[i] + recalllist__[i]
        numer = precisionlist__[i] * recalllist__[i] * 2
        if denom == 0:
            denom = 1
        macro += numer / denom
    macro /= len(precisionlist)

    num = 0.0
    for i in range(len(precisionlist)):
        num += cmatrix[i][i]

    denom1 = np.sum(precisionlist)
    denom2 = np.sum(recalllist)
    pi = num / denom1
    rho = num / denom2
    denom = pi + rho
    if denom == 0:
        denom = 1
    micro = 2 * pi * rho / denom
    return macro, micro


class GraphManager:
    '''
    Manages saving and restoring graphs. Designed to be used with EMI-RNN
    though is general enough to be useful otherwise as well.
    '''

    def __init__(self):
        pass

    def checkpointModel(self, saver, sess, modelPrefix,
                        globalStep=1000, redirFile=None):
        saver.save(sess, modelPrefix, global_step=globalStep)
        print('Model saved to %s, global_step %d' % (modelPrefix, globalStep),
              file=redirFile)

    def loadCheckpoint(self, sess, modelPrefix, globalStep,
                       redirFile=None):
        metaname = modelPrefix + '-%d.meta' % globalStep
        basename = os.path.basename(metaname)
        fileList = os.listdir(os.path.dirname(modelPrefix))
        fileList = [x for x in fileList if x.startswith(basename)]
        assert len(fileList) > 0, 'Checkpoint file not found'
        msg = 'Too many or too few checkpoint files for globalStep: %d' % globalStep
        assert len(fileList) is 1, msg
        chkpt = basename + '/' + fileList[0]
        saver = tf.train.import_meta_graph(metaname)
        metaname = metaname[:-5]
        saver.restore(sess, metaname)
        graph = tf.get_default_graph()
        return graph

# Model Trainer - ProtoNN

In [6]:
#Trainer
class ProtoNNTrainer:
    def __init__(self, protoNNObj, regW, regB, regZ,
                 sparcityW, sparcityB, sparcityZ,
                 learningRate, X, Y, lossType='l2'):
        '''
        A wrapper for the various techniques used for training ProtoNN. This
        subsumes both the responsibility of loss graph construction and
        performing training. The original training routine that is part of the
        C++ implementation of EdgeML used iterative hard thresholding (IHT),
        gamma estimation through median heuristic and other tricks for
        training ProtoNN. This module implements the same in Tensorflow
        and python.

        protoNNObj: An instance of ProtoNN class defining the forward
            computation graph. The loss functions and training routines will be
            attached to this instance.
        regW, regB, regZ: Regularization constants for W, B, and
            Z matrices of protoNN.
        sparcityW, sparcityB, sparcityZ: Sparsity constraints
            for W, B and Z matrices. A value between 0 (exclusive) and 1
            (inclusive) is expected. A value of 1 indicates dense training.
        learningRate: Initial learning rate for ADAM optimizer.
        X, Y : Placeholders for data and labels.
            X [-1, featureDimension]
            Y [-1, num Labels]
        lossType: ['l2', 'xentropy']
        '''
        self.protoNNObj = protoNNObj
        self.__regW = regW
        self.__regB = regB
        self.__regZ = regZ
        self.__sW = sparcityW
        self.__sB = sparcityB
        self.__sZ = sparcityZ
        self.__lR = learningRate
        self.X = X
        self.Y = Y
        self.sparseTraining = True
        if (sparcityW == 1.0) and (sparcityB == 1.0) and (sparcityZ == 1.0):
            self.sparseTraining = False
            print("Sparse training disabled.", file=sys.stderr)
        # Define placeholders for sparse training
        self.W_th = None
        self.B_th = None
        self.Z_th = None
        self.__lossType = lossType
        self.__validInit = False
        self.__validInit = self.__validateInit()
        self.__protoNNOut = protoNNObj(X, Y)
        self.loss = self.__lossGraph()
        self.trainStep = self.__trainGraph()
        self.__hthOp = self.__getHardThresholdOp()
        self.accuracy = protoNNObj.getAccuracyOp()

    def __validateInit(self):
        self.__validInit = False
        msg = "Sparsity value should be between"
        msg += " 0 and 1 (both inclusive)."
        assert self.__sW >= 0. and self.__sW <= 1., 'W:' + msg
        assert self.__sB >= 0. and self.__sB <= 1., 'B:' + msg
        assert self.__sZ >= 0. and self.__sZ <= 1., 'Z:' + msg
        d, dcap, m, L, _ = self.protoNNObj.getHyperParams()
        msg = 'Y should be of dimension [-1, num labels/classes]'
        msg += ' specified as part of ProtoNN object.'
        assert (len(self.Y.shape)) == 2, msg
        assert (self.Y.shape[1] == L), msg
        msg = 'X should be of dimension [-1, featureDimension]'
        msg += ' specified as part of ProtoNN object.'
        assert (len(self.X.shape) == 2), msg
        assert (self.X.shape[1] == d), msg
        self.__validInit = True
        msg = 'Values can be \'l2\', or \'xentropy\''
        if self.__lossType not in ['l2', 'xentropy']:
            raise ValueError(msg)
        return True

    def __lossGraph(self):
        pnnOut = self.__protoNNOut
        l1, l2, l3 = self.__regW, self.__regB, self.__regZ
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        if self.__lossType == 'l2':
            with tf.name_scope('protonn-l2-loss'):
                loss_0 = tf.nn.l2_loss(self.Y - pnnOut)
                reg = l1 * tf.nn.l2_loss(W) + l2 * tf.nn.l2_loss(B)
                reg += l3 * tf.nn.l2_loss(Z)
                loss = loss_0 + reg
        elif self.__lossType == 'xentropy':
            with tf.name_scope('protonn-xentropy-loss'):
                loss_0 = tf.nn.softmax_cross_entropy_with_logits_v2(logits=pnnOut,
                                                         labels=tf.stop_gradient(self.Y))
                loss_0 = tf.reduce_mean(loss_0)
                reg = l1 * tf.nn.l2_loss(W) + l2 * tf.nn.l2_loss(B)
                reg += l3 * tf.nn.l2_loss(Z)
                loss = loss_0 + reg
        return loss

    def __trainGraph(self):
        with tf.name_scope('protonn-gradient-adam'):
            trainStep = tf.train.AdamOptimizer(self.__lR)
            trainStep = trainStep.minimize(self.loss)
        return trainStep

    def __getHardThresholdOp(self):
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        self.W_th = tf.placeholder(tf.float32, name='W_th')
        self.B_th = tf.placeholder(tf.float32, name='B_th')
        self.Z_th = tf.placeholder(tf.float32, name='Z_th')
        with tf.name_scope('hard-threshold-assignments'):
            hard_thrsd_W = W.assign(self.W_th)
            hard_thrsd_B = B.assign(self.B_th)
            hard_thrsd_Z = Z.assign(self.Z_th)
            hard_thrsd_op = tf.group(hard_thrsd_W, hard_thrsd_B, hard_thrsd_Z)
        return hard_thrsd_op

    def train(self, batchSize, totalEpochs, sess,
              x_train, x_val, y_train, y_val, noInit=False,
              redirFile=None, printStep=10, valStep=3):
        '''
        Performs dense training of ProtoNN followed by iterative hard
        thresholding to enforce sparsity constraints.

        batchSize: Batch size per update
        totalEpochs: The number of epochs to run training for. One epoch is
            defined as one pass over the entire training data.
        sess: The Tensorflow session to use for running various graph
            operators.
        x_train, x_val, y_train, y_val: The numpy array containing train and
            validation data. x data is assumed to in of shape [-1,
            featureDimension] while y should have shape [-1, numberLabels].
        noInit: By default, all the tensors of the computation graph are
        initialized at the start of the training session. Set noInit=False to
        disable this behaviour.
        printStep: Number of batches between echoing of loss and train accuracy.
        valStep: Number of epochs between evolutions on validation set.
        '''
        d, d_cap, m, L, gamma = self.protoNNObj.getHyperParams()
        assert batchSize >= 1, 'Batch size should be positive integer'
        assert totalEpochs >= 1, 'Total epochs should be positive integer'
        assert x_train.ndim == 2, 'Expected training data to be of rank 2'
        assert x_train.shape[1] == d, 'Expected x_train to be [-1, %d]' % d
        assert x_val.ndim == 2, 'Expected validation data to be of rank 2'
        assert x_val.shape[1] == d, 'Expected x_val to be [-1, %d]' % d
        assert y_train.ndim == 2, 'Expected training labels to be of rank 2'
        assert y_train.shape[1] == L, 'Expected y_train to be [-1, %d]' % L
        assert y_val.ndim == 2, 'Expected validation labels to be of rank 2'
        assert y_val.shape[1] == L, 'Expected y_val to be [-1, %d]' % L

        # Numpy will throw asserts for arrays
        if sess is None:
            raise ValueError('sess must be valid Tensorflow session.')

        trainNumBatches = int(np.ceil(len(x_train) / batchSize))
        valNumBatches = int(np.ceil(len(x_val) / batchSize))
        x_train_batches = np.array_split(x_train, trainNumBatches)
        y_train_batches = np.array_split(y_train, trainNumBatches)
        x_val_batches = np.array_split(x_val, valNumBatches)
        y_val_batches = np.array_split(y_val, valNumBatches)
        if not noInit:
            sess.run(tf.global_variables_initializer())
        X, Y = self.X, self.Y
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        for epoch in range(totalEpochs):
            for i in range(len(x_train_batches)):
                batch_x = x_train_batches[i]
                batch_y = y_train_batches[i]
                feed_dict = {
                    X: batch_x,
                    Y: batch_y
                }
                sess.run(self.trainStep, feed_dict=feed_dict)
                if i % printStep == 0:
                    loss, acc = sess.run([self.loss, self.accuracy],
                                         feed_dict=feed_dict)
                    msg = "Epoch: %3d Batch: %3d" % (epoch, i)
                    msg += " Loss: %3.5f Accuracy: %2.5f" % (loss, acc)
                    print(msg, file=redirFile)

            # Perform Hard thresholding
            if self.sparseTraining:
                W_, B_, Z_ = sess.run([W, B, Z])
                fd_thrsd = {
                    self.W_th: hardThreshold(W_, self.__sW),
                    self.B_th: hardThreshold(B_, self.__sB),
                    self.Z_th: hardThreshold(Z_, self.__sZ)
                }
                sess.run(self.__hthOp, feed_dict=fd_thrsd)

            if (epoch + 1) % valStep  == 0:
                acc = 0.0
                loss = 0.0
                for j in range(len(x_val_batches)):
                    batch_x = x_val_batches[j]
                    batch_y = y_val_batches[j]
                    feed_dict = {
                        X: batch_x,
                        Y: batch_y
                    }
                    acc_, loss_ = sess.run([self.accuracy, self.loss],
                                           feed_dict=feed_dict)
                    acc += acc_
                    loss += loss_
                acc /= len(y_val_batches)
                loss /= len(y_val_batches)
                print("Test Loss: %2.5f Accuracy: %2.5f" % (loss, acc))


# Model Graph - ProtoNN

In [7]:

class ProtoNN:
    def __init__(self, inputDimension, projectionDimension, numPrototypes,
                 numOutputLabels, gamma,
                 W = None, B = None, Z = None):
        '''
        Forward computation graph for ProtoNN.

        inputDimension: Input data dimension or feature dimension.
        projectionDimension: hyperparameter
        numPrototypes: hyperparameter
        numOutputLabels: The number of output labels or classes
        W, B, Z: Numpy matrices that can be used to initialize
            projection matrix(W), prototype matrix (B) and prototype labels
            matrix (B).
            Expected Dimensions:
                W   inputDimension (d) x projectionDimension (d_cap)
                B   projectionDimension (d_cap) x numPrototypes (m)
                Z   numOutputLabels (L) x numPrototypes (m)
        '''
        with tf.name_scope('protoNN') as ns:
            self.__nscope = ns
        self.__d = inputDimension
        self.__d_cap = projectionDimension
        self.__m = numPrototypes
        self.__L = numOutputLabels

        self.__inW = W
        self.__inB = B
        self.__inZ = Z
        self.__inGamma = gamma
        self.W, self.B, self.Z = None, None, None
        self.gamma = None

        self.__validInit = False
        self.__initWBZ()
        self.__initGamma()
        self.__validateInit()
        self.protoNNOut = None
        self.predictions = None
        self.accuracy = None

    def __validateInit(self):
        self.__validInit = False
        errmsg = "Dimensions mismatch! Should be W[d, d_cap]"
        errmsg += ", B[d_cap, m] and Z[L, m]"
        d, d_cap, m, L, _ = self.getHyperParams()
        assert self.W.shape[0] == d, errmsg
        assert self.W.shape[1] == d_cap, errmsg
        assert self.B.shape[0] == d_cap, errmsg
        assert self.B.shape[1] == m, errmsg
        assert self.Z.shape[0] == L, errmsg
        assert self.Z.shape[1] == m, errmsg
        self.__validInit = True

    def __initWBZ(self):
        with tf.name_scope(self.__nscope):
            W = self.__inW
            if W is None:
                W = tf.random_normal_initializer()
                W = W([self.__d, self.__d_cap])
            self.W = tf.Variable(W, name='W', dtype=tf.float32)

            B = self.__inB
            if B is None:
                B = tf.random_uniform_initializer()
                B = B([self.__d_cap, self.__m])
            self.B = tf.Variable(B, name='B', dtype=tf.float32)

            Z = self.__inZ
            if Z is None:
                Z = tf.random_normal_initializer()
                Z = Z([self.__L, self.__m])
            Z = tf.Variable(Z, name='Z', dtype=tf.float32)
            self.Z = Z
        return self.W, self.B, self.Z

    def __initGamma(self):
        with tf.name_scope(self.__nscope):
            gamma = self.__inGamma
            self.gamma = tf.constant(gamma, name='gamma')

    def getHyperParams(self):
        '''
        Returns the model hyperparameters:
            [inputDimension, projectionDimension,
            numPrototypes, numOutputLabels, gamma]
        '''
        d = self.__d
        dcap = self.__d_cap
        m = self.__m
        L = self.__L
        return d, dcap, m, L, self.gamma

    def getModelMatrices(self):
        '''
        Returns Tensorflow tensors of the model matrices, which
        can then be evaluated to obtain corresponding numpy arrays.

        These can then be exported as part of other implementations of
        ProtonNN, for instance a C++ implementation or pure python
        implementation.
        Returns
            [ProjectionMatrix (W), prototypeMatrix (B),
             prototypeLabelsMatrix (Z), gamma]
        '''
        return self.W, self.B, self.Z, self.gamma

    def __call__(self, X, Y=None):
        '''
        This method is responsible for construction of the forward computation
        graph. The end point of the computation graph, or in other words the
        output operator for the forward computation is returned. Additionally,
        if the argument Y is provided, a classification accuracy operator with
        Y as target will also be created. For this, Y is assumed to in one-hot
        encoded format and the class with the maximum prediction score is
        compared to the encoded class in Y.  This accuracy operator is returned
        by getAccuracyOp() method. If a different accuracyOp is required, it
        can be defined by overriding the createAccOp(protoNNScoresOut, Y)
        method.

        X: Input tensor or placeholder of shape [-1, inputDimension]
        Y: Optional tensor or placeholder for targets (labels or classes).
            Expected shape is [-1, numOutputLabels].
        returns: The forward computation outputs, self.protoNNOut
        '''
        # This should never execute
        assert self.__validInit is True, "Initialization failed!"
        if self.protoNNOut is not None:
            return self.protoNNOut

        W, B, Z, gamma = self.W, self.B, self.Z, self.gamma
        with tf.name_scope(self.__nscope):
            WX = tf.matmul(X, W)
            # Convert WX to tensor so that broadcasting can work
            dim = [-1, WX.shape.as_list()[1], 1]
            WX = tf.reshape(WX, dim)
            dim = [1, B.shape.as_list()[0], -1]
            B = tf.reshape(B, dim)
            l2sim = B - WX
            l2sim = tf.pow(l2sim, 2)
            l2sim = tf.reduce_sum(l2sim, 1, keepdims=True)
            self.l2sim = l2sim
            gammal2sim = (-1 * gamma * gamma) * l2sim
            M = tf.exp(gammal2sim)
            dim = [1] + Z.shape.as_list()
            Z = tf.reshape(Z, dim)
            y = tf.multiply(Z, M)
            y = tf.reduce_sum(y, 2, name='protoNNScoreOut')
            self.protoNNOut = y
            self.predictions = tf.argmax(y, 1, name='protoNNPredictions')
            if Y is not None:
                self.createAccOp(self.protoNNOut, Y)
        return y

    def createAccOp(self, outputs, target):
        '''
        Define an accuracy operation on ProtoNN's output scores and targets.
        Here a simple classification accuracy operator is defined. More
        complicated operators (for multiple label problems and so forth) can be
        defined by overriding this method
        '''
        assert self.predictions is not None
        target = tf.argmax(target, 1)
        correctPrediction = tf.equal(self.predictions, target)
        acc = tf.reduce_mean(tf.cast(correctPrediction, tf.float32),
                             name='protoNNAccuracy')
        self.accuracy = acc

    def getPredictionsOp(self):
        '''
        The predictions operator is defined as argmax(protoNNScores) for each
        prediction.
        '''
        return self.predictions

    def getAccuracyOp(self):
        '''
        returns accuracyOp as defined by createAccOp. It defaults to
        multi-class classification accuracy.
        '''
        msg = "Accuracy operator not defined in graph. Did you provide Y as an"
        msg += " argument to _call_?"
        assert self.accuracy is not None, msg
        return self.accuracy

**WINDOW 1**

# Obtain Data

It is assumed that the Daphnet data has already been downloaded,preprocessed and set up in subdirectory.

In [8]:
DATA_DIR = r"./experiments"
windowLen = 'data_w1'
out = preprocessData(DATA_DIR,windowLen)
dataDimension = out[0]
numClasses = out[1]
x_train, y_train = out[2], out[3]
x_test, y_test = out[4], out[5]
print("Feature Dimension: ", dataDimension)
print("Num classes: ", numClasses)




Feature Dimension:  423
Num classes:  2


In [9]:

DATA_DIR = r"./experiments"
train, test = np.load(DATA_DIR + '/ttrain_data_w1.npy'), np.load(DATA_DIR + '/ttest_data_w1.npy')
x_train, y_train = train[:, 1:], train[:, 0]
x_test, y_test = test[:, 1:], test[:, 0]

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.15, random_state=42)

numClasses = max(y_train) - min(y_train) + 1
numClasses = max(numClasses, max(y_test) - min(y_test) + 1)
numClasses = int(numClasses)

y_train = helper.to_onehot(y_train, numClasses)
y_test = helper.to_onehot(y_test, numClasses)
y_val = helper.to_onehot(y_val, numClasses)

dataDimension = x_train.shape[1]
numClasses = y_train.shape[1]

# Model Parameters

Note that ProtoNN is very sensitive to the value of the hyperparameter $\gamma$, here stored in valiable `GAMMA`. If `GAMMA` is set to `None`, median heuristic will be used to estimate a good value of $\gamma$ through the `helper.getGamma()` method. This method also returns the corresponding `W` and `B` matrices which should be used to initialize ProtoNN (as is done here).

In [10]:
PROJECTION_DIM = 5 #d^
NUM_PROTOTYPES = 40 #m
REG_W = 0.000005
REG_B = 0.0
REG_Z = 0.00005
SPAR_W = 1.0
SPAR_B = 0.8
SPAR_Z = 0.8
LEARNING_RATE = 0.001
NUM_EPOCHS = 600
BATCH_SIZE = 2048
GAMMA = 0.007586

In [11]:
W, B, gamma = getGamma(GAMMA, PROJECTION_DIM, dataDimension,
                       NUM_PROTOTYPES, x_train)

In [12]:
gamma

0.007586

In [13]:
X = tf.placeholder(tf.float32, [None, dataDimension], name='X')
Y = tf.placeholder(tf.float32, [None, numClasses], name='Y')

In [14]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import confusion_matrix,classification_report
from functools import partial


X = tf.placeholder(tf.float32, [None, dataDimension], name='X')
Y = tf.placeholder(tf.float32, [None, numClasses], name='Y')
def objective(trial,x_train, x_test, y_train, y_test, x_val, y_val):
    
    W, B, gamma = getGamma(GAMMA, PROJECTION_DIM, dataDimension,
                       NUM_PROTOTYPES, x_train)
    # Inside the optimization function, you use the 'trial' object to suggest hyperparameters
    REG_W = trial.suggest_float('REG_W', 2e-6, 5e-6)
    REG_B = trial.suggest_float('REG_B', 0.0, 0.01)
    REG_Z = trial.suggest_float('REG_Z', 2e-5, 5e-5)
    SPAR_W = trial.suggest_float('SPAR_W', 0.5, 1.0)
    SPAR_B = trial.suggest_float('SPAR_B', 0.5, 1.0)
    SPAR_Z = trial.suggest_float('SPAR_Z', 0.5, 1.0)
    loss = trial.suggest_categorical('loss', ['l2', 'xentropy'])
        
    LEARNING_RATE = trial.suggest_float('LEARNING_RATE', 1e-4, 1e-3)
    NUM_EPOCHS = trial.suggest_int('NUM_EPOCHS', 200, 600)

    # Set the suggested hyperparameters in the trainer
    protoNN = ProtoNN(dataDimension, PROJECTION_DIM,
                  NUM_PROTOTYPES, numClasses,
                  gamma, W=W, B=B)
    
    trainer = ProtoNNTrainer(protoNN, REG_W, REG_B, REG_Z,
                         SPAR_W, SPAR_B, SPAR_Z,
                         LEARNING_RATE, X, Y, lossType=loss)
    
    # Call your ProtoNN trainer function or use it as needed
    sess = tf.Session()
    
        
    trainer.train(BATCH_SIZE, NUM_EPOCHS, sess, x_train, x_test, y_train, y_test,printStep=600, valStep=10)
    pred = sess.run(protoNN.predictions, feed_dict={X: x_val, Y: y_val})
    # W, B, Z are tensorflow graph nodes
    W, B, Z, _ = protoNN.getModelMatrices()
    matrixList = sess.run([W, B, Z])
    sparcityList = [SPAR_W, SPAR_B, SPAR_Z]                       
    nnz, size, sparse = getModelSize(matrixList, sparcityList)
    y_val = np.argmax(y_val,axis=1)
    sensitivity = confusion_matrix(y_val,pred)[1][1]/(confusion_matrix(y_val,pred)[1][1] + confusion_matrix(y_val,pred)[1][0])
    specificity = confusion_matrix(y_val,pred)[0][0]/(confusion_matrix(y_val,pred)[0][0] + confusion_matrix(y_val,pred)[0][1])

    alpha = trial.suggest_float('alpha', 0.0, 1.0)

    f1 = (2*sensitivity*specificity)/(sensitivity+specificity)
    mcc = matthews_corrcoef(y_val, pred)
    return alpha * f1 + (1 - alpha) * mcc


In [15]:
import optuna
study = optuna.create_study(direction='maximize', pruner = optuna.pruners.MedianPruner())


[I 2024-01-05 15:53:06,239] A new study created in memory with name: no-name-25f8a899-991d-4b05-8513-eefe96805523


In [16]:
op_fun = partial(objective,x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test, x_val = x_val, y_val = y_val)
study.optimize(op_fun,n_trials=3)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch:   0 Batch:   0 Loss: 32135.26367 Accuracy: 0.49768
Epoch:   1 Batch:   0 Loss: 32359.42383 Accuracy: 0.49768
Epoch:   2 Batch:   0 Loss: 31790.16016 Accuracy: 0.49768
Epoch:   3 Batch:   0 Loss: 31228.30664 Accuracy: 0.49768
Epoch:   4 Batch:   0 Loss: 30675.18359 Accuracy: 0.49768
Epoch:   5 Batch:   0 Loss: 30131.35938 Accuracy: 0.49768
Epoch:   6 Batch:   0 Loss: 29597.03711 Accuracy: 0.49768
Epoch:   7 Batch:   0 Loss: 29072.27539 Accuracy: 0.49768
Epoch:   8 Batch:   0 Loss: 28557.02930 Accuracy: 0.49768
Epoch:   9 Batch:   0 Loss: 28051.22070 Accuracy: 0.49768
Test Loss: 25997.18066 Accuracy: 0.49987
Epoch:  10 Batch:   0 Loss: 27554.69727 Accuracy: 0.49768
Epoch:  11 Batch:   0 Loss: 27067.28711 Accuracy: 0.49768
Epoch:  12 Batch:   0 Loss: 26588.84180 Accuracy: 0.49768
Epoch:  13 Batch:   0 Loss: 26119.19531 Accuracy: 0.49768
Epoch:  14 Batch:   0 Loss: 25658.15625 Accuracy: 0.4

Test Loss: 2883.37137 Accuracy: 0.49987
Epoch: 130 Batch:   0 Loss: 3095.94824 Accuracy: 0.49768
Epoch: 131 Batch:   0 Loss: 3037.91602 Accuracy: 0.49768
Epoch: 132 Batch:   0 Loss: 2981.00415 Accuracy: 0.49768
Epoch: 133 Batch:   0 Loss: 2925.18799 Accuracy: 0.49768
Epoch: 134 Batch:   0 Loss: 2870.45312 Accuracy: 0.49768
Epoch: 135 Batch:   0 Loss: 2816.78638 Accuracy: 0.49768
Epoch: 136 Batch:   0 Loss: 2764.16577 Accuracy: 0.49768
Epoch: 137 Batch:   0 Loss: 2712.56982 Accuracy: 0.49768
Epoch: 138 Batch:   0 Loss: 2661.98804 Accuracy: 0.49768
Epoch: 139 Batch:   0 Loss: 2612.40552 Accuracy: 0.49768
Test Loss: 2391.40504 Accuracy: 0.49987
Epoch: 140 Batch:   0 Loss: 2563.80176 Accuracy: 0.49768
Epoch: 141 Batch:   0 Loss: 2516.16187 Accuracy: 0.49768
Epoch: 142 Batch:   0 Loss: 2469.46558 Accuracy: 0.49768
Epoch: 143 Batch:   0 Loss: 2423.70117 Accuracy: 0.49768
Epoch: 144 Batch:   0 Loss: 2378.85229 Accuracy: 0.49768
Epoch: 145 Batch:   0 Loss: 2334.90479 Accuracy: 0.49768
Epoch: 1

Epoch: 266 Batch:   0 Loss: 544.73804 Accuracy: 0.49717
Epoch: 267 Batch:   0 Loss: 542.83069 Accuracy: 0.49717
Epoch: 268 Batch:   0 Loss: 540.98309 Accuracy: 0.49665
Epoch: 269 Batch:   0 Loss: 539.19287 Accuracy: 0.49665
Test Loss: 534.94272 Accuracy: 0.49961
Epoch: 270 Batch:   0 Loss: 537.45929 Accuracy: 0.49665
Epoch: 271 Batch:   0 Loss: 535.77985 Accuracy: 0.49717
Epoch: 272 Batch:   0 Loss: 534.15332 Accuracy: 0.49717
Epoch: 273 Batch:   0 Loss: 532.57812 Accuracy: 0.49717
Epoch: 274 Batch:   0 Loss: 531.05249 Accuracy: 0.49665
Epoch: 275 Batch:   0 Loss: 529.57520 Accuracy: 0.49665
Epoch: 276 Batch:   0 Loss: 528.14502 Accuracy: 0.49665
Epoch: 277 Batch:   0 Loss: 526.75995 Accuracy: 0.49665
Epoch: 278 Batch:   0 Loss: 525.41925 Accuracy: 0.49665
Epoch: 279 Batch:   0 Loss: 524.12109 Accuracy: 0.49717
Test Loss: 522.06337 Accuracy: 0.49974
Epoch: 280 Batch:   0 Loss: 522.86444 Accuracy: 0.49768
Epoch: 281 Batch:   0 Loss: 521.64795 Accuracy: 0.49820
Epoch: 282 Batch:   0 Loss

Epoch: 404 Batch:   0 Loss: 481.47562 Accuracy: 0.66855
Epoch: 405 Batch:   0 Loss: 481.37314 Accuracy: 0.66804
Epoch: 406 Batch:   0 Loss: 481.27078 Accuracy: 0.66907
Epoch: 407 Batch:   0 Loss: 481.16849 Accuracy: 0.66855
Epoch: 408 Batch:   0 Loss: 481.06644 Accuracy: 0.66855
Epoch: 409 Batch:   0 Loss: 480.96301 Accuracy: 0.66958
Test Loss: 489.86151 Accuracy: 0.58233
Epoch: 410 Batch:   0 Loss: 480.86218 Accuracy: 0.66907
Epoch: 411 Batch:   0 Loss: 480.76151 Accuracy: 0.66958
Epoch: 412 Batch:   0 Loss: 480.66104 Accuracy: 0.67113
Epoch: 413 Batch:   0 Loss: 480.56070 Accuracy: 0.67061
Epoch: 414 Batch:   0 Loss: 480.46063 Accuracy: 0.67164
Epoch: 415 Batch:   0 Loss: 480.36047 Accuracy: 0.67113
Epoch: 416 Batch:   0 Loss: 480.26077 Accuracy: 0.67010
Epoch: 417 Batch:   0 Loss: 480.16089 Accuracy: 0.67010
Epoch: 418 Batch:   0 Loss: 480.06131 Accuracy: 0.67061
Epoch: 419 Batch:   0 Loss: 479.95935 Accuracy: 0.67061
Test Loss: 489.43744 Accuracy: 0.58390
Epoch: 420 Batch:   0 Loss

Epoch: 541 Batch:   0 Loss: 467.59650 Accuracy: 0.66855
Epoch: 542 Batch:   0 Loss: 467.48901 Accuracy: 0.66855
Epoch: 543 Batch:   0 Loss: 467.38144 Accuracy: 0.66855
Epoch: 544 Batch:   0 Loss: 467.27380 Accuracy: 0.66855
Epoch: 545 Batch:   0 Loss: 467.16605 Accuracy: 0.66855
Epoch: 546 Batch:   0 Loss: 467.05640 Accuracy: 0.66907
Epoch: 547 Batch:   0 Loss: 466.94827 Accuracy: 0.66907
Epoch: 548 Batch:   0 Loss: 466.84006 Accuracy: 0.66907
Epoch: 549 Batch:   0 Loss: 466.73169 Accuracy: 0.66907
Test Loss: 483.82401 Accuracy: 0.58180
Epoch: 550 Batch:   0 Loss: 466.62329 Accuracy: 0.66907
Epoch: 551 Batch:   0 Loss: 466.51483 Accuracy: 0.66907
Epoch: 552 Batch:   0 Loss: 466.40616 Accuracy: 0.66907
Epoch: 553 Batch:   0 Loss: 466.29758 Accuracy: 0.66907
Epoch: 554 Batch:   0 Loss: 466.18875 Accuracy: 0.66907
Epoch: 555 Batch:   0 Loss: 466.07990 Accuracy: 0.66907
Epoch: 556 Batch:   0 Loss: 465.97086 Accuracy: 0.66907
Epoch: 557 Batch:   0 Loss: 465.85962 Accuracy: 0.66907
Epoch: 55

[I 2024-01-05 15:54:29,116] Trial 0 finished with value: 0.3663150752341066 and parameters: {'REG_W': 4.20001642467615e-06, 'REG_B': 0.002838626418152933, 'REG_Z': 2.0542461864837463e-05, 'SPAR_W': 0.8176662878132267, 'SPAR_B': 0.6441024812808256, 'SPAR_Z': 0.8082769449948486, 'loss': 'l2', 'LEARNING_RATE': 0.00021258085967771273, 'NUM_EPOCHS': 576, 'alpha': 0.11907989359869131}. Best is trial 0 with value: 0.3663150752341066.


Epoch:   0 Batch:   0 Loss: 10902.04980 Accuracy: 0.49768
Epoch:   1 Batch:   0 Loss: 10324.69238 Accuracy: 0.49768
Epoch:   2 Batch:   0 Loss: 9766.38184 Accuracy: 0.49768
Epoch:   3 Batch:   0 Loss: 9232.92188 Accuracy: 0.49768
Epoch:   4 Batch:   0 Loss: 8725.19629 Accuracy: 0.49768
Epoch:   5 Batch:   0 Loss: 8243.52930 Accuracy: 0.49768
Epoch:   6 Batch:   0 Loss: 7787.74902 Accuracy: 0.49768
Epoch:   7 Batch:   0 Loss: 7357.27783 Accuracy: 0.49768
Epoch:   8 Batch:   0 Loss: 6951.28418 Accuracy: 0.49768
Epoch:   9 Batch:   0 Loss: 6568.71973 Accuracy: 0.49768
Test Loss: 5961.26831 Accuracy: 0.49987
Epoch:  10 Batch:   0 Loss: 6208.46729 Accuracy: 0.49768
Epoch:  11 Batch:   0 Loss: 5869.32812 Accuracy: 0.49768
Epoch:  12 Batch:   0 Loss: 5550.10400 Accuracy: 0.49768
Epoch:  13 Batch:   0 Loss: 5249.61816 Accuracy: 0.49768
Epoch:  14 Batch:   0 Loss: 4966.71826 Accuracy: 0.49768
Epoch:  15 Batch:   0 Loss: 4700.31396 Accuracy: 0.49768
Epoch:  16 Batch:   0 Loss: 4449.35840 Accurac

Epoch: 138 Batch:   0 Loss: 489.73010 Accuracy: 0.68296
Epoch: 139 Batch:   0 Loss: 489.63882 Accuracy: 0.68348
Test Loss: 486.62061 Accuracy: 0.64537
Epoch: 140 Batch:   0 Loss: 489.54791 Accuracy: 0.68399
Epoch: 141 Batch:   0 Loss: 489.45700 Accuracy: 0.68348
Epoch: 142 Batch:   0 Loss: 489.36633 Accuracy: 0.68399
Epoch: 143 Batch:   0 Loss: 489.27560 Accuracy: 0.68451
Epoch: 144 Batch:   0 Loss: 489.18503 Accuracy: 0.68554
Epoch: 145 Batch:   0 Loss: 489.09460 Accuracy: 0.68605
Epoch: 146 Batch:   0 Loss: 489.00403 Accuracy: 0.68605
Epoch: 147 Batch:   0 Loss: 488.91354 Accuracy: 0.68657
Epoch: 148 Batch:   0 Loss: 488.82300 Accuracy: 0.68657
Epoch: 149 Batch:   0 Loss: 488.73270 Accuracy: 0.68657
Test Loss: 485.97265 Accuracy: 0.64445
Epoch: 150 Batch:   0 Loss: 488.64221 Accuracy: 0.68708
Epoch: 151 Batch:   0 Loss: 488.55157 Accuracy: 0.68708
Epoch: 152 Batch:   0 Loss: 488.46109 Accuracy: 0.68708
Epoch: 153 Batch:   0 Loss: 488.37051 Accuracy: 0.68708
Epoch: 154 Batch:   0 Loss

Epoch: 276 Batch:   0 Loss: 476.03763 Accuracy: 0.70407
Epoch: 277 Batch:   0 Loss: 475.92520 Accuracy: 0.70407
Epoch: 278 Batch:   0 Loss: 475.81223 Accuracy: 0.70407
Epoch: 279 Batch:   0 Loss: 475.69913 Accuracy: 0.70355
Test Loss: 477.75079 Accuracy: 0.64573
Epoch: 280 Batch:   0 Loss: 475.58548 Accuracy: 0.70355
Epoch: 281 Batch:   0 Loss: 475.47165 Accuracy: 0.70355
Epoch: 282 Batch:   0 Loss: 475.35739 Accuracy: 0.70304
Epoch: 283 Batch:   0 Loss: 475.24271 Accuracy: 0.70355
Epoch: 284 Batch:   0 Loss: 475.12784 Accuracy: 0.70355
Epoch: 285 Batch:   0 Loss: 475.01260 Accuracy: 0.70355
Epoch: 286 Batch:   0 Loss: 474.89691 Accuracy: 0.70355
Epoch: 287 Batch:   0 Loss: 474.78076 Accuracy: 0.70407
Epoch: 288 Batch:   0 Loss: 474.66446 Accuracy: 0.70407
Epoch: 289 Batch:   0 Loss: 474.54776 Accuracy: 0.70407
Test Loss: 477.10758 Accuracy: 0.64521
Epoch: 290 Batch:   0 Loss: 474.43054 Accuracy: 0.70407
Epoch: 291 Batch:   0 Loss: 474.31317 Accuracy: 0.70458
Epoch: 292 Batch:   0 Loss

[I 2024-01-05 15:55:14,817] Trial 1 finished with value: 0.4822216572117298 and parameters: {'REG_W': 2.150499128114348e-06, 'REG_B': 0.0013123991795708923, 'REG_Z': 2.7678514122803996e-05, 'SPAR_W': 0.9022371274865826, 'SPAR_B': 0.5539345534584593, 'SPAR_Z': 0.9993522107302426, 'loss': 'l2', 'LEARNING_RATE': 0.00031975053586635394, 'NUM_EPOCHS': 300, 'alpha': 0.31386241034032014}. Best is trial 1 with value: 0.4822216572117298.


Test Loss: 476.44933 Accuracy: 0.64455
Epoch:   0 Batch:   0 Loss: 2.93354 Accuracy: 0.49768
Epoch:   1 Batch:   0 Loss: 2.47058 Accuracy: 0.49768
Epoch:   2 Batch:   0 Loss: 2.38001 Accuracy: 0.49768
Epoch:   3 Batch:   0 Loss: 2.29064 Accuracy: 0.49768
Epoch:   4 Batch:   0 Loss: 2.20256 Accuracy: 0.49768
Epoch:   5 Batch:   0 Loss: 2.11593 Accuracy: 0.49768
Epoch:   6 Batch:   0 Loss: 2.03092 Accuracy: 0.49768
Epoch:   7 Batch:   0 Loss: 1.94775 Accuracy: 0.49768
Epoch:   8 Batch:   0 Loss: 1.86662 Accuracy: 0.49768
Epoch:   9 Batch:   0 Loss: 1.78777 Accuracy: 0.49768
Test Loss: 1.72032 Accuracy: 0.49987
Epoch:  10 Batch:   0 Loss: 1.71142 Accuracy: 0.49768
Epoch:  11 Batch:   0 Loss: 1.63784 Accuracy: 0.49768
Epoch:  12 Batch:   0 Loss: 1.56728 Accuracy: 0.49768
Epoch:  13 Batch:   0 Loss: 1.50000 Accuracy: 0.49768
Epoch:  14 Batch:   0 Loss: 1.43628 Accuracy: 0.49768
Epoch:  15 Batch:   0 Loss: 1.37635 Accuracy: 0.49768
Epoch:  16 Batch:   0 Loss: 1.32043 Accuracy: 0.49768
Epoch:

Epoch: 143 Batch:   0 Loss: 0.77431 Accuracy: 0.55275
Epoch: 144 Batch:   0 Loss: 0.77347 Accuracy: 0.55584
Epoch: 145 Batch:   0 Loss: 0.77264 Accuracy: 0.55790
Epoch: 146 Batch:   0 Loss: 0.77182 Accuracy: 0.55739
Epoch: 147 Batch:   0 Loss: 0.77100 Accuracy: 0.55944
Epoch: 148 Batch:   0 Loss: 0.77019 Accuracy: 0.56047
Epoch: 149 Batch:   0 Loss: 0.76939 Accuracy: 0.56150
Test Loss: 0.76945 Accuracy: 0.47146
Epoch: 150 Batch:   0 Loss: 0.76860 Accuracy: 0.55996
Epoch: 151 Batch:   0 Loss: 0.76782 Accuracy: 0.56047
Epoch: 152 Batch:   0 Loss: 0.76704 Accuracy: 0.56047
Epoch: 153 Batch:   0 Loss: 0.76626 Accuracy: 0.55790
Epoch: 154 Batch:   0 Loss: 0.76550 Accuracy: 0.56047
Epoch: 155 Batch:   0 Loss: 0.76474 Accuracy: 0.56253
Epoch: 156 Batch:   0 Loss: 0.76399 Accuracy: 0.56202
Epoch: 157 Batch:   0 Loss: 0.76325 Accuracy: 0.56253
Epoch: 158 Batch:   0 Loss: 0.76251 Accuracy: 0.56356
Epoch: 159 Batch:   0 Loss: 0.76178 Accuracy: 0.56819
Test Loss: 0.76191 Accuracy: 0.48807
Epoch: 1

Epoch: 287 Batch:   0 Loss: 0.70854 Accuracy: 0.50592
Epoch: 288 Batch:   0 Loss: 0.70832 Accuracy: 0.50489
Epoch: 289 Batch:   0 Loss: 0.70811 Accuracy: 0.50489
Test Loss: 0.70883 Accuracy: 0.50444
Epoch: 290 Batch:   0 Loss: 0.70790 Accuracy: 0.50489
Epoch: 291 Batch:   0 Loss: 0.70769 Accuracy: 0.50489
Epoch: 292 Batch:   0 Loss: 0.70748 Accuracy: 0.50489
Epoch: 293 Batch:   0 Loss: 0.70728 Accuracy: 0.50437
Epoch: 294 Batch:   0 Loss: 0.70707 Accuracy: 0.50386
Epoch: 295 Batch:   0 Loss: 0.70687 Accuracy: 0.50386
Epoch: 296 Batch:   0 Loss: 0.70667 Accuracy: 0.50386
Epoch: 297 Batch:   0 Loss: 0.70647 Accuracy: 0.50386
Epoch: 298 Batch:   0 Loss: 0.70627 Accuracy: 0.50386
Epoch: 299 Batch:   0 Loss: 0.70608 Accuracy: 0.50335
Test Loss: 0.70687 Accuracy: 0.50065
Epoch: 300 Batch:   0 Loss: 0.70588 Accuracy: 0.50335
Epoch: 301 Batch:   0 Loss: 0.70569 Accuracy: 0.50335
Epoch: 302 Batch:   0 Loss: 0.70550 Accuracy: 0.50335
Epoch: 303 Batch:   0 Loss: 0.70532 Accuracy: 0.50335
Epoch: 3

Epoch: 430 Batch:   0 Loss: 0.66768 Accuracy: 0.50232
Epoch: 431 Batch:   0 Loss: 0.66707 Accuracy: 0.50232
Epoch: 432 Batch:   0 Loss: 0.66645 Accuracy: 0.50232
Epoch: 433 Batch:   0 Loss: 0.66583 Accuracy: 0.50232
Epoch: 434 Batch:   0 Loss: 0.66520 Accuracy: 0.50232
Epoch: 435 Batch:   0 Loss: 0.66457 Accuracy: 0.50232
Epoch: 436 Batch:   0 Loss: 0.66393 Accuracy: 0.50232
Epoch: 437 Batch:   0 Loss: 0.66329 Accuracy: 0.50232
Epoch: 438 Batch:   0 Loss: 0.66264 Accuracy: 0.50232
Epoch: 439 Batch:   0 Loss: 0.66199 Accuracy: 0.50232
Test Loss: 0.67892 Accuracy: 0.50013
Epoch: 440 Batch:   0 Loss: 0.66132 Accuracy: 0.50232
Epoch: 441 Batch:   0 Loss: 0.66066 Accuracy: 0.50232
Epoch: 442 Batch:   0 Loss: 0.65999 Accuracy: 0.50232
Epoch: 443 Batch:   0 Loss: 0.65931 Accuracy: 0.50232
Epoch: 444 Batch:   0 Loss: 0.65863 Accuracy: 0.50232
Epoch: 445 Batch:   0 Loss: 0.65795 Accuracy: 0.50232
Epoch: 446 Batch:   0 Loss: 0.65726 Accuracy: 0.50232
Epoch: 447 Batch:   0 Loss: 0.65656 Accuracy:

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
[I 2024-01-05 15:56:28,799] Trial 2 finished with value: 0.0 and parameters: {'REG_W': 4.585711316360162e-06, 'REG_B': 0.009570099408400576, 'REG_Z': 2.310643699106014e-05, 'SPAR_W': 0.9066577093571926, 'SPAR_B': 0.5722913519872839, 'SPAR_Z': 0.7526557798361891, 'loss': 'xentropy', 'LEARNING_RATE': 0.0005282606618032462, 'NUM_EPOCHS': 479, 'alpha': 0.73882098493583}. Best is trial 1 with value: 0.4822216572117298.


Epoch: 478 Batch:   0 Loss: 0.63376 Accuracy: 0.50232


In [17]:
study.best_params

{'REG_W': 2.150499128114348e-06,
 'REG_B': 0.0013123991795708923,
 'REG_Z': 2.7678514122803996e-05,
 'SPAR_W': 0.9022371274865826,
 'SPAR_B': 0.5539345534584593,
 'SPAR_Z': 0.9993522107302426,
 'loss': 'l2',
 'LEARNING_RATE': 0.00031975053586635394,
 'NUM_EPOCHS': 300,
 'alpha': 0.31386241034032014}

In [18]:
PROJECTION_DIM = 5 #d^
NUM_PROTOTYPES = 40 #m
REG_W = study.best_params['REG_W']
REG_B = study.best_params['REG_B']
REG_Z = study.best_params['REG_Z']
SPAR_W = study.best_params['SPAR_W']
SPAR_B = study.best_params['SPAR_B']
SPAR_Z = study.best_params['SPAR_Z']
loss = study.best_params['loss']
LEARNING_RATE = study.best_params['LEARNING_RATE']
NUM_EPOCHS = study.best_params['NUM_EPOCHS']
BATCH_SIZE = 2048
GAMMA = gamma



In [19]:
# PROJECTION_DIM = 5 #d^
# NUM_PROTOTYPES = 40 #m
# REG_W = hyper['REG_W']
# REG_B = hyper['REG_B']
# REG_Z = hyper['REG_Z']
# SPAR_W = hyper['SPAR_W']
# SPAR_B = hyper['SPAR_B']
# SPAR_Z = hyper['SPAR_Z']
# LEARNING_RATE = hyper['LEARNING_RATE']
# NUM_EPOCHS = 600
# BATCH_SIZE = 32
# GAMMA = gamma

# Model Training

In [20]:

# Setup input and train protoNN
# {'REG_W': 2.8881784502872485e-06,
#  'REG_B': 0.0035302454747415833,
#  'REG_Z': 3.3834584511295385e-05,
#  'SPAR_W': 0.7925887710548198,
#  'SPAR_B': 0.539183122073297,
#  'SPAR_Z': 0.6111926798340185,
#  'loss': 'l2',
#  'LEARNING_RATE': 0.0008050041923932209,
#  'NUM_EPOCHS': 460}
X = tf.placeholder(tf.float32, [None, dataDimension], name='X')
Y = tf.placeholder(tf.float32, [None, numClasses], name='Y')
protoNN = ProtoNN(dataDimension, PROJECTION_DIM,
                  NUM_PROTOTYPES, numClasses,
                  gamma, W=W, B=B)
trainer = ProtoNNTrainer(protoNN,  REG_W, REG_B, REG_Z,
                         SPAR_W, SPAR_B, SPAR_Z,
                        LEARNING_RATE, X, Y, lossType='l2')
sess = tf.Session()

trainer.train(2048, 800, sess, x_train, x_test, y_train, y_test,
              printStep=600, valStep=10)


Epoch:   0 Batch:   0 Loss: 2451.24023 Accuracy: 0.50926
Epoch:   1 Batch:   0 Loss: 2074.21533 Accuracy: 0.50926
Epoch:   2 Batch:   0 Loss: 1743.72595 Accuracy: 0.50926
Epoch:   3 Batch:   0 Loss: 1460.87170 Accuracy: 0.50926
Epoch:   4 Batch:   0 Loss: 1223.36365 Accuracy: 0.50926
Epoch:   5 Batch:   0 Loss: 1026.71033 Accuracy: 0.50926
Epoch:   6 Batch:   0 Loss: 865.61444 Accuracy: 0.50926
Epoch:   7 Batch:   0 Loss: 734.78174 Accuracy: 0.50926
Epoch:   8 Batch:   0 Loss: 629.30042 Accuracy: 0.50926
Epoch:   9 Batch:   0 Loss: 544.80389 Accuracy: 0.50926
Test Loss: 477.86724 Accuracy: 0.50013
Epoch:  10 Batch:   0 Loss: 477.50461 Accuracy: 0.50926
Epoch:  11 Batch:   0 Loss: 424.18887 Accuracy: 0.50926
Epoch:  12 Batch:   0 Loss: 382.17645 Accuracy: 0.50926
Epoch:  13 Batch:   0 Loss: 349.25397 Accuracy: 0.50926
Epoch:  14 Batch:   0 Loss: 323.61194 Accuracy: 0.50926
Epoch:  15 Batch:   0 Loss: 303.77664 Accuracy: 0.50926
Epoch:  16 Batch:   0 Loss: 288.55142 Accuracy: 0.50926
Epo

Epoch: 138 Batch:   0 Loss: 224.24927 Accuracy: 0.72428
Epoch: 139 Batch:   0 Loss: 223.93344 Accuracy: 0.72325
Test Loss: 229.80793 Accuracy: 0.66756
Epoch: 140 Batch:   0 Loss: 223.60954 Accuracy: 0.72325
Epoch: 141 Batch:   0 Loss: 223.27963 Accuracy: 0.72428
Epoch: 142 Batch:   0 Loss: 222.95563 Accuracy: 0.72634
Epoch: 143 Batch:   0 Loss: 222.62927 Accuracy: 0.72634
Epoch: 144 Batch:   0 Loss: 222.30005 Accuracy: 0.72531
Epoch: 145 Batch:   0 Loss: 221.96027 Accuracy: 0.72737
Epoch: 146 Batch:   0 Loss: 221.62448 Accuracy: 0.72942
Epoch: 147 Batch:   0 Loss: 221.28580 Accuracy: 0.73148
Epoch: 148 Batch:   0 Loss: 220.94569 Accuracy: 0.73251
Epoch: 149 Batch:   0 Loss: 220.60274 Accuracy: 0.73457
Test Loss: 227.66409 Accuracy: 0.67383
Epoch: 150 Batch:   0 Loss: 220.25867 Accuracy: 0.73971
Epoch: 151 Batch:   0 Loss: 219.91237 Accuracy: 0.74280
Epoch: 152 Batch:   0 Loss: 219.56287 Accuracy: 0.74383
Epoch: 153 Batch:   0 Loss: 219.21159 Accuracy: 0.74691
Epoch: 154 Batch:   0 Loss

Epoch: 275 Batch:   0 Loss: 170.45360 Accuracy: 0.86111
Epoch: 276 Batch:   0 Loss: 170.06297 Accuracy: 0.86111
Epoch: 277 Batch:   0 Loss: 169.67278 Accuracy: 0.86214
Epoch: 278 Batch:   0 Loss: 169.28302 Accuracy: 0.86214
Epoch: 279 Batch:   0 Loss: 168.89392 Accuracy: 0.86214
Test Loss: 197.50776 Accuracy: 0.75218
Epoch: 280 Batch:   0 Loss: 168.50591 Accuracy: 0.86214
Epoch: 281 Batch:   0 Loss: 168.11650 Accuracy: 0.86214
Epoch: 282 Batch:   0 Loss: 167.72818 Accuracy: 0.86214
Epoch: 283 Batch:   0 Loss: 167.34050 Accuracy: 0.86317
Epoch: 284 Batch:   0 Loss: 166.95360 Accuracy: 0.86317
Epoch: 285 Batch:   0 Loss: 166.56729 Accuracy: 0.86317
Epoch: 286 Batch:   0 Loss: 166.18208 Accuracy: 0.86420
Epoch: 287 Batch:   0 Loss: 165.79742 Accuracy: 0.86420
Epoch: 288 Batch:   0 Loss: 165.41367 Accuracy: 0.86420
Epoch: 289 Batch:   0 Loss: 165.03093 Accuracy: 0.86523
Test Loss: 195.81694 Accuracy: 0.75440
Epoch: 290 Batch:   0 Loss: 164.64886 Accuracy: 0.86523
Epoch: 291 Batch:   0 Loss

Epoch: 412 Batch:   0 Loss: 123.67459 Accuracy: 0.88889
Epoch: 413 Batch:   0 Loss: 123.38446 Accuracy: 0.88889
Epoch: 414 Batch:   0 Loss: 123.09527 Accuracy: 0.88889
Epoch: 415 Batch:   0 Loss: 122.80695 Accuracy: 0.88889
Epoch: 416 Batch:   0 Loss: 122.51919 Accuracy: 0.88889
Epoch: 417 Batch:   0 Loss: 122.23209 Accuracy: 0.88889
Epoch: 418 Batch:   0 Loss: 121.94585 Accuracy: 0.88889
Epoch: 419 Batch:   0 Loss: 121.66039 Accuracy: 0.88889
Test Loss: 178.46993 Accuracy: 0.76302
Epoch: 420 Batch:   0 Loss: 121.37563 Accuracy: 0.88889
Epoch: 421 Batch:   0 Loss: 121.09156 Accuracy: 0.88889
Epoch: 422 Batch:   0 Loss: 120.80815 Accuracy: 0.88992
Epoch: 423 Batch:   0 Loss: 120.52553 Accuracy: 0.88992
Epoch: 424 Batch:   0 Loss: 120.24365 Accuracy: 0.89095
Epoch: 425 Batch:   0 Loss: 119.96241 Accuracy: 0.89198
Epoch: 426 Batch:   0 Loss: 119.68205 Accuracy: 0.89095
Epoch: 427 Batch:   0 Loss: 119.40240 Accuracy: 0.89095
Epoch: 428 Batch:   0 Loss: 119.12359 Accuracy: 0.89095
Epoch: 42

Epoch: 552 Batch:   0 Loss: 91.20496 Accuracy: 0.89403
Epoch: 553 Batch:   0 Loss: 91.04049 Accuracy: 0.89403
Epoch: 554 Batch:   0 Loss: 90.87702 Accuracy: 0.89403
Epoch: 555 Batch:   0 Loss: 90.71461 Accuracy: 0.89403
Epoch: 556 Batch:   0 Loss: 90.55315 Accuracy: 0.89403
Epoch: 557 Batch:   0 Loss: 90.39311 Accuracy: 0.89403
Epoch: 558 Batch:   0 Loss: 90.23405 Accuracy: 0.89403
Epoch: 559 Batch:   0 Loss: 90.07581 Accuracy: 0.89403
Test Loss: 167.80643 Accuracy: 0.76694
Epoch: 560 Batch:   0 Loss: 89.91855 Accuracy: 0.89403
Epoch: 561 Batch:   0 Loss: 89.76235 Accuracy: 0.89403
Epoch: 562 Batch:   0 Loss: 89.60722 Accuracy: 0.89403
Epoch: 563 Batch:   0 Loss: 89.45299 Accuracy: 0.89403
Epoch: 564 Batch:   0 Loss: 89.29990 Accuracy: 0.89403
Epoch: 565 Batch:   0 Loss: 89.14786 Accuracy: 0.89403
Epoch: 566 Batch:   0 Loss: 88.99692 Accuracy: 0.89403
Epoch: 567 Batch:   0 Loss: 88.84698 Accuracy: 0.89403
Epoch: 568 Batch:   0 Loss: 88.69809 Accuracy: 0.89403
Epoch: 569 Batch:   0 Loss

Epoch: 692 Batch:   0 Loss: 77.22261 Accuracy: 0.89815
Epoch: 693 Batch:   0 Loss: 77.17500 Accuracy: 0.89815
Epoch: 694 Batch:   0 Loss: 77.12791 Accuracy: 0.89918
Epoch: 695 Batch:   0 Loss: 77.08129 Accuracy: 0.89918
Epoch: 696 Batch:   0 Loss: 77.03519 Accuracy: 0.89918
Epoch: 697 Batch:   0 Loss: 76.98958 Accuracy: 0.89918
Epoch: 698 Batch:   0 Loss: 76.94447 Accuracy: 0.89918
Epoch: 699 Batch:   0 Loss: 76.89973 Accuracy: 0.89918
Test Loss: 168.09354 Accuracy: 0.76681
Epoch: 700 Batch:   0 Loss: 76.85545 Accuracy: 0.89918
Epoch: 701 Batch:   0 Loss: 76.81166 Accuracy: 0.89918
Epoch: 702 Batch:   0 Loss: 76.76834 Accuracy: 0.90021
Epoch: 703 Batch:   0 Loss: 76.72546 Accuracy: 0.90021
Epoch: 704 Batch:   0 Loss: 76.68304 Accuracy: 0.90021
Epoch: 705 Batch:   0 Loss: 76.64101 Accuracy: 0.90021
Epoch: 706 Batch:   0 Loss: 76.59948 Accuracy: 0.90123
Epoch: 707 Batch:   0 Loss: 76.55843 Accuracy: 0.90123
Epoch: 708 Batch:   0 Loss: 76.51775 Accuracy: 0.90123
Epoch: 709 Batch:   0 Loss

# Model Evaluation

In [22]:
acc = sess.run(protoNN.accuracy, feed_dict={X: x_test, Y: y_test})
pred = sess.run(protoNN.predictions, feed_dict={X: x_test, Y: y_test})
# W, B, Z are tensorflow graph nodes
W, B, Z, _ = protoNN.getModelMatrices()
matrixList = sess.run([W, B, Z])
sparcityList = [SPAR_W, SPAR_B, SPAR_Z]                       
nnz, size, sparse = getModelSize(matrixList, sparcityList)
print("Final test accuracy", acc)
print("Model size constraint (Bytes): ", size)
print("Number of non-zeros: ", nnz)

Final test accuracy 0.76691926
Model size constraint (Bytes):  9580
Number of non-zeros:  2395


In [23]:
from sklearn.metrics import confusion_matrix,classification_report
y_test = np.argmax(y_test,axis=1)
print (confusion_matrix(y_test,pred))
print (classification_report(y_test,pred,digits=5))

[[2521 1306]
 [ 478 3349]]
              precision    recall  f1-score   support

           0    0.84061   0.65874   0.73865      3827
           1    0.71944   0.87510   0.78967      3827

    accuracy                        0.76692      7654
   macro avg    0.78003   0.76692   0.76416      7654
weighted avg    0.78003   0.76692   0.76416      7654



In [24]:
sensitivity = confusion_matrix(y_test,pred)[1][1]/(confusion_matrix(y_test,pred)[1][1] + confusion_matrix(y_test,pred)[1][0])
sensitivity

0.8750979879801412

In [25]:
specificity = confusion_matrix(y_test,pred)[0][0]/(confusion_matrix(y_test,pred)[0][0] + confusion_matrix(y_test,pred)[0][1])
specificity

0.6587405278285864

**WINDOW 2**

In [26]:
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

from __future__ import print_function
import sys
import os
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

#sys.path.insert(0, '../../')
# from edgeml.trainer.protoNNTrainer import ProtoNNTrainer
# from edgeml.graph.protoNN import ProtoNN
# import edgeml.utils as utils
# import helpermethods as helper
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV


sys.path.append(r"E:\programming\practice\research\optimized code\EdgeML\examples\tf\ProtoNN")
import helpermethods as helper

In [27]:
#helper methods
sys.path.insert(0, '../')
import argparse


def getModelSize(matrixList, sparcityList, expected=True, bytesPerVar=4):
    '''
    expected: Expected size according to the parameters set. The number of
        zeros could actually be more than that is required to satisfy the
        sparsity constraint.
    '''
    nnzList, sizeList, isSparseList = [], [], []
    hasSparse = False
    for i in range(len(matrixList)):
        A, s = matrixList[i], sparcityList[i]
        assert A.ndim == 2
        assert s >= 0
        assert s <= 1
        nnz, size, sparse = countnnZ(A, s, bytesPerVar=bytesPerVar)
        nnzList.append(nnz)
        sizeList.append(size)
        hasSparse = (hasSparse or sparse)

    totalnnZ = np.sum(nnzList)
    totalSize = np.sum(sizeList)
    if expected:
        return totalnnZ, totalSize, hasSparse
    numNonZero = 0
    totalSize = 0
    hasSparse = False
    for i in range(len(matrixList)):
        A, s = matrixList[i], sparcityList[i]
        numNonZero_ = np.count_nonzero(A)
        numNonZero += numNonZero_
        hasSparse = (hasSparse or (s < 0.5))
        if s <= 0.5:
            totalSize += numNonZero_ * 2 * bytesPerVar
        else:
            totalSize += A.size * bytesPerVar
    return numNonZero, totalSize, hasSparse


def getGamma(gammaInit, projectionDim, dataDim, numPrototypes, x_train):
    if gammaInit is None:
        print("Using median heuristic to estimate gamma.")
        gamma, W, B = medianHeuristic(x_train, projectionDim,
                                            numPrototypes)
        print("Gamma estimate is: %f" % gamma)
        return W, B, gamma
    return None, None, gammaInit


def preprocessData(dataDir,w):
    '''
    Loads data from the dataDir and does some initial preprocessing
    steps. Data is assumed to be contained in two files,
    train.npy and test.npy. Each containing a 2D numpy array of dimension
    [numberOfExamples, numberOfFeatures + 1]. The first column of each
    matrix is assumed to contain label information.

    For an N-Class problem, we assume the labels are integers from 0 through
    N-1.
    '''
    # Uncomment for usual training data
    # train = np.load(dataDir + '/train_'+str(w)+'.npy')
    # test = np.load(dataDir + '/test_'+str(w)+'.npy')
    # Uncomment for time domain training data
    train = np.load(dataDir + '/ttrain_'+str(w)+'.npy')
    test = np.load(dataDir + '/ttest_'+str(w)+'.npy')
    # Uncomment for 1 sensordrop training data
    # train = np.load(dataDir + '/train_'+str(w)+'.npy')
    # test = np.load(dataDir + '/test_'+str(w)+'.npy')

    dataDimension = int(train.shape[1]) - 1
    x_train = train[:, 1:dataDimension + 1]
    y_train_ = train[:, 0]
    x_test = test[:, 1:dataDimension + 1]
    y_test_ = test[:, 0]

    numClasses = max(y_train_) - min(y_train_) + 1
    numClasses = max(numClasses, max(y_test_) - min(y_test_) + 1)
    numClasses = int(numClasses)

    # mean-var
    mean = np.mean(x_train, 0)
    std = np.std(x_train, 0)
    std[std[:] < 0.000001] = 1
    x_train = (x_train - mean) / std
    x_test = (x_test - mean) / std

    # one hot y-train
    lab = y_train_.astype('uint8')
    lab = np.array(lab) - min(lab)
    lab_ = np.zeros((x_train.shape[0], numClasses))
    lab_[np.arange(x_train.shape[0]), lab] = 1
    y_train = lab_

    # one hot y-test
    lab = y_test_.astype('uint8')
    lab = np.array(lab) - min(lab)
    lab_ = np.zeros((x_test.shape[0], numClasses))
    lab_[np.arange(x_test.shape[0]), lab] = 1
    y_test = lab_

    return dataDimension, numClasses, x_train, y_train, x_test, y_test



def getProtoNNArgs():
    def checkIntPos(value):
        ivalue = int(value)
        if ivalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive int value" % value)
        return ivalue

    def checkIntNneg(value):
        ivalue = int(value)
        if ivalue < 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid non-neg int value" % value)
        return ivalue

    def checkFloatNneg(value):
        fvalue = float(value)
        if fvalue < 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid non-neg float value" % value)
        return fvalue

    def checkFloatPos(value):
        fvalue = float(value)
        if fvalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive float value" % value)
        return fvalue

    '''
    Parse protoNN commandline arguments
    '''
    parser = argparse.ArgumentParser(
        description='Hyperparameters for ProtoNN Algorithm')

    msg = 'Data directory containing train and test data. The '
    msg += 'data is assumed to be saved as 2-D numpy matrices with '
    msg += 'names `train.npy` and `test.npy`, of dimensions\n'
    msg += '\t[numberOfInstances, numberOfFeatures + 1].\n'
    msg += 'The first column of each file is assumed to contain label information.'
    msg += ' For a N-class problem, labels are assumed to be integers from 0 to'
    msg += ' N-1 (inclusive).'
    parser.add_argument('-d', '--data-dir', required=True, help=msg)
    parser.add_argument('-l', '--projection-dim', type=checkIntPos, default=10,
                        help='Projection Dimension.')
    parser.add_argument('-p', '--num-prototypes', type=checkIntPos, default=20,
                        help='Number of prototypes.')
    parser.add_argument('-g', '--gamma', type=checkFloatPos, default=None,
                        help='Gamma for Gaussian kernel. If not provided, ' +
                        'median heuristic will be used to estimate gamma.')

    parser.add_argument('-e', '--epochs', type=checkIntPos, default=100,
                        help='Total training epochs.')
    parser.add_argument('-b', '--batch-size', type=checkIntPos, default=32,
                        help='Batch size for each pass.')
    parser.add_argument('-r', '--learning-rate', type=checkFloatPos,
                        default=0.001,
                        help='Initial Learning rate for ADAM Optimizer.')

    parser.add_argument('-rW', type=float, default=0.000,
                        help='Coefficient for l2 regularizer for predictor' +
                        ' parameter W ' + '(default = 0.0).')
    parser.add_argument('-rB', type=float, default=0.00,
                        help='Coefficient for l2 regularizer for predictor' +
                        ' parameter B ' + '(default = 0.0).')
    parser.add_argument('-rZ', type=float, default=0.00,
                        help='Coefficient for l2 regularizer for predictor' +
                        'parameter Z ' +
                        '(default = 0.0).')

    parser.add_argument('-sW', type=float, default=1.000,
                        help='Sparsity constraint for predictor parameter W ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-sB', type=float, default=1.00,
                        help='Sparsity constraint for predictor parameter B ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-sZ', type=float, default=1.00,
                        help='Sparsity constraint for predictor parameter Z ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-pS', '--print-step', type=int, default=200,
                        help='The number of update steps between print ' +
                        'calls to console.')
    parser.add_argument('-vS', '--val-step', type=int, default=3,
                        help='The number of epochs between validation' +
                        'performance evaluation')
    return parser.parse_args()

In [28]:
#utils
import scipy.cluster
import scipy.spatial
import os


def medianHeuristic(data, projectionDimension, numPrototypes, W_init=None):
    '''
    This method can be used to estimate gamma for ProtoNN. An approximation to
    median heuristic is used here.
    1. First the data is collapsed into the projectionDimension by W_init. If
    W_init is not provided, it is initialized from a random normal(0, 1). Hence
    data normalization is essential.
    2. Prototype are computed by running a  k-means clustering on the projected
    data.
    3. The median distance is then estimated by calculating median distance
    between prototypes and projected data points.

    data needs to be [-1, numFeats]
    If using this method to initialize gamma, please use the W and B as well.

    TODO: Return estimate of Z (prototype labels) based on cluster centroids
    andand labels

    TODO: Clustering fails due to singularity error if projecting upwards

    W [dxd_cap]
    B [d_cap, m]
    returns gamma, W, B
    '''
    assert data.ndim == 2
    X = data
    featDim = data.shape[1]
    if projectionDimension > featDim:
        print("Warning: Projection dimension > feature dimension. Gamma")
        print("\t estimation due to median heuristic could fail.")
        print("\tTo retain the projection dataDimension, provide")
        print("\ta value for gamma.")

    if W_init is None:
        W_init = np.random.normal(size=[featDim, projectionDimension])
    W = W_init
    XW = np.matmul(X, W)
    assert XW.shape[1] == projectionDimension
    assert XW.shape[0] == len(X)
    # Requires [N x d_cap] data matrix of N observations of d_cap-dimension and
    # the number of centroids m. Returns, [n x d_cap] centroids and
    # elementwise center information.
    B, centers = scipy.cluster.vq.kmeans2(XW, numPrototypes)
    # Requires two matrices. Number of observations x dimension of observation
    # space. Distances[i,j] is the distance between XW[i] and B[j]
    distances = scipy.spatial.distance.cdist(XW, B, metric='euclidean')
    distances = np.reshape(distances, [-1])
    gamma = np.median(distances)
    gamma = 1 / (2.5 * gamma)
    return gamma.astype('float32'), W.astype('float32'), B.T.astype('float32')


def multiClassHingeLoss(logits, label, batch_th):
    '''
    MultiClassHingeLoss to match C++ Version - No TF internal version
    '''
    flatLogits = tf.reshape(logits, [-1, ])
    label_ = tf.argmax(label, 1)

    correctId = tf.range(0, batch_th) * label.shape[1] + label_
    correctLogit = tf.gather(flatLogits, correctId)

    maxLabel = tf.argmax(logits, 1)
    top2, _ = tf.nn.top_k(logits, k=2, sorted=True)

    wrongMaxLogit = tf.where(
        tf.equal(maxLabel, label_), top2[:, 1], top2[:, 0])

    return tf.reduce_mean(tf.nn.relu(1. + wrongMaxLogit - correctLogit))


def crossEntropyLoss(logits, label):
    '''
    Cross Entropy loss for MultiClass case in joint training for
    faster convergence
    '''
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                   labels=tf.stop_gradient(label)))


def mean_absolute_error(logits, label):
    '''
    Function to compute the mean absolute error.
    '''
    return tf.reduce_mean(tf.abs(tf.subtract(logits, label)))


def hardThreshold(A, s):
    '''
    Hard thresholding function on Tensor A with sparsity s
    '''
    A_ = np.copy(A)
    A_ = A_.ravel()
    if len(A_) > 0:
        th = np.percentile(np.abs(A_), (1 - s) * 100.0, interpolation='higher')
        A_[np.abs(A_) < th] = 0.0
    A_ = A_.reshape(A.shape)
    return A_


def copySupport(src, dest):
    '''
    copy support of src tensor to dest tensor
    '''
    support = np.nonzero(src)
    dest_ = dest
    dest = np.zeros(dest_.shape)
    dest[support] = dest_[support]
    return dest


def countnnZ(A, s, bytesPerVar=4):
    '''
    Returns # of non-zeros and representative size of the tensor
    Uses dense for s >= 0.5 - 4 byte
    Else uses sparse - 8 byte
    '''
    params = 1
    hasSparse = False
    for i in range(0, len(A.shape)):
        params *= int(A.shape[i])
    if s < 0.5:
        nnZ = np.ceil(params * s)
        hasSparse = True
        return nnZ, nnZ * 2 * bytesPerVar, hasSparse
    else:
        nnZ = params
        return nnZ, nnZ * bytesPerVar, hasSparse


def getConfusionMatrix(predicted, target, numClasses):
    '''
    Returns a confusion matrix for a multiclass classification
    problem. `predicted` is a 1-D array of integers representing
    the predicted classes and `target` is the target classes.

    confusion[i][j]: Number of elements of class j
        predicted as class i
    Labels are assumed to be in range(0, numClasses)
    Use`printFormattedConfusionMatrix` to echo the confusion matrix
    in a user friendly form.
    '''
    assert(predicted.ndim == 1)
    assert(target.ndim == 1)
    arr = np.zeros([numClasses, numClasses])

    for i in range(len(predicted)):
        arr[predicted[i]][target[i]] += 1
    return arr


def printFormattedConfusionMatrix(matrix):
    '''
    Given a 2D confusion matrix, prints it in a human readable way.
    The confusion matrix is expected to be a 2D numpy array with
    square dimensions
    '''
    assert(matrix.ndim == 2)
    assert(matrix.shape[0] == matrix.shape[1])
    RECALL = 'Recall'
    PRECISION = 'PRECISION'
    print("|%s|" % ('True->'), end='')
    for i in range(matrix.shape[0]):
        print("%7d|" % i, end='')
    print("%s|" % 'Precision')

    print("|%s|" % ('-' * len(RECALL)), end='')
    for i in range(matrix.shape[0]):
        print("%s|" % ('-' * 7), end='')
    print("%s|" % ('-' * len(PRECISION)))

    precisionlist = np.sum(matrix, axis=1)
    recalllist = np.sum(matrix, axis=0)
    precisionlist = [matrix[i][i] / x if x !=
                     0 else -1 for i, x in enumerate(precisionlist)]
    recalllist = [matrix[i][i] / x if x !=
                  0 else -1 for i, x in enumerate(recalllist)]
    for i in range(matrix.shape[0]):
        # len recall = 6
        print("|%6d|" % (i), end='')
        for j in range(matrix.shape[0]):
            print("%7d|" % (matrix[i][j]), end='')
        print("%s" % (" " * (len(PRECISION) - 7)), end='')
        if precisionlist[i] != -1:
            print("%1.5f|" % precisionlist[i])
        else:
            print("%7s|" % "nan")

    print("|%s|" % ('-' * len(RECALL)), end='')
    for i in range(matrix.shape[0]):
        print("%s|" % ('-' * 7), end='')
    print("%s|" % ('-' * len(PRECISION)))
    print("|%s|" % ('Recall'), end='')

    for i in range(matrix.shape[0]):
        if recalllist[i] != -1:
            print("%1.5f|" % (recalllist[i]), end='')
        else:
            print("%7s|" % "nan", end='')

    print('%s|' % (' ' * len(PRECISION)))


def getPrecisionRecall(cmatrix, label=1):
    trueP = cmatrix[label][label]
    denom = np.sum(cmatrix, axis=0)[label]
    if denom == 0:
        denom = 1
    recall = trueP / denom
    denom = np.sum(cmatrix, axis=1)[label]
    if denom == 0:
        denom = 1
    precision = trueP / denom
    return precision, recall


def getMacroPrecisionRecall(cmatrix):
    # TP + FP
    precisionlist = np.sum(cmatrix, axis=1)
    # TP + FN
    recalllist = np.sum(cmatrix, axis=0)
    precisionlist__ = [cmatrix[i][i] / x if x !=
                       0 else 0 for i, x in enumerate(precisionlist)]
    recalllist__ = [cmatrix[i][i] / x if x !=
                    0 else 0 for i, x in enumerate(recalllist)]
    precision = np.sum(precisionlist__)
    precision /= len(precisionlist__)
    recall = np.sum(recalllist__)
    recall /= len(recalllist__)
    return precision, recall


def getMicroPrecisionRecall(cmatrix):
    # TP + FP
    precisionlist = np.sum(cmatrix, axis=1)
    # TP + FN
    recalllist = np.sum(cmatrix, axis=0)
    num = 0.0
    for i in range(len(cmatrix)):
        num += cmatrix[i][i]

    precision = num / np.sum(precisionlist)
    recall = num / np.sum(recalllist)
    return precision, recall


def getMacroMicroFScore(cmatrix):
    '''
    Returns macro and micro f-scores.
    Refer: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.8244&rep=rep1&type=pdf
    '''
    precisionlist = np.sum(cmatrix, axis=1)
    recalllist = np.sum(cmatrix, axis=0)
    precisionlist__ = [cmatrix[i][i] / x if x !=
                       0 else 0 for i, x in enumerate(precisionlist)]
    recalllist__ = [cmatrix[i][i] / x if x !=
                    0 else 0 for i, x in enumerate(recalllist)]
    macro = 0.0
    for i in range(len(precisionlist)):
        denom = precisionlist__[i] + recalllist__[i]
        numer = precisionlist__[i] * recalllist__[i] * 2
        if denom == 0:
            denom = 1
        macro += numer / denom
    macro /= len(precisionlist)

    num = 0.0
    for i in range(len(precisionlist)):
        num += cmatrix[i][i]

    denom1 = np.sum(precisionlist)
    denom2 = np.sum(recalllist)
    pi = num / denom1
    rho = num / denom2
    denom = pi + rho
    if denom == 0:
        denom = 1
    micro = 2 * pi * rho / denom
    return macro, micro


class GraphManager:
    '''
    Manages saving and restoring graphs. Designed to be used with EMI-RNN
    though is general enough to be useful otherwise as well.
    '''

    def __init__(self):
        pass

    def checkpointModel(self, saver, sess, modelPrefix,
                        globalStep=1000, redirFile=None):
        saver.save(sess, modelPrefix, global_step=globalStep)
        print('Model saved to %s, global_step %d' % (modelPrefix, globalStep),
              file=redirFile)

    def loadCheckpoint(self, sess, modelPrefix, globalStep,
                       redirFile=None):
        metaname = modelPrefix + '-%d.meta' % globalStep
        basename = os.path.basename(metaname)
        fileList = os.listdir(os.path.dirname(modelPrefix))
        fileList = [x for x in fileList if x.startswith(basename)]
        assert len(fileList) > 0, 'Checkpoint file not found'
        msg = 'Too many or too few checkpoint files for globalStep: %d' % globalStep
        assert len(fileList) is 1, msg
        chkpt = basename + '/' + fileList[0]
        saver = tf.train.import_meta_graph(metaname)
        metaname = metaname[:-5]
        saver.restore(sess, metaname)
        graph = tf.get_default_graph()
        return graph

In [29]:
#Trainer
class ProtoNNTrainer:
    def __init__(self, protoNNObj, regW, regB, regZ,
                 sparcityW, sparcityB, sparcityZ,
                 learningRate, X, Y, lossType='l2'):
        '''
        A wrapper for the various techniques used for training ProtoNN. This
        subsumes both the responsibility of loss graph construction and
        performing training. The original training routine that is part of the
        C++ implementation of EdgeML used iterative hard thresholding (IHT),
        gamma estimation through median heuristic and other tricks for
        training ProtoNN. This module implements the same in Tensorflow
        and python.

        protoNNObj: An instance of ProtoNN class defining the forward
            computation graph. The loss functions and training routines will be
            attached to this instance.
        regW, regB, regZ: Regularization constants for W, B, and
            Z matrices of protoNN.
        sparcityW, sparcityB, sparcityZ: Sparsity constraints
            for W, B and Z matrices. A value between 0 (exclusive) and 1
            (inclusive) is expected. A value of 1 indicates dense training.
        learningRate: Initial learning rate for ADAM optimizer.
        X, Y : Placeholders for data and labels.
            X [-1, featureDimension]
            Y [-1, num Labels]
        lossType: ['l2', 'xentropy']
        '''
        self.protoNNObj = protoNNObj
        self.__regW = regW
        self.__regB = regB
        self.__regZ = regZ
        self.__sW = sparcityW
        self.__sB = sparcityB
        self.__sZ = sparcityZ
        self.__lR = learningRate
        self.X = X
        self.Y = Y
        self.sparseTraining = True
        if (sparcityW == 1.0) and (sparcityB == 1.0) and (sparcityZ == 1.0):
            self.sparseTraining = False
            print("Sparse training disabled.", file=sys.stderr)
        # Define placeholders for sparse training
        self.W_th = None
        self.B_th = None
        self.Z_th = None
        self.__lossType = lossType
        self.__validInit = False
        self.__validInit = self.__validateInit()
        self.__protoNNOut = protoNNObj(X, Y)
        self.loss = self.__lossGraph()
        self.trainStep = self.__trainGraph()
        self.__hthOp = self.__getHardThresholdOp()
        self.accuracy = protoNNObj.getAccuracyOp()

    def __validateInit(self):
        self.__validInit = False
        msg = "Sparsity value should be between"
        msg += " 0 and 1 (both inclusive)."
        assert self.__sW >= 0. and self.__sW <= 1., 'W:' + msg
        assert self.__sB >= 0. and self.__sB <= 1., 'B:' + msg
        assert self.__sZ >= 0. and self.__sZ <= 1., 'Z:' + msg
        d, dcap, m, L, _ = self.protoNNObj.getHyperParams()
        msg = 'Y should be of dimension [-1, num labels/classes]'
        msg += ' specified as part of ProtoNN object.'
        assert (len(self.Y.shape)) == 2, msg
        assert (self.Y.shape[1] == L), msg
        msg = 'X should be of dimension [-1, featureDimension]'
        msg += ' specified as part of ProtoNN object.'
        assert (len(self.X.shape) == 2), msg
        assert (self.X.shape[1] == d), msg
        self.__validInit = True
        msg = 'Values can be \'l2\', or \'xentropy\''
        if self.__lossType not in ['l2', 'xentropy']:
            raise ValueError(msg)
        return True

    def __lossGraph(self):
        pnnOut = self.__protoNNOut
        l1, l2, l3 = self.__regW, self.__regB, self.__regZ
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        if self.__lossType == 'l2':
            with tf.name_scope('protonn-l2-loss'):
                loss_0 = tf.nn.l2_loss(self.Y - pnnOut)
                reg = l1 * tf.nn.l2_loss(W) + l2 * tf.nn.l2_loss(B)
                reg += l3 * tf.nn.l2_loss(Z)
                loss = loss_0 + reg
        elif self.__lossType == 'xentropy':
            with tf.name_scope('protonn-xentropy-loss'):
                loss_0 = tf.nn.softmax_cross_entropy_with_logits_v2(logits=pnnOut,
                                                         labels=tf.stop_gradient(self.Y))
                loss_0 = tf.reduce_mean(loss_0)
                reg = l1 * tf.nn.l2_loss(W) + l2 * tf.nn.l2_loss(B)
                reg += l3 * tf.nn.l2_loss(Z)
                loss = loss_0 + reg
        return loss

    def __trainGraph(self):
        with tf.name_scope('protonn-gradient-adam'):
            trainStep = tf.train.AdamOptimizer(self.__lR)
            trainStep = trainStep.minimize(self.loss)
        return trainStep

    def __getHardThresholdOp(self):
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        self.W_th = tf.placeholder(tf.float32, name='W_th')
        self.B_th = tf.placeholder(tf.float32, name='B_th')
        self.Z_th = tf.placeholder(tf.float32, name='Z_th')
        with tf.name_scope('hard-threshold-assignments'):
            hard_thrsd_W = W.assign(self.W_th)
            hard_thrsd_B = B.assign(self.B_th)
            hard_thrsd_Z = Z.assign(self.Z_th)
            hard_thrsd_op = tf.group(hard_thrsd_W, hard_thrsd_B, hard_thrsd_Z)
        return hard_thrsd_op

    def train(self, batchSize, totalEpochs, sess,
              x_train, x_val, y_train, y_val, noInit=False,
              redirFile=None, printStep=10, valStep=3):
        '''
        Performs dense training of ProtoNN followed by iterative hard
        thresholding to enforce sparsity constraints.

        batchSize: Batch size per update
        totalEpochs: The number of epochs to run training for. One epoch is
            defined as one pass over the entire training data.
        sess: The Tensorflow session to use for running various graph
            operators.
        x_train, x_val, y_train, y_val: The numpy array containing train and
            validation data. x data is assumed to in of shape [-1,
            featureDimension] while y should have shape [-1, numberLabels].
        noInit: By default, all the tensors of the computation graph are
        initialized at the start of the training session. Set noInit=False to
        disable this behaviour.
        printStep: Number of batches between echoing of loss and train accuracy.
        valStep: Number of epochs between evolutions on validation set.
        '''
        d, d_cap, m, L, gamma = self.protoNNObj.getHyperParams()
        assert batchSize >= 1, 'Batch size should be positive integer'
        assert totalEpochs >= 1, 'Total epochs should be positive integer'
        assert x_train.ndim == 2, 'Expected training data to be of rank 2'
        assert x_train.shape[1] == d, 'Expected x_train to be [-1, %d]' % d
        assert x_val.ndim == 2, 'Expected validation data to be of rank 2'
        assert x_val.shape[1] == d, 'Expected x_val to be [-1, %d]' % d
        assert y_train.ndim == 2, 'Expected training labels to be of rank 2'
        assert y_train.shape[1] == L, 'Expected y_train to be [-1, %d]' % L
        assert y_val.ndim == 2, 'Expected validation labels to be of rank 2'
        assert y_val.shape[1] == L, 'Expected y_val to be [-1, %d]' % L

        # Numpy will throw asserts for arrays
        if sess is None:
            raise ValueError('sess must be valid Tensorflow session.')

        trainNumBatches = int(np.ceil(len(x_train) / batchSize))
        valNumBatches = int(np.ceil(len(x_val) / batchSize))
        x_train_batches = np.array_split(x_train, trainNumBatches)
        y_train_batches = np.array_split(y_train, trainNumBatches)
        x_val_batches = np.array_split(x_val, valNumBatches)
        y_val_batches = np.array_split(y_val, valNumBatches)
        if not noInit:
            sess.run(tf.global_variables_initializer())
        X, Y = self.X, self.Y
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        for epoch in range(totalEpochs):
            for i in range(len(x_train_batches)):
                batch_x = x_train_batches[i]
                batch_y = y_train_batches[i]
                feed_dict = {
                    X: batch_x,
                    Y: batch_y
                }
                sess.run(self.trainStep, feed_dict=feed_dict)
                if i % printStep == 0:
                    loss, acc = sess.run([self.loss, self.accuracy],
                                         feed_dict=feed_dict)
                    msg = "Epoch: %3d Batch: %3d" % (epoch, i)
                    msg += " Loss: %3.5f Accuracy: %2.5f" % (loss, acc)
                    print(msg, file=redirFile)

            # Perform Hard thresholding
            if self.sparseTraining:
                W_, B_, Z_ = sess.run([W, B, Z])
                fd_thrsd = {
                    self.W_th: hardThreshold(W_, self.__sW),
                    self.B_th: hardThreshold(B_, self.__sB),
                    self.Z_th: hardThreshold(Z_, self.__sZ)
                }
                sess.run(self.__hthOp, feed_dict=fd_thrsd)

            if (epoch + 1) % valStep  == 0:
                acc = 0.0
                loss = 0.0
                for j in range(len(x_val_batches)):
                    batch_x = x_val_batches[j]
                    batch_y = y_val_batches[j]
                    feed_dict = {
                        X: batch_x,
                        Y: batch_y
                    }
                    acc_, loss_ = sess.run([self.accuracy, self.loss],
                                           feed_dict=feed_dict)
                    acc += acc_
                    loss += loss_
                acc /= len(y_val_batches)
                loss /= len(y_val_batches)
                print("Test Loss: %2.5f Accuracy: %2.5f" % (loss, acc))


In [30]:

class ProtoNN:
    def __init__(self, inputDimension, projectionDimension, numPrototypes,
                 numOutputLabels, gamma,
                 W = None, B = None, Z = None):
        '''
        Forward computation graph for ProtoNN.

        inputDimension: Input data dimension or feature dimension.
        projectionDimension: hyperparameter
        numPrototypes: hyperparameter
        numOutputLabels: The number of output labels or classes
        W, B, Z: Numpy matrices that can be used to initialize
            projection matrix(W), prototype matrix (B) and prototype labels
            matrix (B).
            Expected Dimensions:
                W   inputDimension (d) x projectionDimension (d_cap)
                B   projectionDimension (d_cap) x numPrototypes (m)
                Z   numOutputLabels (L) x numPrototypes (m)
        '''
        with tf.name_scope('protoNN') as ns:
            self.__nscope = ns
        self.__d = inputDimension
        self.__d_cap = projectionDimension
        self.__m = numPrototypes
        self.__L = numOutputLabels

        self.__inW = W
        self.__inB = B
        self.__inZ = Z
        self.__inGamma = gamma
        self.W, self.B, self.Z = None, None, None
        self.gamma = None

        self.__validInit = False
        self.__initWBZ()
        self.__initGamma()
        self.__validateInit()
        self.protoNNOut = None
        self.predictions = None
        self.accuracy = None

    def __validateInit(self):
        self.__validInit = False
        errmsg = "Dimensions mismatch! Should be W[d, d_cap]"
        errmsg += ", B[d_cap, m] and Z[L, m]"
        d, d_cap, m, L, _ = self.getHyperParams()
        assert self.W.shape[0] == d, errmsg
        assert self.W.shape[1] == d_cap, errmsg
        assert self.B.shape[0] == d_cap, errmsg
        assert self.B.shape[1] == m, errmsg
        assert self.Z.shape[0] == L, errmsg
        assert self.Z.shape[1] == m, errmsg
        self.__validInit = True

    def __initWBZ(self):
        with tf.name_scope(self.__nscope):
            W = self.__inW
            if W is None:
                W = tf.random_normal_initializer()
                W = W([self.__d, self.__d_cap])
            self.W = tf.Variable(W, name='W', dtype=tf.float32)

            B = self.__inB
            if B is None:
                B = tf.random_uniform_initializer()
                B = B([self.__d_cap, self.__m])
            self.B = tf.Variable(B, name='B', dtype=tf.float32)

            Z = self.__inZ
            if Z is None:
                Z = tf.random_normal_initializer()
                Z = Z([self.__L, self.__m])
            Z = tf.Variable(Z, name='Z', dtype=tf.float32)
            self.Z = Z
        return self.W, self.B, self.Z

    def __initGamma(self):
        with tf.name_scope(self.__nscope):
            gamma = self.__inGamma
            self.gamma = tf.constant(gamma, name='gamma')

    def getHyperParams(self):
        '''
        Returns the model hyperparameters:
            [inputDimension, projectionDimension,
            numPrototypes, numOutputLabels, gamma]
        '''
        d = self.__d
        dcap = self.__d_cap
        m = self.__m
        L = self.__L
        return d, dcap, m, L, self.gamma

    def getModelMatrices(self):
        '''
        Returns Tensorflow tensors of the model matrices, which
        can then be evaluated to obtain corresponding numpy arrays.

        These can then be exported as part of other implementations of
        ProtonNN, for instance a C++ implementation or pure python
        implementation.
        Returns
            [ProjectionMatrix (W), prototypeMatrix (B),
             prototypeLabelsMatrix (Z), gamma]
        '''
        return self.W, self.B, self.Z, self.gamma

    def __call__(self, X, Y=None):
        '''
        This method is responsible for construction of the forward computation
        graph. The end point of the computation graph, or in other words the
        output operator for the forward computation is returned. Additionally,
        if the argument Y is provided, a classification accuracy operator with
        Y as target will also be created. For this, Y is assumed to in one-hot
        encoded format and the class with the maximum prediction score is
        compared to the encoded class in Y.  This accuracy operator is returned
        by getAccuracyOp() method. If a different accuracyOp is required, it
        can be defined by overriding the createAccOp(protoNNScoresOut, Y)
        method.

        X: Input tensor or placeholder of shape [-1, inputDimension]
        Y: Optional tensor or placeholder for targets (labels or classes).
            Expected shape is [-1, numOutputLabels].
        returns: The forward computation outputs, self.protoNNOut
        '''
        # This should never execute
        assert self.__validInit is True, "Initialization failed!"
        if self.protoNNOut is not None:
            return self.protoNNOut

        W, B, Z, gamma = self.W, self.B, self.Z, self.gamma
        with tf.name_scope(self.__nscope):
            WX = tf.matmul(X, W)
            # Convert WX to tensor so that broadcasting can work
            dim = [-1, WX.shape.as_list()[1], 1]
            WX = tf.reshape(WX, dim)
            dim = [1, B.shape.as_list()[0], -1]
            B = tf.reshape(B, dim)
            l2sim = B - WX
            l2sim = tf.pow(l2sim, 2)
            l2sim = tf.reduce_sum(l2sim, 1, keepdims=True)
            self.l2sim = l2sim
            gammal2sim = (-1 * gamma * gamma) * l2sim
            M = tf.exp(gammal2sim)
            dim = [1] + Z.shape.as_list()
            Z = tf.reshape(Z, dim)
            y = tf.multiply(Z, M)
            y = tf.reduce_sum(y, 2, name='protoNNScoreOut')
            self.protoNNOut = y
            self.predictions = tf.argmax(y, 1, name='protoNNPredictions')
            if Y is not None:
                self.createAccOp(self.protoNNOut, Y)
        return y

    def createAccOp(self, outputs, target):
        '''
        Define an accuracy operation on ProtoNN's output scores and targets.
        Here a simple classification accuracy operator is defined. More
        complicated operators (for multiple label problems and so forth) can be
        defined by overriding this method
        '''
        assert self.predictions is not None
        target = tf.argmax(target, 1)
        correctPrediction = tf.equal(self.predictions, target)
        acc = tf.reduce_mean(tf.cast(correctPrediction, tf.float32),
                             name='protoNNAccuracy')
        self.accuracy = acc

    def getPredictionsOp(self):
        '''
        The predictions operator is defined as argmax(protoNNScores) for each
        prediction.
        '''
        return self.predictions

    def getAccuracyOp(self):
        '''
        returns accuracyOp as defined by createAccOp. It defaults to
        multi-class classification accuracy.
        '''
        msg = "Accuracy operator not defined in graph. Did you provide Y as an"
        msg += " argument to _call_?"
        assert self.accuracy is not None, msg
        return self.accuracy

In [31]:
DATA_DIR = r"./experiments"
windowLen = 'data_w2'
out = preprocessData(DATA_DIR,windowLen)
dataDimension = out[0]
numClasses = out[1]
x_train, y_train = out[2], out[3]
x_test, y_test = out[4], out[5]
print("Feature Dimension: ", dataDimension)
print("Num classes: ", numClasses)

Feature Dimension:  423
Num classes:  2


In [32]:

DATA_DIR = r"./experiments"
train, test = np.load(DATA_DIR + '/ttrain_data_w2.npy'), np.load(DATA_DIR + '/ttest_data_w2.npy')
x_train, y_train = train[:, 1:], train[:, 0]
x_test, y_test = test[:, 1:], test[:, 0]

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.15, random_state=42)

numClasses = max(y_train) - min(y_train) + 1
numClasses = max(numClasses, max(y_test) - min(y_test) + 1)
numClasses = int(numClasses)

y_train = helper.to_onehot(y_train, numClasses)
y_test = helper.to_onehot(y_test, numClasses)
y_val = helper.to_onehot(y_val, numClasses)

dataDimension = x_train.shape[1]
numClasses = y_train.shape[1]

In [33]:

X = tf.placeholder(tf.float32, [None, dataDimension], name='X')
Y = tf.placeholder(tf.float32, [None, numClasses], name='Y')
protoNN = ProtoNN(dataDimension, PROJECTION_DIM,
                  NUM_PROTOTYPES, numClasses,
                  gamma, W=W, B=B)
trainer = ProtoNNTrainer(protoNN,  REG_W, REG_B, REG_Z,
                         SPAR_W, SPAR_B, SPAR_Z,
                        LEARNING_RATE, X, Y, lossType='l2')
sess = tf.Session()

trainer.train(2048, 800, sess, x_train, x_test, y_train, y_test,
              printStep=600, valStep=10)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Epoch:   0 Batch:   0 Loss: 46868.41406 Accuracy: 0.47614
Epoch:   1 Batch:   0 Loss: 44770.91016 Accuracy: 0.47614
Epoch:   2 Batch:   0 Loss: 42837.33203 Accuracy: 0.47614
Epoch:   3 Batch:   0 Loss: 40965.57031 Accuracy: 0.47614
Epoch:   4 Batch:   0 Loss: 39157.84766 Accuracy: 0.47614
Epoch:   5 Batch:   0 Loss: 37414.41797 Accuracy: 0.47614
Epoch:   6 Batch:   0 Loss: 35734.51172 Accuracy: 0.47614
Epoch:   7 Batch:   0 Loss: 34116.85156 Accuracy: 0.47614
Epoch:   8 Batch:   0 Loss: 32560.05664 Accuracy: 0.47614
Epoch:   9 Batch:   0 Loss: 31062.67969 Accuracy: 0.47614
Test Loss: 28237.13965 Accuracy: 0.49987
Epoch:  10 Batch:   0 Loss: 29623.28125 Accuracy: 0.47614
Epoch:  11 Batch:   0 Loss: 28240.40820 Accuracy: 0.47614
Epoch:  12 Batch:   0 Loss: 26912.63281 Accuracy: 0.47614
Epoch:  13 Batch:   0 Loss: 25638.51758 Accuracy: 0.47

Test Loss: 248.12094 Accuracy: 0.49554
Epoch: 130 Batch:   0 Loss: 244.44221 Accuracy: 0.55083
Epoch: 131 Batch:   0 Loss: 243.46843 Accuracy: 0.54979
Epoch: 132 Batch:   0 Loss: 242.52629 Accuracy: 0.55498
Epoch: 133 Batch:   0 Loss: 241.68901 Accuracy: 0.55602
Epoch: 134 Batch:   0 Loss: 240.91435 Accuracy: 0.56846
Epoch: 135 Batch:   0 Loss: 240.19415 Accuracy: 0.57158
Epoch: 136 Batch:   0 Loss: 239.52411 Accuracy: 0.57365
Epoch: 137 Batch:   0 Loss: 238.90012 Accuracy: 0.58402
Epoch: 138 Batch:   0 Loss: 238.31854 Accuracy: 0.58506
Epoch: 139 Batch:   0 Loss: 237.76915 Accuracy: 0.58921
Test Loss: 242.28015 Accuracy: 0.51941
Epoch: 140 Batch:   0 Loss: 237.25764 Accuracy: 0.58506
Epoch: 141 Batch:   0 Loss: 236.77925 Accuracy: 0.58921
Epoch: 142 Batch:   0 Loss: 236.33110 Accuracy: 0.59959
Epoch: 143 Batch:   0 Loss: 235.91087 Accuracy: 0.60270
Epoch: 144 Batch:   0 Loss: 235.51419 Accuracy: 0.60996
Epoch: 145 Batch:   0 Loss: 235.14304 Accuracy: 0.61618
Epoch: 146 Batch:   0 Loss

Epoch: 268 Batch:   0 Loss: 218.40016 Accuracy: 0.68361
Epoch: 269 Batch:   0 Loss: 218.27057 Accuracy: 0.68465
Test Loss: 229.44937 Accuracy: 0.61911
Epoch: 270 Batch:   0 Loss: 218.14020 Accuracy: 0.68361
Epoch: 271 Batch:   0 Loss: 218.00954 Accuracy: 0.68361
Epoch: 272 Batch:   0 Loss: 217.87834 Accuracy: 0.68568
Epoch: 273 Batch:   0 Loss: 217.74568 Accuracy: 0.68672
Epoch: 274 Batch:   0 Loss: 217.61333 Accuracy: 0.68672
Epoch: 275 Batch:   0 Loss: 217.48044 Accuracy: 0.68672
Epoch: 276 Batch:   0 Loss: 217.34714 Accuracy: 0.68672
Epoch: 277 Batch:   0 Loss: 217.21292 Accuracy: 0.68672
Epoch: 278 Batch:   0 Loss: 217.07838 Accuracy: 0.68672
Epoch: 279 Batch:   0 Loss: 216.94331 Accuracy: 0.68672
Test Loss: 228.45618 Accuracy: 0.62003
Epoch: 280 Batch:   0 Loss: 216.80774 Accuracy: 0.68672
Epoch: 281 Batch:   0 Loss: 216.67168 Accuracy: 0.68672
Epoch: 282 Batch:   0 Loss: 216.53508 Accuracy: 0.68672
Epoch: 283 Batch:   0 Loss: 216.39487 Accuracy: 0.68672
Epoch: 284 Batch:   0 Loss

Epoch: 406 Batch:   0 Loss: 192.30379 Accuracy: 0.74896
Epoch: 407 Batch:   0 Loss: 192.02461 Accuracy: 0.75104
Epoch: 408 Batch:   0 Loss: 191.74397 Accuracy: 0.75104
Epoch: 409 Batch:   0 Loss: 191.46185 Accuracy: 0.75104
Test Loss: 207.21646 Accuracy: 0.69756
Epoch: 410 Batch:   0 Loss: 191.17833 Accuracy: 0.75207
Epoch: 411 Batch:   0 Loss: 190.89317 Accuracy: 0.75207
Epoch: 412 Batch:   0 Loss: 190.60661 Accuracy: 0.75519
Epoch: 413 Batch:   0 Loss: 190.31862 Accuracy: 0.75415
Epoch: 414 Batch:   0 Loss: 190.02911 Accuracy: 0.75622
Epoch: 415 Batch:   0 Loss: 189.73817 Accuracy: 0.75622
Epoch: 416 Batch:   0 Loss: 189.44516 Accuracy: 0.75726
Epoch: 417 Batch:   0 Loss: 189.15073 Accuracy: 0.75830
Epoch: 418 Batch:   0 Loss: 188.85408 Accuracy: 0.76141
Epoch: 419 Batch:   0 Loss: 188.55598 Accuracy: 0.76349
Test Loss: 204.79952 Accuracy: 0.70721
Epoch: 420 Batch:   0 Loss: 188.25641 Accuracy: 0.76452
Epoch: 421 Batch:   0 Loss: 187.95532 Accuracy: 0.76556
Epoch: 422 Batch:   0 Loss

Epoch: 543 Batch:   0 Loss: 144.11165 Accuracy: 0.86618
Epoch: 544 Batch:   0 Loss: 143.74059 Accuracy: 0.86618
Epoch: 545 Batch:   0 Loss: 143.37239 Accuracy: 0.86618
Epoch: 546 Batch:   0 Loss: 143.00478 Accuracy: 0.86618
Epoch: 547 Batch:   0 Loss: 142.63782 Accuracy: 0.86722
Epoch: 548 Batch:   0 Loss: 142.27159 Accuracy: 0.86722
Epoch: 549 Batch:   0 Loss: 141.90604 Accuracy: 0.86722
Test Loss: 171.47847 Accuracy: 0.78646
Epoch: 550 Batch:   0 Loss: 141.54085 Accuracy: 0.86722
Epoch: 551 Batch:   0 Loss: 141.17703 Accuracy: 0.86722
Epoch: 552 Batch:   0 Loss: 140.81406 Accuracy: 0.86722
Epoch: 553 Batch:   0 Loss: 140.45151 Accuracy: 0.86722
Epoch: 554 Batch:   0 Loss: 140.08975 Accuracy: 0.86722
Epoch: 555 Batch:   0 Loss: 139.72899 Accuracy: 0.86722
Epoch: 556 Batch:   0 Loss: 139.36913 Accuracy: 0.86722
Epoch: 557 Batch:   0 Loss: 139.00999 Accuracy: 0.86722
Epoch: 558 Batch:   0 Loss: 138.65157 Accuracy: 0.86618
Epoch: 559 Batch:   0 Loss: 138.29478 Accuracy: 0.86618
Test Loss

Epoch: 682 Batch:   0 Loss: 103.38098 Accuracy: 0.88900
Epoch: 683 Batch:   0 Loss: 103.17718 Accuracy: 0.88900
Epoch: 684 Batch:   0 Loss: 102.97262 Accuracy: 0.88797
Epoch: 685 Batch:   0 Loss: 102.76967 Accuracy: 0.88797
Epoch: 686 Batch:   0 Loss: 102.56738 Accuracy: 0.88797
Epoch: 687 Batch:   0 Loss: 102.36708 Accuracy: 0.88797
Epoch: 688 Batch:   0 Loss: 102.16743 Accuracy: 0.88797
Epoch: 689 Batch:   0 Loss: 101.96894 Accuracy: 0.88797
Test Loss: 159.32807 Accuracy: 0.78063
Epoch: 690 Batch:   0 Loss: 101.77213 Accuracy: 0.88797
Epoch: 691 Batch:   0 Loss: 101.57639 Accuracy: 0.88797
Epoch: 692 Batch:   0 Loss: 101.38161 Accuracy: 0.88900
Epoch: 693 Batch:   0 Loss: 101.18822 Accuracy: 0.88900
Epoch: 694 Batch:   0 Loss: 100.99547 Accuracy: 0.88900
Epoch: 695 Batch:   0 Loss: 100.80372 Accuracy: 0.88900
Epoch: 696 Batch:   0 Loss: 100.61311 Accuracy: 0.88900
Epoch: 697 Batch:   0 Loss: 100.42360 Accuracy: 0.88900
Epoch: 698 Batch:   0 Loss: 100.23495 Accuracy: 0.88900
Epoch: 69

In [34]:
acc = sess.run(protoNN.accuracy, feed_dict={X: x_test, Y: y_test})
pred = sess.run(protoNN.predictions, feed_dict={X: x_test, Y: y_test})
# W, B, Z are tensorflow graph nodes
W, B, Z, _ = protoNN.getModelMatrices()
matrixList = sess.run([W, B, Z])
sparcityList = [SPAR_W, SPAR_B, SPAR_Z]                       
nnz, size, sparse = getModelSize(matrixList, sparcityList)
print("Final test accuracy", acc)
print("Model size constraint (Bytes): ", size)
print("Number of non-zeros: ", nnz)

Final test accuracy 0.77599895
Model size constraint (Bytes):  9580
Number of non-zeros:  2395


In [35]:
from sklearn.metrics import confusion_matrix,classification_report
y_test = np.argmax(y_test,axis=1)
print (confusion_matrix(y_test,pred))
print (classification_report(y_test,pred,digits=5))

[[2876  903]
 [ 790 2989]]
              precision    recall  f1-score   support

           0    0.78451   0.76105   0.77260      3779
           1    0.76799   0.79095   0.77930      3779

    accuracy                        0.77600      7558
   macro avg    0.77625   0.77600   0.77595      7558
weighted avg    0.77625   0.77600   0.77595      7558



In [36]:
sensitivity = confusion_matrix(y_test,pred)[1][1]/(confusion_matrix(y_test,pred)[1][1] + confusion_matrix(y_test,pred)[1][0])
sensitivity

0.7909499867689865

In [37]:
specificity = confusion_matrix(y_test,pred)[0][0]/(confusion_matrix(y_test,pred)[0][0] + confusion_matrix(y_test,pred)[0][1])
specificity

0.7610478962688542

**WINDOW 3**

In [38]:
from IPython.display import display_html
def restartkernel() :
    display_html("<script>Jupyter.notebook.kernel.restart()</script>",raw=True)

In [None]:
restartkernel()

In [1]:
hp = {'REG_W': 2.150499128114348e-06,
 'REG_B': 0.0013123991795708923,
 'REG_Z': 2.7678514122803996e-05,
 'SPAR_W': 0.9022371274865826,
 'SPAR_B': 0.5539345534584593,
 'SPAR_Z': 0.9993522107302426,
 'loss': 'l2',
 'LEARNING_RATE': 0.00031975053586635394,
 'NUM_EPOCHS': 300,
 'alpha': 0.31386241034032014}
PROJECTION_DIM = 5 #d^
NUM_PROTOTYPES = 40 #m
REG_W = hp['REG_W']
REG_B = hp['REG_B']
REG_Z = hp['REG_Z']
SPAR_W = hp['SPAR_W']
SPAR_B = hp['SPAR_B']
SPAR_Z = hp['SPAR_Z']
loss = hp['loss']
LEARNING_RATE = study.best_params['LEARNING_RATE']
NUM_EPOCHS = study.best_params['NUM_EPOCHS']
BATCH_SIZE = 1024
GAMMA = gamma



NameError: name 'study' is not defined

In [2]:
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

from __future__ import print_function
import sys
import os
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

#sys.path.insert(0, '../../')
# from edgeml.trainer.protoNNTrainer import ProtoNNTrainer
# from edgeml.graph.protoNN import ProtoNN
# import edgeml.utils as utils
# import helpermethods as helper
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV


sys.path.append(r"E:\programming\practice\research\optimized code\EdgeML\examples\tf\ProtoNN")
import helpermethods as helper

#helper methods
sys.path.insert(0, '../')
import argparse


def getModelSize(matrixList, sparcityList, expected=True, bytesPerVar=4):
    '''
    expected: Expected size according to the parameters set. The number of
        zeros could actually be more than that is required to satisfy the
        sparsity constraint.
    '''
    nnzList, sizeList, isSparseList = [], [], []
    hasSparse = False
    for i in range(len(matrixList)):
        A, s = matrixList[i], sparcityList[i]
        assert A.ndim == 2
        assert s >= 0
        assert s <= 1
        nnz, size, sparse = countnnZ(A, s, bytesPerVar=bytesPerVar)
        nnzList.append(nnz)
        sizeList.append(size)
        hasSparse = (hasSparse or sparse)

    totalnnZ = np.sum(nnzList)
    totalSize = np.sum(sizeList)
    if expected:
        return totalnnZ, totalSize, hasSparse
    numNonZero = 0
    totalSize = 0
    hasSparse = False
    for i in range(len(matrixList)):
        A, s = matrixList[i], sparcityList[i]
        numNonZero_ = np.count_nonzero(A)
        numNonZero += numNonZero_
        hasSparse = (hasSparse or (s < 0.5))
        if s <= 0.5:
            totalSize += numNonZero_ * 2 * bytesPerVar
        else:
            totalSize += A.size * bytesPerVar
    return numNonZero, totalSize, hasSparse


def getGamma(gammaInit, projectionDim, dataDim, numPrototypes, x_train):
    if gammaInit is None:
        print("Using median heuristic to estimate gamma.")
        gamma, W, B = medianHeuristic(x_train, projectionDim,
                                            numPrototypes)
        print("Gamma estimate is: %f" % gamma)
        return W, B, gamma
    return None, None, gammaInit


def preprocessData(dataDir,w):
    '''
    Loads data from the dataDir and does some initial preprocessing
    steps. Data is assumed to be contained in two files,
    train.npy and test.npy. Each containing a 2D numpy array of dimension
    [numberOfExamples, numberOfFeatures + 1]. The first column of each
    matrix is assumed to contain label information.

    For an N-Class problem, we assume the labels are integers from 0 through
    N-1.
    '''
    # Uncomment for usual training data
    # train = np.load(dataDir + '/train_'+str(w)+'.npy')
    # test = np.load(dataDir + '/test_'+str(w)+'.npy')
    # Uncomment for time domain training data
    train = np.load(dataDir + '/ttrain_'+str(w)+'.npy')
    test = np.load(dataDir + '/ttest_'+str(w)+'.npy')
    # Uncomment for 1 sensordrop training data
    # train = np.load(dataDir + '/train_'+str(w)+'.npy')
    # test = np.load(dataDir + '/test_'+str(w)+'.npy')

    dataDimension = int(train.shape[1]) - 1
    x_train = train[:, 1:dataDimension + 1]
    y_train_ = train[:, 0]
    x_test = test[:, 1:dataDimension + 1]
    y_test_ = test[:, 0]

    numClasses = max(y_train_) - min(y_train_) + 1
    numClasses = max(numClasses, max(y_test_) - min(y_test_) + 1)
    numClasses = int(numClasses)

    # mean-var
    mean = np.mean(x_train, 0)
    std = np.std(x_train, 0)
    std[std[:] < 0.000001] = 1
    x_train = (x_train - mean) / std
    x_test = (x_test - mean) / std

    # one hot y-train
    lab = y_train_.astype('uint8')
    lab = np.array(lab) - min(lab)
    lab_ = np.zeros((x_train.shape[0], numClasses))
    lab_[np.arange(x_train.shape[0]), lab] = 1
    y_train = lab_

    # one hot y-test
    lab = y_test_.astype('uint8')
    lab = np.array(lab) - min(lab)
    lab_ = np.zeros((x_test.shape[0], numClasses))
    lab_[np.arange(x_test.shape[0]), lab] = 1
    y_test = lab_

    return dataDimension, numClasses, x_train, y_train, x_test, y_test



def getProtoNNArgs():
    def checkIntPos(value):
        ivalue = int(value)
        if ivalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive int value" % value)
        return ivalue

    def checkIntNneg(value):
        ivalue = int(value)
        if ivalue < 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid non-neg int value" % value)
        return ivalue

    def checkFloatNneg(value):
        fvalue = float(value)
        if fvalue < 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid non-neg float value" % value)
        return fvalue

    def checkFloatPos(value):
        fvalue = float(value)
        if fvalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive float value" % value)
        return fvalue

    '''
    Parse protoNN commandline arguments
    '''
    parser = argparse.ArgumentParser(
        description='Hyperparameters for ProtoNN Algorithm')

    msg = 'Data directory containing train and test data. The '
    msg += 'data is assumed to be saved as 2-D numpy matrices with '
    msg += 'names `train.npy` and `test.npy`, of dimensions\n'
    msg += '\t[numberOfInstances, numberOfFeatures + 1].\n'
    msg += 'The first column of each file is assumed to contain label information.'
    msg += ' For a N-class problem, labels are assumed to be integers from 0 to'
    msg += ' N-1 (inclusive).'
    parser.add_argument('-d', '--data-dir', required=True, help=msg)
    parser.add_argument('-l', '--projection-dim', type=checkIntPos, default=10,
                        help='Projection Dimension.')
    parser.add_argument('-p', '--num-prototypes', type=checkIntPos, default=20,
                        help='Number of prototypes.')
    parser.add_argument('-g', '--gamma', type=checkFloatPos, default=None,
                        help='Gamma for Gaussian kernel. If not provided, ' +
                        'median heuristic will be used to estimate gamma.')

    parser.add_argument('-e', '--epochs', type=checkIntPos, default=100,
                        help='Total training epochs.')
    parser.add_argument('-b', '--batch-size', type=checkIntPos, default=32,
                        help='Batch size for each pass.')
    parser.add_argument('-r', '--learning-rate', type=checkFloatPos,
                        default=0.001,
                        help='Initial Learning rate for ADAM Optimizer.')

    parser.add_argument('-rW', type=float, default=0.000,
                        help='Coefficient for l2 regularizer for predictor' +
                        ' parameter W ' + '(default = 0.0).')
    parser.add_argument('-rB', type=float, default=0.00,
                        help='Coefficient for l2 regularizer for predictor' +
                        ' parameter B ' + '(default = 0.0).')
    parser.add_argument('-rZ', type=float, default=0.00,
                        help='Coefficient for l2 regularizer for predictor' +
                        'parameter Z ' +
                        '(default = 0.0).')

    parser.add_argument('-sW', type=float, default=1.000,
                        help='Sparsity constraint for predictor parameter W ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-sB', type=float, default=1.00,
                        help='Sparsity constraint for predictor parameter B ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-sZ', type=float, default=1.00,
                        help='Sparsity constraint for predictor parameter Z ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-pS', '--print-step', type=int, default=200,
                        help='The number of update steps between print ' +
                        'calls to console.')
    parser.add_argument('-vS', '--val-step', type=int, default=3,
                        help='The number of epochs between validation' +
                        'performance evaluation')
    return parser.parse_args()

#utils
import scipy.cluster
import scipy.spatial
import os


def medianHeuristic(data, projectionDimension, numPrototypes, W_init=None):
    '''
    This method can be used to estimate gamma for ProtoNN. An approximation to
    median heuristic is used here.
    1. First the data is collapsed into the projectionDimension by W_init. If
    W_init is not provided, it is initialized from a random normal(0, 1). Hence
    data normalization is essential.
    2. Prototype are computed by running a  k-means clustering on the projected
    data.
    3. The median distance is then estimated by calculating median distance
    between prototypes and projected data points.

    data needs to be [-1, numFeats]
    If using this method to initialize gamma, please use the W and B as well.

    TODO: Return estimate of Z (prototype labels) based on cluster centroids
    andand labels

    TODO: Clustering fails due to singularity error if projecting upwards

    W [dxd_cap]
    B [d_cap, m]
    returns gamma, W, B
    '''
    assert data.ndim == 2
    X = data
    featDim = data.shape[1]
    if projectionDimension > featDim:
        print("Warning: Projection dimension > feature dimension. Gamma")
        print("\t estimation due to median heuristic could fail.")
        print("\tTo retain the projection dataDimension, provide")
        print("\ta value for gamma.")

    if W_init is None:
        W_init = np.random.normal(size=[featDim, projectionDimension])
    W = W_init
    XW = np.matmul(X, W)
    assert XW.shape[1] == projectionDimension
    assert XW.shape[0] == len(X)
    # Requires [N x d_cap] data matrix of N observations of d_cap-dimension and
    # the number of centroids m. Returns, [n x d_cap] centroids and
    # elementwise center information.
    B, centers = scipy.cluster.vq.kmeans2(XW, numPrototypes)
    # Requires two matrices. Number of observations x dimension of observation
    # space. Distances[i,j] is the distance between XW[i] and B[j]
    distances = scipy.spatial.distance.cdist(XW, B, metric='euclidean')
    distances = np.reshape(distances, [-1])
    gamma = np.median(distances)
    gamma = 1 / (2.5 * gamma)
    return gamma.astype('float32'), W.astype('float32'), B.T.astype('float32')


def multiClassHingeLoss(logits, label, batch_th):
    '''
    MultiClassHingeLoss to match C++ Version - No TF internal version
    '''
    flatLogits = tf.reshape(logits, [-1, ])
    label_ = tf.argmax(label, 1)

    correctId = tf.range(0, batch_th) * label.shape[1] + label_
    correctLogit = tf.gather(flatLogits, correctId)

    maxLabel = tf.argmax(logits, 1)
    top2, _ = tf.nn.top_k(logits, k=2, sorted=True)

    wrongMaxLogit = tf.where(
        tf.equal(maxLabel, label_), top2[:, 1], top2[:, 0])

    return tf.reduce_mean(tf.nn.relu(1. + wrongMaxLogit - correctLogit))


def crossEntropyLoss(logits, label):
    '''
    Cross Entropy loss for MultiClass case in joint training for
    faster convergence
    '''
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                   labels=tf.stop_gradient(label)))


def mean_absolute_error(logits, label):
    '''
    Function to compute the mean absolute error.
    '''
    return tf.reduce_mean(tf.abs(tf.subtract(logits, label)))


def hardThreshold(A, s):
    '''
    Hard thresholding function on Tensor A with sparsity s
    '''
    A_ = np.copy(A)
    A_ = A_.ravel()
    if len(A_) > 0:
        th = np.percentile(np.abs(A_), (1 - s) * 100.0, interpolation='higher')
        A_[np.abs(A_) < th] = 0.0
    A_ = A_.reshape(A.shape)
    return A_


def copySupport(src, dest):
    '''
    copy support of src tensor to dest tensor
    '''
    support = np.nonzero(src)
    dest_ = dest
    dest = np.zeros(dest_.shape)
    dest[support] = dest_[support]
    return dest


def countnnZ(A, s, bytesPerVar=4):
    '''
    Returns # of non-zeros and representative size of the tensor
    Uses dense for s >= 0.5 - 4 byte
    Else uses sparse - 8 byte
    '''
    params = 1
    hasSparse = False
    for i in range(0, len(A.shape)):
        params *= int(A.shape[i])
    if s < 0.5:
        nnZ = np.ceil(params * s)
        hasSparse = True
        return nnZ, nnZ * 2 * bytesPerVar, hasSparse
    else:
        nnZ = params
        return nnZ, nnZ * bytesPerVar, hasSparse


def getConfusionMatrix(predicted, target, numClasses):
    '''
    Returns a confusion matrix for a multiclass classification
    problem. `predicted` is a 1-D array of integers representing
    the predicted classes and `target` is the target classes.

    confusion[i][j]: Number of elements of class j
        predicted as class i
    Labels are assumed to be in range(0, numClasses)
    Use`printFormattedConfusionMatrix` to echo the confusion matrix
    in a user friendly form.
    '''
    assert(predicted.ndim == 1)
    assert(target.ndim == 1)
    arr = np.zeros([numClasses, numClasses])

    for i in range(len(predicted)):
        arr[predicted[i]][target[i]] += 1
    return arr


def printFormattedConfusionMatrix(matrix):
    '''
    Given a 2D confusion matrix, prints it in a human readable way.
    The confusion matrix is expected to be a 2D numpy array with
    square dimensions
    '''
    assert(matrix.ndim == 2)
    assert(matrix.shape[0] == matrix.shape[1])
    RECALL = 'Recall'
    PRECISION = 'PRECISION'
    print("|%s|" % ('True->'), end='')
    for i in range(matrix.shape[0]):
        print("%7d|" % i, end='')
    print("%s|" % 'Precision')

    print("|%s|" % ('-' * len(RECALL)), end='')
    for i in range(matrix.shape[0]):
        print("%s|" % ('-' * 7), end='')
    print("%s|" % ('-' * len(PRECISION)))

    precisionlist = np.sum(matrix, axis=1)
    recalllist = np.sum(matrix, axis=0)
    precisionlist = [matrix[i][i] / x if x !=
                     0 else -1 for i, x in enumerate(precisionlist)]
    recalllist = [matrix[i][i] / x if x !=
                  0 else -1 for i, x in enumerate(recalllist)]
    for i in range(matrix.shape[0]):
        # len recall = 6
        print("|%6d|" % (i), end='')
        for j in range(matrix.shape[0]):
            print("%7d|" % (matrix[i][j]), end='')
        print("%s" % (" " * (len(PRECISION) - 7)), end='')
        if precisionlist[i] != -1:
            print("%1.5f|" % precisionlist[i])
        else:
            print("%7s|" % "nan")

    print("|%s|" % ('-' * len(RECALL)), end='')
    for i in range(matrix.shape[0]):
        print("%s|" % ('-' * 7), end='')
    print("%s|" % ('-' * len(PRECISION)))
    print("|%s|" % ('Recall'), end='')

    for i in range(matrix.shape[0]):
        if recalllist[i] != -1:
            print("%1.5f|" % (recalllist[i]), end='')
        else:
            print("%7s|" % "nan", end='')

    print('%s|' % (' ' * len(PRECISION)))


def getPrecisionRecall(cmatrix, label=1):
    trueP = cmatrix[label][label]
    denom = np.sum(cmatrix, axis=0)[label]
    if denom == 0:
        denom = 1
    recall = trueP / denom
    denom = np.sum(cmatrix, axis=1)[label]
    if denom == 0:
        denom = 1
    precision = trueP / denom
    return precision, recall


def getMacroPrecisionRecall(cmatrix):
    # TP + FP
    precisionlist = np.sum(cmatrix, axis=1)
    # TP + FN
    recalllist = np.sum(cmatrix, axis=0)
    precisionlist__ = [cmatrix[i][i] / x if x !=
                       0 else 0 for i, x in enumerate(precisionlist)]
    recalllist__ = [cmatrix[i][i] / x if x !=
                    0 else 0 for i, x in enumerate(recalllist)]
    precision = np.sum(precisionlist__)
    precision /= len(precisionlist__)
    recall = np.sum(recalllist__)
    recall /= len(recalllist__)
    return precision, recall


def getMicroPrecisionRecall(cmatrix):
    # TP + FP
    precisionlist = np.sum(cmatrix, axis=1)
    # TP + FN
    recalllist = np.sum(cmatrix, axis=0)
    num = 0.0
    for i in range(len(cmatrix)):
        num += cmatrix[i][i]

    precision = num / np.sum(precisionlist)
    recall = num / np.sum(recalllist)
    return precision, recall


def getMacroMicroFScore(cmatrix):
    '''
    Returns macro and micro f-scores.
    Refer: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.8244&rep=rep1&type=pdf
    '''
    precisionlist = np.sum(cmatrix, axis=1)
    recalllist = np.sum(cmatrix, axis=0)
    precisionlist__ = [cmatrix[i][i] / x if x !=
                       0 else 0 for i, x in enumerate(precisionlist)]
    recalllist__ = [cmatrix[i][i] / x if x !=
                    0 else 0 for i, x in enumerate(recalllist)]
    macro = 0.0
    for i in range(len(precisionlist)):
        denom = precisionlist__[i] + recalllist__[i]
        numer = precisionlist__[i] * recalllist__[i] * 2
        if denom == 0:
            denom = 1
        macro += numer / denom
    macro /= len(precisionlist)

    num = 0.0
    for i in range(len(precisionlist)):
        num += cmatrix[i][i]

    denom1 = np.sum(precisionlist)
    denom2 = np.sum(recalllist)
    pi = num / denom1
    rho = num / denom2
    denom = pi + rho
    if denom == 0:
        denom = 1
    micro = 2 * pi * rho / denom
    return macro, micro


class GraphManager:
    '''
    Manages saving and restoring graphs. Designed to be used with EMI-RNN
    though is general enough to be useful otherwise as well.
    '''

    def __init__(self):
        pass

    def checkpointModel(self, saver, sess, modelPrefix,
                        globalStep=1000, redirFile=None):
        saver.save(sess, modelPrefix, global_step=globalStep)
        print('Model saved to %s, global_step %d' % (modelPrefix, globalStep),
              file=redirFile)

    def loadCheckpoint(self, sess, modelPrefix, globalStep,
                       redirFile=None):
        metaname = modelPrefix + '-%d.meta' % globalStep
        basename = os.path.basename(metaname)
        fileList = os.listdir(os.path.dirname(modelPrefix))
        fileList = [x for x in fileList if x.startswith(basename)]
        assert len(fileList) > 0, 'Checkpoint file not found'
        msg = 'Too many or too few checkpoint files for globalStep: %d' % globalStep
        assert len(fileList) is 1, msg
        chkpt = basename + '/' + fileList[0]
        saver = tf.train.import_meta_graph(metaname)
        metaname = metaname[:-5]
        saver.restore(sess, metaname)
        graph = tf.get_default_graph()
        return graph

#Trainer
class ProtoNNTrainer:
    def __init__(self, protoNNObj, regW, regB, regZ,
                 sparcityW, sparcityB, sparcityZ,
                 learningRate, X, Y, lossType='l2'):
        '''
        A wrapper for the various techniques used for training ProtoNN. This
        subsumes both the responsibility of loss graph construction and
        performing training. The original training routine that is part of the
        C++ implementation of EdgeML used iterative hard thresholding (IHT),
        gamma estimation through median heuristic and other tricks for
        training ProtoNN. This module implements the same in Tensorflow
        and python.

        protoNNObj: An instance of ProtoNN class defining the forward
            computation graph. The loss functions and training routines will be
            attached to this instance.
        regW, regB, regZ: Regularization constants for W, B, and
            Z matrices of protoNN.
        sparcityW, sparcityB, sparcityZ: Sparsity constraints
            for W, B and Z matrices. A value between 0 (exclusive) and 1
            (inclusive) is expected. A value of 1 indicates dense training.
        learningRate: Initial learning rate for ADAM optimizer.
        X, Y : Placeholders for data and labels.
            X [-1, featureDimension]
            Y [-1, num Labels]
        lossType: ['l2', 'xentropy']
        '''
        self.protoNNObj = protoNNObj
        self.__regW = regW
        self.__regB = regB
        self.__regZ = regZ
        self.__sW = sparcityW
        self.__sB = sparcityB
        self.__sZ = sparcityZ
        self.__lR = learningRate
        self.X = X
        self.Y = Y
        self.sparseTraining = True
        if (sparcityW == 1.0) and (sparcityB == 1.0) and (sparcityZ == 1.0):
            self.sparseTraining = False
            print("Sparse training disabled.", file=sys.stderr)
        # Define placeholders for sparse training
        self.W_th = None
        self.B_th = None
        self.Z_th = None
        self.__lossType = lossType
        self.__validInit = False
        self.__validInit = self.__validateInit()
        self.__protoNNOut = protoNNObj(X, Y)
        self.loss = self.__lossGraph()
        self.trainStep = self.__trainGraph()
        self.__hthOp = self.__getHardThresholdOp()
        self.accuracy = protoNNObj.getAccuracyOp()

    def __validateInit(self):
        self.__validInit = False
        msg = "Sparsity value should be between"
        msg += " 0 and 1 (both inclusive)."
        assert self.__sW >= 0. and self.__sW <= 1., 'W:' + msg
        assert self.__sB >= 0. and self.__sB <= 1., 'B:' + msg
        assert self.__sZ >= 0. and self.__sZ <= 1., 'Z:' + msg
        d, dcap, m, L, _ = self.protoNNObj.getHyperParams()
        msg = 'Y should be of dimension [-1, num labels/classes]'
        msg += ' specified as part of ProtoNN object.'
        assert (len(self.Y.shape)) == 2, msg
        assert (self.Y.shape[1] == L), msg
        msg = 'X should be of dimension [-1, featureDimension]'
        msg += ' specified as part of ProtoNN object.'
        assert (len(self.X.shape) == 2), msg
        assert (self.X.shape[1] == d), msg
        self.__validInit = True
        msg = 'Values can be \'l2\', or \'xentropy\''
        if self.__lossType not in ['l2', 'xentropy']:
            raise ValueError(msg)
        return True

    def __lossGraph(self):
        pnnOut = self.__protoNNOut
        l1, l2, l3 = self.__regW, self.__regB, self.__regZ
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        if self.__lossType == 'l2':
            with tf.name_scope('protonn-l2-loss'):
                loss_0 = tf.nn.l2_loss(self.Y - pnnOut)
                reg = l1 * tf.nn.l2_loss(W) + l2 * tf.nn.l2_loss(B)
                reg += l3 * tf.nn.l2_loss(Z)
                loss = loss_0 + reg
        elif self.__lossType == 'xentropy':
            with tf.name_scope('protonn-xentropy-loss'):
                loss_0 = tf.nn.softmax_cross_entropy_with_logits_v2(logits=pnnOut,
                                                         labels=tf.stop_gradient(self.Y))
                loss_0 = tf.reduce_mean(loss_0)
                reg = l1 * tf.nn.l2_loss(W) + l2 * tf.nn.l2_loss(B)
                reg += l3 * tf.nn.l2_loss(Z)
                loss = loss_0 + reg
        return loss

    def __trainGraph(self):
        with tf.name_scope('protonn-gradient-adam'):
            trainStep = tf.train.AdamOptimizer(self.__lR)
            trainStep = trainStep.minimize(self.loss)
        return trainStep

    def __getHardThresholdOp(self):
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        self.W_th = tf.placeholder(tf.float32, name='W_th')
        self.B_th = tf.placeholder(tf.float32, name='B_th')
        self.Z_th = tf.placeholder(tf.float32, name='Z_th')
        with tf.name_scope('hard-threshold-assignments'):
            hard_thrsd_W = W.assign(self.W_th)
            hard_thrsd_B = B.assign(self.B_th)
            hard_thrsd_Z = Z.assign(self.Z_th)
            hard_thrsd_op = tf.group(hard_thrsd_W, hard_thrsd_B, hard_thrsd_Z)
        return hard_thrsd_op

    def train(self, batchSize, totalEpochs, sess,
              x_train, x_val, y_train, y_val, noInit=False,
              redirFile=None, printStep=10, valStep=3):
        '''
        Performs dense training of ProtoNN followed by iterative hard
        thresholding to enforce sparsity constraints.

        batchSize: Batch size per update
        totalEpochs: The number of epochs to run training for. One epoch is
            defined as one pass over the entire training data.
        sess: The Tensorflow session to use for running various graph
            operators.
        x_train, x_val, y_train, y_val: The numpy array containing train and
            validation data. x data is assumed to in of shape [-1,
            featureDimension] while y should have shape [-1, numberLabels].
        noInit: By default, all the tensors of the computation graph are
        initialized at the start of the training session. Set noInit=False to
        disable this behaviour.
        printStep: Number of batches between echoing of loss and train accuracy.
        valStep: Number of epochs between evolutions on validation set.
        '''
        d, d_cap, m, L, gamma = self.protoNNObj.getHyperParams()
        assert batchSize >= 1, 'Batch size should be positive integer'
        assert totalEpochs >= 1, 'Total epochs should be positive integer'
        assert x_train.ndim == 2, 'Expected training data to be of rank 2'
        assert x_train.shape[1] == d, 'Expected x_train to be [-1, %d]' % d
        assert x_val.ndim == 2, 'Expected validation data to be of rank 2'
        assert x_val.shape[1] == d, 'Expected x_val to be [-1, %d]' % d
        assert y_train.ndim == 2, 'Expected training labels to be of rank 2'
        assert y_train.shape[1] == L, 'Expected y_train to be [-1, %d]' % L
        assert y_val.ndim == 2, 'Expected validation labels to be of rank 2'
        assert y_val.shape[1] == L, 'Expected y_val to be [-1, %d]' % L

        # Numpy will throw asserts for arrays
        if sess is None:
            raise ValueError('sess must be valid Tensorflow session.')

        trainNumBatches = int(np.ceil(len(x_train) / batchSize))
        valNumBatches = int(np.ceil(len(x_val) / batchSize))
        x_train_batches = np.array_split(x_train, trainNumBatches)
        y_train_batches = np.array_split(y_train, trainNumBatches)
        x_val_batches = np.array_split(x_val, valNumBatches)
        y_val_batches = np.array_split(y_val, valNumBatches)
        if not noInit:
            sess.run(tf.global_variables_initializer())
        X, Y = self.X, self.Y
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        for epoch in range(totalEpochs):
            for i in range(len(x_train_batches)):
                batch_x = x_train_batches[i]
                batch_y = y_train_batches[i]
                feed_dict = {
                    X: batch_x,
                    Y: batch_y
                }
                sess.run(self.trainStep, feed_dict=feed_dict)
                if i % printStep == 0:
                    loss, acc = sess.run([self.loss, self.accuracy],
                                         feed_dict=feed_dict)
                    msg = "Epoch: %3d Batch: %3d" % (epoch, i)
                    msg += " Loss: %3.5f Accuracy: %2.5f" % (loss, acc)
                    print(msg, file=redirFile)

            # Perform Hard thresholding
            if self.sparseTraining:
                W_, B_, Z_ = sess.run([W, B, Z])
                fd_thrsd = {
                    self.W_th: hardThreshold(W_, self.__sW),
                    self.B_th: hardThreshold(B_, self.__sB),
                    self.Z_th: hardThreshold(Z_, self.__sZ)
                }
                sess.run(self.__hthOp, feed_dict=fd_thrsd)

            if (epoch + 1) % valStep  == 0:
                acc = 0.0
                loss = 0.0
                for j in range(len(x_val_batches)):
                    batch_x = x_val_batches[j]
                    batch_y = y_val_batches[j]
                    feed_dict = {
                        X: batch_x,
                        Y: batch_y
                    }
                    acc_, loss_ = sess.run([self.accuracy, self.loss],
                                           feed_dict=feed_dict)
                    acc += acc_
                    loss += loss_
                acc /= len(y_val_batches)
                loss /= len(y_val_batches)
                print("Test Loss: %2.5f Accuracy: %2.5f" % (loss, acc))



class ProtoNN:
    def __init__(self, inputDimension, projectionDimension, numPrototypes,
                 numOutputLabels, gamma,
                 W = None, B = None, Z = None):
        '''
        Forward computation graph for ProtoNN.

        inputDimension: Input data dimension or feature dimension.
        projectionDimension: hyperparameter
        numPrototypes: hyperparameter
        numOutputLabels: The number of output labels or classes
        W, B, Z: Numpy matrices that can be used to initialize
            projection matrix(W), prototype matrix (B) and prototype labels
            matrix (B).
            Expected Dimensions:
                W   inputDimension (d) x projectionDimension (d_cap)
                B   projectionDimension (d_cap) x numPrototypes (m)
                Z   numOutputLabels (L) x numPrototypes (m)
        '''
        with tf.name_scope('protoNN') as ns:
            self.__nscope = ns
        self.__d = inputDimension
        self.__d_cap = projectionDimension
        self.__m = numPrototypes
        self.__L = numOutputLabels

        self.__inW = W
        self.__inB = B
        self.__inZ = Z
        self.__inGamma = gamma
        self.W, self.B, self.Z = None, None, None
        self.gamma = None

        self.__validInit = False
        self.__initWBZ()
        self.__initGamma()
        self.__validateInit()
        self.protoNNOut = None
        self.predictions = None
        self.accuracy = None

    def __validateInit(self):
        self.__validInit = False
        errmsg = "Dimensions mismatch! Should be W[d, d_cap]"
        errmsg += ", B[d_cap, m] and Z[L, m]"
        d, d_cap, m, L, _ = self.getHyperParams()
        assert self.W.shape[0] == d, errmsg
        assert self.W.shape[1] == d_cap, errmsg
        assert self.B.shape[0] == d_cap, errmsg
        assert self.B.shape[1] == m, errmsg
        assert self.Z.shape[0] == L, errmsg
        assert self.Z.shape[1] == m, errmsg
        self.__validInit = True

    def __initWBZ(self):
        with tf.name_scope(self.__nscope):
            W = self.__inW
            if W is None:
                W = tf.random_normal_initializer()
                W = W([self.__d, self.__d_cap])
            self.W = tf.Variable(W, name='W', dtype=tf.float32)

            B = self.__inB
            if B is None:
                B = tf.random_uniform_initializer()
                B = B([self.__d_cap, self.__m])
            self.B = tf.Variable(B, name='B', dtype=tf.float32)

            Z = self.__inZ
            if Z is None:
                Z = tf.random_normal_initializer()
                Z = Z([self.__L, self.__m])
            Z = tf.Variable(Z, name='Z', dtype=tf.float32)
            self.Z = Z
        return self.W, self.B, self.Z

    def __initGamma(self):
        with tf.name_scope(self.__nscope):
            gamma = self.__inGamma
            self.gamma = tf.constant(gamma, name='gamma')

    def getHyperParams(self):
        '''
        Returns the model hyperparameters:
            [inputDimension, projectionDimension,
            numPrototypes, numOutputLabels, gamma]
        '''
        d = self.__d
        dcap = self.__d_cap
        m = self.__m
        L = self.__L
        return d, dcap, m, L, self.gamma

    def getModelMatrices(self):
        '''
        Returns Tensorflow tensors of the model matrices, which
        can then be evaluated to obtain corresponding numpy arrays.

        These can then be exported as part of other implementations of
        ProtonNN, for instance a C++ implementation or pure python
        implementation.
        Returns
            [ProjectionMatrix (W), prototypeMatrix (B),
             prototypeLabelsMatrix (Z), gamma]
        '''
        return self.W, self.B, self.Z, self.gamma

    def __call__(self, X, Y=None):
        '''
        This method is responsible for construction of the forward computation
        graph. The end point of the computation graph, or in other words the
        output operator for the forward computation is returned. Additionally,
        if the argument Y is provided, a classification accuracy operator with
        Y as target will also be created. For this, Y is assumed to in one-hot
        encoded format and the class with the maximum prediction score is
        compared to the encoded class in Y.  This accuracy operator is returned
        by getAccuracyOp() method. If a different accuracyOp is required, it
        can be defined by overriding the createAccOp(protoNNScoresOut, Y)
        method.

        X: Input tensor or placeholder of shape [-1, inputDimension]
        Y: Optional tensor or placeholder for targets (labels or classes).
            Expected shape is [-1, numOutputLabels].
        returns: The forward computation outputs, self.protoNNOut
        '''
        # This should never execute
        assert self.__validInit is True, "Initialization failed!"
        if self.protoNNOut is not None:
            return self.protoNNOut

        W, B, Z, gamma = self.W, self.B, self.Z, self.gamma
        with tf.name_scope(self.__nscope):
            WX = tf.matmul(X, W)
            # Convert WX to tensor so that broadcasting can work
            dim = [-1, WX.shape.as_list()[1], 1]
            WX = tf.reshape(WX, dim)
            dim = [1, B.shape.as_list()[0], -1]
            B = tf.reshape(B, dim)
            l2sim = B - WX
            l2sim = tf.pow(l2sim, 2)
            l2sim = tf.reduce_sum(l2sim, 1, keepdims=True)
            self.l2sim = l2sim
            gammal2sim = (-1 * gamma * gamma) * l2sim
            M = tf.exp(gammal2sim)
            dim = [1] + Z.shape.as_list()
            Z = tf.reshape(Z, dim)
            y = tf.multiply(Z, M)
            y = tf.reduce_sum(y, 2, name='protoNNScoreOut')
            self.protoNNOut = y
            self.predictions = tf.argmax(y, 1, name='protoNNPredictions')
            if Y is not None:
                self.createAccOp(self.protoNNOut, Y)
        return y

    def createAccOp(self, outputs, target):
        '''
        Define an accuracy operation on ProtoNN's output scores and targets.
        Here a simple classification accuracy operator is defined. More
        complicated operators (for multiple label problems and so forth) can be
        defined by overriding this method
        '''
        assert self.predictions is not None
        target = tf.argmax(target, 1)
        correctPrediction = tf.equal(self.predictions, target)
        acc = tf.reduce_mean(tf.cast(correctPrediction, tf.float32),
                             name='protoNNAccuracy')
        self.accuracy = acc

    def getPredictionsOp(self):
        '''
        The predictions operator is defined as argmax(protoNNScores) for each
        prediction.
        '''
        return self.predictions

    def getAccuracyOp(self):
        '''
        returns accuracyOp as defined by createAccOp. It defaults to
        multi-class classification accuracy.
        '''
        msg = "Accuracy operator not defined in graph. Did you provide Y as an"
        msg += " argument to _call_?"
        assert self.accuracy is not None, msg
        return self.accuracy

Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
DATA_DIR = r"./experiments"
windowLen = 'data_w3'
out = preprocessData(DATA_DIR,windowLen)
dataDimension = out[0]
numClasses = out[1]
x_train, y_train = out[2], out[3]
x_test, y_test = out[4], out[5]
print("Feature Dimension: ", dataDimension)
print("Num classes: ", numClasses)

Feature Dimension:  423
Num classes:  2


In [4]:

DATA_DIR = r"./experiments"
train, test = np.load(DATA_DIR + '/ttrain_data_w3.npy'), np.load(DATA_DIR + '/ttest_data_w3.npy')
x_train, y_train = train[:, 1:], train[:, 0]
x_test, y_test = test[:, 1:], test[:, 0]

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.15, random_state=42)

numClasses = max(y_train) - min(y_train) + 1
numClasses = max(numClasses, max(y_test) - min(y_test) + 1)
numClasses = int(numClasses)

y_train = helper.to_onehot(y_train, numClasses)
y_test = helper.to_onehot(y_test, numClasses)
y_val = helper.to_onehot(y_val, numClasses)

dataDimension = x_train.shape[1]
numClasses = y_train.shape[1]

In [7]:
PROJECTION_DIM = 5 #d^
NUM_PROTOTYPES = 40 #m
REG_W = 0.000005
REG_B = 0.0
REG_Z = 0.00005
SPAR_W = 1.0
SPAR_B = 0.8
SPAR_Z = 0.8
LEARNING_RATE = 0.001
NUM_EPOCHS = 600
BATCH_SIZE = 2048
GAMMA = 0.007586
W, B, gamma = getGamma(GAMMA, PROJECTION_DIM, dataDimension,
                       NUM_PROTOTYPES, x_train)

gamma

0.007586

In [8]:
X = tf.placeholder(tf.float32, [None, dataDimension], name='X')
Y = tf.placeholder(tf.float32, [None, numClasses], name='Y')

protoNN = ProtoNN(dataDimension, PROJECTION_DIM,
                  NUM_PROTOTYPES, numClasses,
                  gamma, W=W, B=B)

trainer = ProtoNNTrainer(protoNN,  REG_W, REG_B, REG_Z,
                         SPAR_W, SPAR_B, SPAR_Z,
                        LEARNING_RATE, X, Y, lossType='l2')
sess = tf.Session()

trainer.train(2048, 800, sess, x_train, x_test, y_train, y_test,printStep=600, valStep=10)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch:   0 Batch:   0 Loss: 4993.86279 Accuracy: 0.49634
Epoch:   1 Batch:   0 Loss: 2749.55737 Accuracy: 0.49634
Epoch:   2 Batch:   0 Loss: 1998.62317 Accuracy: 0.49634
Epoch:   3 Batch:   0 Loss: 1437.14758 Accuracy: 0.49634
Epoch:   4 Batch:   0 Loss: 1033.77112 Accuracy: 0.49634
Epoch:   5 Batch:   0 Loss: 749.09021 Accuracy: 0.49634
Epoch:   6 Batch:   0 Loss: 555.66095 Accuracy: 0.49634
Epoch:   7 Batch:   0 Loss: 428.96310 Accuracy: 0.49634
Epoch:   8 Batch:   0 Loss: 349.11774 Accuracy: 0.49634
Epoch:   9 Batch:   0 Loss: 300.78848 Accuracy: 0.49634
Test Loss: 270.03414 Accuracy: 0.50013
Epoch:  10 Batch:   0 Loss: 272.72180 Accuracy: 0.49634
Epoch:  11 Batch:   0 Loss: 257.06842 Accuracy: 0.49634
Epoch:  12 Batch:   0 Loss: 248.63916 Accuracy: 0.49634
Epoch:  13 Batch:   0 Loss: 244.17857 Accuracy: 0.49634
Epoch:  14 Batch:   0 Loss: 241.80022 Accuracy: 0.49634
Epoch:  15 Batch:   0 

Epoch: 134 Batch:   0 Loss: 92.04651 Accuracy: 0.91850
Epoch: 135 Batch:   0 Loss: 91.18482 Accuracy: 0.91954
Epoch: 136 Batch:   0 Loss: 90.34028 Accuracy: 0.92059
Epoch: 137 Batch:   0 Loss: 89.51270 Accuracy: 0.92059
Epoch: 138 Batch:   0 Loss: 88.70174 Accuracy: 0.92059
Epoch: 139 Batch:   0 Loss: 87.90727 Accuracy: 0.92268
Test Loss: 161.81674 Accuracy: 0.75622
Epoch: 140 Batch:   0 Loss: 87.12898 Accuracy: 0.92268
Epoch: 141 Batch:   0 Loss: 86.36668 Accuracy: 0.92372
Epoch: 142 Batch:   0 Loss: 85.62008 Accuracy: 0.92372
Epoch: 143 Batch:   0 Loss: 84.88896 Accuracy: 0.92372
Epoch: 144 Batch:   0 Loss: 84.17307 Accuracy: 0.92372
Epoch: 145 Batch:   0 Loss: 83.47214 Accuracy: 0.92476
Epoch: 146 Batch:   0 Loss: 82.78599 Accuracy: 0.92476
Epoch: 147 Batch:   0 Loss: 82.11426 Accuracy: 0.92476
Epoch: 148 Batch:   0 Loss: 81.45680 Accuracy: 0.92581
Epoch: 149 Batch:   0 Loss: 80.81331 Accuracy: 0.92685
Test Loss: 161.00150 Accuracy: 0.75622
Epoch: 150 Batch:   0 Loss: 80.18355 Accur

Epoch: 275 Batch:   0 Loss: 51.59643 Accuracy: 0.94671
Epoch: 276 Batch:   0 Loss: 51.51987 Accuracy: 0.94671
Epoch: 277 Batch:   0 Loss: 51.44411 Accuracy: 0.94671
Epoch: 278 Batch:   0 Loss: 51.36912 Accuracy: 0.94671
Epoch: 279 Batch:   0 Loss: 51.29488 Accuracy: 0.94671
Test Loss: 163.68580 Accuracy: 0.76576
Epoch: 280 Batch:   0 Loss: 51.22139 Accuracy: 0.94671
Epoch: 281 Batch:   0 Loss: 51.14862 Accuracy: 0.94671
Epoch: 282 Batch:   0 Loss: 51.07660 Accuracy: 0.94671
Epoch: 283 Batch:   0 Loss: 51.00527 Accuracy: 0.94671
Epoch: 284 Batch:   0 Loss: 50.93464 Accuracy: 0.94671
Epoch: 285 Batch:   0 Loss: 50.86470 Accuracy: 0.94671
Epoch: 286 Batch:   0 Loss: 50.79547 Accuracy: 0.94671
Epoch: 287 Batch:   0 Loss: 50.72689 Accuracy: 0.94671
Epoch: 288 Batch:   0 Loss: 50.65896 Accuracy: 0.94671
Epoch: 289 Batch:   0 Loss: 50.59170 Accuracy: 0.94671
Test Loss: 164.45956 Accuracy: 0.76616
Epoch: 290 Batch:   0 Loss: 50.52507 Accuracy: 0.94671
Epoch: 291 Batch:   0 Loss: 50.45908 Accur

Epoch: 415 Batch:   0 Loss: 45.20063 Accuracy: 0.95089
Epoch: 416 Batch:   0 Loss: 45.17252 Accuracy: 0.95089
Epoch: 417 Batch:   0 Loss: 45.14454 Accuracy: 0.95089
Epoch: 418 Batch:   0 Loss: 45.11669 Accuracy: 0.95089
Epoch: 419 Batch:   0 Loss: 45.08898 Accuracy: 0.95089
Test Loss: 177.45249 Accuracy: 0.75305
Epoch: 420 Batch:   0 Loss: 45.06140 Accuracy: 0.95089
Epoch: 421 Batch:   0 Loss: 45.03394 Accuracy: 0.95089
Epoch: 422 Batch:   0 Loss: 45.00661 Accuracy: 0.95089
Epoch: 423 Batch:   0 Loss: 44.97942 Accuracy: 0.95089
Epoch: 424 Batch:   0 Loss: 44.95235 Accuracy: 0.95089
Epoch: 425 Batch:   0 Loss: 44.92541 Accuracy: 0.95089
Epoch: 426 Batch:   0 Loss: 44.89859 Accuracy: 0.95089
Epoch: 427 Batch:   0 Loss: 44.87186 Accuracy: 0.95089
Epoch: 428 Batch:   0 Loss: 44.84529 Accuracy: 0.95089
Epoch: 429 Batch:   0 Loss: 44.81883 Accuracy: 0.95089
Test Loss: 178.28396 Accuracy: 0.75252
Epoch: 430 Batch:   0 Loss: 44.79248 Accuracy: 0.95089
Epoch: 431 Batch:   0 Loss: 44.76625 Accur

Epoch: 556 Batch:   0 Loss: 42.11421 Accuracy: 0.95507
Epoch: 557 Batch:   0 Loss: 42.09645 Accuracy: 0.95507
Epoch: 558 Batch:   0 Loss: 42.07874 Accuracy: 0.95507
Epoch: 559 Batch:   0 Loss: 42.06104 Accuracy: 0.95507
Test Loss: 185.95644 Accuracy: 0.74743
Epoch: 560 Batch:   0 Loss: 42.04341 Accuracy: 0.95507
Epoch: 561 Batch:   0 Loss: 42.02581 Accuracy: 0.95611
Epoch: 562 Batch:   0 Loss: 42.00824 Accuracy: 0.95611
Epoch: 563 Batch:   0 Loss: 41.99072 Accuracy: 0.95611
Epoch: 564 Batch:   0 Loss: 41.97322 Accuracy: 0.95611
Epoch: 565 Batch:   0 Loss: 41.95575 Accuracy: 0.95611
Epoch: 566 Batch:   0 Loss: 41.93835 Accuracy: 0.95611
Epoch: 567 Batch:   0 Loss: 41.92096 Accuracy: 0.95611
Epoch: 568 Batch:   0 Loss: 41.90361 Accuracy: 0.95611
Epoch: 569 Batch:   0 Loss: 41.88631 Accuracy: 0.95611
Test Loss: 186.51765 Accuracy: 0.74770
Epoch: 570 Batch:   0 Loss: 41.86903 Accuracy: 0.95611
Epoch: 571 Batch:   0 Loss: 41.85180 Accuracy: 0.95611
Epoch: 572 Batch:   0 Loss: 41.83459 Accur

Epoch: 697 Batch:   0 Loss: 39.89101 Accuracy: 0.95716
Epoch: 698 Batch:   0 Loss: 39.87683 Accuracy: 0.95716
Epoch: 699 Batch:   0 Loss: 39.86265 Accuracy: 0.95716
Test Loss: 193.91769 Accuracy: 0.74676
Epoch: 700 Batch:   0 Loss: 39.84851 Accuracy: 0.95716
Epoch: 701 Batch:   0 Loss: 39.83438 Accuracy: 0.95716
Epoch: 702 Batch:   0 Loss: 39.82028 Accuracy: 0.95716
Epoch: 703 Batch:   0 Loss: 39.80618 Accuracy: 0.95716
Epoch: 704 Batch:   0 Loss: 39.79211 Accuracy: 0.95716
Epoch: 705 Batch:   0 Loss: 39.77806 Accuracy: 0.95716
Epoch: 706 Batch:   0 Loss: 39.76404 Accuracy: 0.95716
Epoch: 707 Batch:   0 Loss: 39.75003 Accuracy: 0.95716
Epoch: 708 Batch:   0 Loss: 39.73605 Accuracy: 0.95716
Epoch: 709 Batch:   0 Loss: 39.72209 Accuracy: 0.95820
Test Loss: 194.50344 Accuracy: 0.74663
Epoch: 710 Batch:   0 Loss: 39.70816 Accuracy: 0.95820
Epoch: 711 Batch:   0 Loss: 39.69424 Accuracy: 0.95820
Epoch: 712 Batch:   0 Loss: 39.68032 Accuracy: 0.95820
Epoch: 713 Batch:   0 Loss: 39.66644 Accur

In [9]:
acc = sess.run(protoNN.accuracy, feed_dict={X: x_test, Y: y_test})
pred = sess.run(protoNN.predictions, feed_dict={X: x_test, Y: y_test})
# W, B, Z are tensorflow graph nodes
W, B, Z, _ = protoNN.getModelMatrices()
matrixList = sess.run([W, B, Z])
sparcityList = [SPAR_W, SPAR_B, SPAR_Z]                       
nnz, size, sparse = getModelSize(matrixList, sparcityList)
print("Final test accuracy", acc)
print("Model size constraint (Bytes): ", size)
print("Number of non-zeros: ", nnz)

Final test accuracy 0.7464506
Model size constraint (Bytes):  9580
Number of non-zeros:  2395


In [10]:
from sklearn.metrics import confusion_matrix,classification_report
y_test = np.argmax(y_test,axis=1)
print (confusion_matrix(y_test,pred))
print (classification_report(y_test,pred,digits=5))

[[2386 1347]
 [ 546 3187]]
              precision    recall  f1-score   support

           0    0.81378   0.63916   0.71598      3733
           1    0.70291   0.85374   0.77102      3733

    accuracy                        0.74645      7466
   macro avg    0.75835   0.74645   0.74350      7466
weighted avg    0.75835   0.74645   0.74350      7466



In [11]:
sensitivity = confusion_matrix(y_test,pred)[1][1]/(confusion_matrix(y_test,pred)[1][1] + confusion_matrix(y_test,pred)[1][0])
sensitivity

0.8537369407982855

In [12]:
specificity = confusion_matrix(y_test,pred)[0][0]/(confusion_matrix(y_test,pred)[0][0] + confusion_matrix(y_test,pred)[0][1])
specificity

0.6391642110902759

**WINDOW 4**

In [13]:
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

from __future__ import print_function
import sys
import os
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

#sys.path.insert(0, '../../')
# from edgeml.trainer.protoNNTrainer import ProtoNNTrainer
# from edgeml.graph.protoNN import ProtoNN
# import edgeml.utils as utils
# import helpermethods as helper
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV


sys.path.append(r"E:\programming\practice\research\optimized code\EdgeML\examples\tf\ProtoNN")
import helpermethods as helper

#helper methods
sys.path.insert(0, '../')
import argparse


def getModelSize(matrixList, sparcityList, expected=True, bytesPerVar=4):
    '''
    expected: Expected size according to the parameters set. The number of
        zeros could actually be more than that is required to satisfy the
        sparsity constraint.
    '''
    nnzList, sizeList, isSparseList = [], [], []
    hasSparse = False
    for i in range(len(matrixList)):
        A, s = matrixList[i], sparcityList[i]
        assert A.ndim == 2
        assert s >= 0
        assert s <= 1
        nnz, size, sparse = countnnZ(A, s, bytesPerVar=bytesPerVar)
        nnzList.append(nnz)
        sizeList.append(size)
        hasSparse = (hasSparse or sparse)

    totalnnZ = np.sum(nnzList)
    totalSize = np.sum(sizeList)
    if expected:
        return totalnnZ, totalSize, hasSparse
    numNonZero = 0
    totalSize = 0
    hasSparse = False
    for i in range(len(matrixList)):
        A, s = matrixList[i], sparcityList[i]
        numNonZero_ = np.count_nonzero(A)
        numNonZero += numNonZero_
        hasSparse = (hasSparse or (s < 0.5))
        if s <= 0.5:
            totalSize += numNonZero_ * 2 * bytesPerVar
        else:
            totalSize += A.size * bytesPerVar
    return numNonZero, totalSize, hasSparse


def getGamma(gammaInit, projectionDim, dataDim, numPrototypes, x_train):
    if gammaInit is None:
        print("Using median heuristic to estimate gamma.")
        gamma, W, B = medianHeuristic(x_train, projectionDim,
                                            numPrototypes)
        print("Gamma estimate is: %f" % gamma)
        return W, B, gamma
    return None, None, gammaInit


def preprocessData(dataDir,w):
    '''
    Loads data from the dataDir and does some initial preprocessing
    steps. Data is assumed to be contained in two files,
    train.npy and test.npy. Each containing a 2D numpy array of dimension
    [numberOfExamples, numberOfFeatures + 1]. The first column of each
    matrix is assumed to contain label information.

    For an N-Class problem, we assume the labels are integers from 0 through
    N-1.
    '''
    # Uncomment for usual training data
    # train = np.load(dataDir + '/train_'+str(w)+'.npy')
    # test = np.load(dataDir + '/test_'+str(w)+'.npy')
    # Uncomment for time domain training data
    train = np.load(dataDir + '/ttrain_'+str(w)+'.npy')
    test = np.load(dataDir + '/ttest_'+str(w)+'.npy')
    # Uncomment for 1 sensordrop training data
    # train = np.load(dataDir + '/train_'+str(w)+'.npy')
    # test = np.load(dataDir + '/test_'+str(w)+'.npy')

    dataDimension = int(train.shape[1]) - 1
    x_train = train[:, 1:dataDimension + 1]
    y_train_ = train[:, 0]
    x_test = test[:, 1:dataDimension + 1]
    y_test_ = test[:, 0]

    numClasses = max(y_train_) - min(y_train_) + 1
    numClasses = max(numClasses, max(y_test_) - min(y_test_) + 1)
    numClasses = int(numClasses)

    # mean-var
    mean = np.mean(x_train, 0)
    std = np.std(x_train, 0)
    std[std[:] < 0.000001] = 1
    x_train = (x_train - mean) / std
    x_test = (x_test - mean) / std

    # one hot y-train
    lab = y_train_.astype('uint8')
    lab = np.array(lab) - min(lab)
    lab_ = np.zeros((x_train.shape[0], numClasses))
    lab_[np.arange(x_train.shape[0]), lab] = 1
    y_train = lab_

    # one hot y-test
    lab = y_test_.astype('uint8')
    lab = np.array(lab) - min(lab)
    lab_ = np.zeros((x_test.shape[0], numClasses))
    lab_[np.arange(x_test.shape[0]), lab] = 1
    y_test = lab_

    return dataDimension, numClasses, x_train, y_train, x_test, y_test



def getProtoNNArgs():
    def checkIntPos(value):
        ivalue = int(value)
        if ivalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive int value" % value)
        return ivalue

    def checkIntNneg(value):
        ivalue = int(value)
        if ivalue < 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid non-neg int value" % value)
        return ivalue

    def checkFloatNneg(value):
        fvalue = float(value)
        if fvalue < 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid non-neg float value" % value)
        return fvalue

    def checkFloatPos(value):
        fvalue = float(value)
        if fvalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive float value" % value)
        return fvalue

    '''
    Parse protoNN commandline arguments
    '''
    parser = argparse.ArgumentParser(
        description='Hyperparameters for ProtoNN Algorithm')

    msg = 'Data directory containing train and test data. The '
    msg += 'data is assumed to be saved as 2-D numpy matrices with '
    msg += 'names `train.npy` and `test.npy`, of dimensions\n'
    msg += '\t[numberOfInstances, numberOfFeatures + 1].\n'
    msg += 'The first column of each file is assumed to contain label information.'
    msg += ' For a N-class problem, labels are assumed to be integers from 0 to'
    msg += ' N-1 (inclusive).'
    parser.add_argument('-d', '--data-dir', required=True, help=msg)
    parser.add_argument('-l', '--projection-dim', type=checkIntPos, default=10,
                        help='Projection Dimension.')
    parser.add_argument('-p', '--num-prototypes', type=checkIntPos, default=20,
                        help='Number of prototypes.')
    parser.add_argument('-g', '--gamma', type=checkFloatPos, default=None,
                        help='Gamma for Gaussian kernel. If not provided, ' +
                        'median heuristic will be used to estimate gamma.')

    parser.add_argument('-e', '--epochs', type=checkIntPos, default=100,
                        help='Total training epochs.')
    parser.add_argument('-b', '--batch-size', type=checkIntPos, default=32,
                        help='Batch size for each pass.')
    parser.add_argument('-r', '--learning-rate', type=checkFloatPos,
                        default=0.001,
                        help='Initial Learning rate for ADAM Optimizer.')

    parser.add_argument('-rW', type=float, default=0.000,
                        help='Coefficient for l2 regularizer for predictor' +
                        ' parameter W ' + '(default = 0.0).')
    parser.add_argument('-rB', type=float, default=0.00,
                        help='Coefficient for l2 regularizer for predictor' +
                        ' parameter B ' + '(default = 0.0).')
    parser.add_argument('-rZ', type=float, default=0.00,
                        help='Coefficient for l2 regularizer for predictor' +
                        'parameter Z ' +
                        '(default = 0.0).')

    parser.add_argument('-sW', type=float, default=1.000,
                        help='Sparsity constraint for predictor parameter W ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-sB', type=float, default=1.00,
                        help='Sparsity constraint for predictor parameter B ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-sZ', type=float, default=1.00,
                        help='Sparsity constraint for predictor parameter Z ' +
                        '(default = 1.0, i.e. dense matrix).')
    parser.add_argument('-pS', '--print-step', type=int, default=200,
                        help='The number of update steps between print ' +
                        'calls to console.')
    parser.add_argument('-vS', '--val-step', type=int, default=3,
                        help='The number of epochs between validation' +
                        'performance evaluation')
    return parser.parse_args()

#utils
import scipy.cluster
import scipy.spatial
import os


def medianHeuristic(data, projectionDimension, numPrototypes, W_init=None):
    '''
    This method can be used to estimate gamma for ProtoNN. An approximation to
    median heuristic is used here.
    1. First the data is collapsed into the projectionDimension by W_init. If
    W_init is not provided, it is initialized from a random normal(0, 1). Hence
    data normalization is essential.
    2. Prototype are computed by running a  k-means clustering on the projected
    data.
    3. The median distance is then estimated by calculating median distance
    between prototypes and projected data points.

    data needs to be [-1, numFeats]
    If using this method to initialize gamma, please use the W and B as well.

    TODO: Return estimate of Z (prototype labels) based on cluster centroids
    andand labels

    TODO: Clustering fails due to singularity error if projecting upwards

    W [dxd_cap]
    B [d_cap, m]
    returns gamma, W, B
    '''
    assert data.ndim == 2
    X = data
    featDim = data.shape[1]
    if projectionDimension > featDim:
        print("Warning: Projection dimension > feature dimension. Gamma")
        print("\t estimation due to median heuristic could fail.")
        print("\tTo retain the projection dataDimension, provide")
        print("\ta value for gamma.")

    if W_init is None:
        W_init = np.random.normal(size=[featDim, projectionDimension])
    W = W_init
    XW = np.matmul(X, W)
    assert XW.shape[1] == projectionDimension
    assert XW.shape[0] == len(X)
    # Requires [N x d_cap] data matrix of N observations of d_cap-dimension and
    # the number of centroids m. Returns, [n x d_cap] centroids and
    # elementwise center information.
    B, centers = scipy.cluster.vq.kmeans2(XW, numPrototypes)
    # Requires two matrices. Number of observations x dimension of observation
    # space. Distances[i,j] is the distance between XW[i] and B[j]
    distances = scipy.spatial.distance.cdist(XW, B, metric='euclidean')
    distances = np.reshape(distances, [-1])
    gamma = np.median(distances)
    gamma = 1 / (2.5 * gamma)
    return gamma.astype('float32'), W.astype('float32'), B.T.astype('float32')


def multiClassHingeLoss(logits, label, batch_th):
    '''
    MultiClassHingeLoss to match C++ Version - No TF internal version
    '''
    flatLogits = tf.reshape(logits, [-1, ])
    label_ = tf.argmax(label, 1)

    correctId = tf.range(0, batch_th) * label.shape[1] + label_
    correctLogit = tf.gather(flatLogits, correctId)

    maxLabel = tf.argmax(logits, 1)
    top2, _ = tf.nn.top_k(logits, k=2, sorted=True)

    wrongMaxLogit = tf.where(
        tf.equal(maxLabel, label_), top2[:, 1], top2[:, 0])

    return tf.reduce_mean(tf.nn.relu(1. + wrongMaxLogit - correctLogit))


def crossEntropyLoss(logits, label):
    '''
    Cross Entropy loss for MultiClass case in joint training for
    faster convergence
    '''
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                   labels=tf.stop_gradient(label)))


def mean_absolute_error(logits, label):
    '''
    Function to compute the mean absolute error.
    '''
    return tf.reduce_mean(tf.abs(tf.subtract(logits, label)))


def hardThreshold(A, s):
    '''
    Hard thresholding function on Tensor A with sparsity s
    '''
    A_ = np.copy(A)
    A_ = A_.ravel()
    if len(A_) > 0:
        th = np.percentile(np.abs(A_), (1 - s) * 100.0, interpolation='higher')
        A_[np.abs(A_) < th] = 0.0
    A_ = A_.reshape(A.shape)
    return A_


def copySupport(src, dest):
    '''
    copy support of src tensor to dest tensor
    '''
    support = np.nonzero(src)
    dest_ = dest
    dest = np.zeros(dest_.shape)
    dest[support] = dest_[support]
    return dest


def countnnZ(A, s, bytesPerVar=4):
    '''
    Returns # of non-zeros and representative size of the tensor
    Uses dense for s >= 0.5 - 4 byte
    Else uses sparse - 8 byte
    '''
    params = 1
    hasSparse = False
    for i in range(0, len(A.shape)):
        params *= int(A.shape[i])
    if s < 0.5:
        nnZ = np.ceil(params * s)
        hasSparse = True
        return nnZ, nnZ * 2 * bytesPerVar, hasSparse
    else:
        nnZ = params
        return nnZ, nnZ * bytesPerVar, hasSparse


def getConfusionMatrix(predicted, target, numClasses):
    '''
    Returns a confusion matrix for a multiclass classification
    problem. `predicted` is a 1-D array of integers representing
    the predicted classes and `target` is the target classes.

    confusion[i][j]: Number of elements of class j
        predicted as class i
    Labels are assumed to be in range(0, numClasses)
    Use`printFormattedConfusionMatrix` to echo the confusion matrix
    in a user friendly form.
    '''
    assert(predicted.ndim == 1)
    assert(target.ndim == 1)
    arr = np.zeros([numClasses, numClasses])

    for i in range(len(predicted)):
        arr[predicted[i]][target[i]] += 1
    return arr


def printFormattedConfusionMatrix(matrix):
    '''
    Given a 2D confusion matrix, prints it in a human readable way.
    The confusion matrix is expected to be a 2D numpy array with
    square dimensions
    '''
    assert(matrix.ndim == 2)
    assert(matrix.shape[0] == matrix.shape[1])
    RECALL = 'Recall'
    PRECISION = 'PRECISION'
    print("|%s|" % ('True->'), end='')
    for i in range(matrix.shape[0]):
        print("%7d|" % i, end='')
    print("%s|" % 'Precision')

    print("|%s|" % ('-' * len(RECALL)), end='')
    for i in range(matrix.shape[0]):
        print("%s|" % ('-' * 7), end='')
    print("%s|" % ('-' * len(PRECISION)))

    precisionlist = np.sum(matrix, axis=1)
    recalllist = np.sum(matrix, axis=0)
    precisionlist = [matrix[i][i] / x if x !=
                     0 else -1 for i, x in enumerate(precisionlist)]
    recalllist = [matrix[i][i] / x if x !=
                  0 else -1 for i, x in enumerate(recalllist)]
    for i in range(matrix.shape[0]):
        # len recall = 6
        print("|%6d|" % (i), end='')
        for j in range(matrix.shape[0]):
            print("%7d|" % (matrix[i][j]), end='')
        print("%s" % (" " * (len(PRECISION) - 7)), end='')
        if precisionlist[i] != -1:
            print("%1.5f|" % precisionlist[i])
        else:
            print("%7s|" % "nan")

    print("|%s|" % ('-' * len(RECALL)), end='')
    for i in range(matrix.shape[0]):
        print("%s|" % ('-' * 7), end='')
    print("%s|" % ('-' * len(PRECISION)))
    print("|%s|" % ('Recall'), end='')

    for i in range(matrix.shape[0]):
        if recalllist[i] != -1:
            print("%1.5f|" % (recalllist[i]), end='')
        else:
            print("%7s|" % "nan", end='')

    print('%s|' % (' ' * len(PRECISION)))


def getPrecisionRecall(cmatrix, label=1):
    trueP = cmatrix[label][label]
    denom = np.sum(cmatrix, axis=0)[label]
    if denom == 0:
        denom = 1
    recall = trueP / denom
    denom = np.sum(cmatrix, axis=1)[label]
    if denom == 0:
        denom = 1
    precision = trueP / denom
    return precision, recall


def getMacroPrecisionRecall(cmatrix):
    # TP + FP
    precisionlist = np.sum(cmatrix, axis=1)
    # TP + FN
    recalllist = np.sum(cmatrix, axis=0)
    precisionlist__ = [cmatrix[i][i] / x if x !=
                       0 else 0 for i, x in enumerate(precisionlist)]
    recalllist__ = [cmatrix[i][i] / x if x !=
                    0 else 0 for i, x in enumerate(recalllist)]
    precision = np.sum(precisionlist__)
    precision /= len(precisionlist__)
    recall = np.sum(recalllist__)
    recall /= len(recalllist__)
    return precision, recall


def getMicroPrecisionRecall(cmatrix):
    # TP + FP
    precisionlist = np.sum(cmatrix, axis=1)
    # TP + FN
    recalllist = np.sum(cmatrix, axis=0)
    num = 0.0
    for i in range(len(cmatrix)):
        num += cmatrix[i][i]

    precision = num / np.sum(precisionlist)
    recall = num / np.sum(recalllist)
    return precision, recall


def getMacroMicroFScore(cmatrix):
    '''
    Returns macro and micro f-scores.
    Refer: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.8244&rep=rep1&type=pdf
    '''
    precisionlist = np.sum(cmatrix, axis=1)
    recalllist = np.sum(cmatrix, axis=0)
    precisionlist__ = [cmatrix[i][i] / x if x !=
                       0 else 0 for i, x in enumerate(precisionlist)]
    recalllist__ = [cmatrix[i][i] / x if x !=
                    0 else 0 for i, x in enumerate(recalllist)]
    macro = 0.0
    for i in range(len(precisionlist)):
        denom = precisionlist__[i] + recalllist__[i]
        numer = precisionlist__[i] * recalllist__[i] * 2
        if denom == 0:
            denom = 1
        macro += numer / denom
    macro /= len(precisionlist)

    num = 0.0
    for i in range(len(precisionlist)):
        num += cmatrix[i][i]

    denom1 = np.sum(precisionlist)
    denom2 = np.sum(recalllist)
    pi = num / denom1
    rho = num / denom2
    denom = pi + rho
    if denom == 0:
        denom = 1
    micro = 2 * pi * rho / denom
    return macro, micro


class GraphManager:
    '''
    Manages saving and restoring graphs. Designed to be used with EMI-RNN
    though is general enough to be useful otherwise as well.
    '''

    def __init__(self):
        pass

    def checkpointModel(self, saver, sess, modelPrefix,
                        globalStep=1000, redirFile=None):
        saver.save(sess, modelPrefix, global_step=globalStep)
        print('Model saved to %s, global_step %d' % (modelPrefix, globalStep),
              file=redirFile)

    def loadCheckpoint(self, sess, modelPrefix, globalStep,
                       redirFile=None):
        metaname = modelPrefix + '-%d.meta' % globalStep
        basename = os.path.basename(metaname)
        fileList = os.listdir(os.path.dirname(modelPrefix))
        fileList = [x for x in fileList if x.startswith(basename)]
        assert len(fileList) > 0, 'Checkpoint file not found'
        msg = 'Too many or too few checkpoint files for globalStep: %d' % globalStep
        assert len(fileList) is 1, msg
        chkpt = basename + '/' + fileList[0]
        saver = tf.train.import_meta_graph(metaname)
        metaname = metaname[:-5]
        saver.restore(sess, metaname)
        graph = tf.get_default_graph()
        return graph

#Trainer
class ProtoNNTrainer:
    def __init__(self, protoNNObj, regW, regB, regZ,
                 sparcityW, sparcityB, sparcityZ,
                 learningRate, X, Y, lossType='l2'):
        '''
        A wrapper for the various techniques used for training ProtoNN. This
        subsumes both the responsibility of loss graph construction and
        performing training. The original training routine that is part of the
        C++ implementation of EdgeML used iterative hard thresholding (IHT),
        gamma estimation through median heuristic and other tricks for
        training ProtoNN. This module implements the same in Tensorflow
        and python.

        protoNNObj: An instance of ProtoNN class defining the forward
            computation graph. The loss functions and training routines will be
            attached to this instance.
        regW, regB, regZ: Regularization constants for W, B, and
            Z matrices of protoNN.
        sparcityW, sparcityB, sparcityZ: Sparsity constraints
            for W, B and Z matrices. A value between 0 (exclusive) and 1
            (inclusive) is expected. A value of 1 indicates dense training.
        learningRate: Initial learning rate for ADAM optimizer.
        X, Y : Placeholders for data and labels.
            X [-1, featureDimension]
            Y [-1, num Labels]
        lossType: ['l2', 'xentropy']
        '''
        self.protoNNObj = protoNNObj
        self.__regW = regW
        self.__regB = regB
        self.__regZ = regZ
        self.__sW = sparcityW
        self.__sB = sparcityB
        self.__sZ = sparcityZ
        self.__lR = learningRate
        self.X = X
        self.Y = Y
        self.sparseTraining = True
        if (sparcityW == 1.0) and (sparcityB == 1.0) and (sparcityZ == 1.0):
            self.sparseTraining = False
            print("Sparse training disabled.", file=sys.stderr)
        # Define placeholders for sparse training
        self.W_th = None
        self.B_th = None
        self.Z_th = None
        self.__lossType = lossType
        self.__validInit = False
        self.__validInit = self.__validateInit()
        self.__protoNNOut = protoNNObj(X, Y)
        self.loss = self.__lossGraph()
        self.trainStep = self.__trainGraph()
        self.__hthOp = self.__getHardThresholdOp()
        self.accuracy = protoNNObj.getAccuracyOp()

    def __validateInit(self):
        self.__validInit = False
        msg = "Sparsity value should be between"
        msg += " 0 and 1 (both inclusive)."
        assert self.__sW >= 0. and self.__sW <= 1., 'W:' + msg
        assert self.__sB >= 0. and self.__sB <= 1., 'B:' + msg
        assert self.__sZ >= 0. and self.__sZ <= 1., 'Z:' + msg
        d, dcap, m, L, _ = self.protoNNObj.getHyperParams()
        msg = 'Y should be of dimension [-1, num labels/classes]'
        msg += ' specified as part of ProtoNN object.'
        assert (len(self.Y.shape)) == 2, msg
        assert (self.Y.shape[1] == L), msg
        msg = 'X should be of dimension [-1, featureDimension]'
        msg += ' specified as part of ProtoNN object.'
        assert (len(self.X.shape) == 2), msg
        assert (self.X.shape[1] == d), msg
        self.__validInit = True
        msg = 'Values can be \'l2\', or \'xentropy\''
        if self.__lossType not in ['l2', 'xentropy']:
            raise ValueError(msg)
        return True

    def __lossGraph(self):
        pnnOut = self.__protoNNOut
        l1, l2, l3 = self.__regW, self.__regB, self.__regZ
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        if self.__lossType == 'l2':
            with tf.name_scope('protonn-l2-loss'):
                loss_0 = tf.nn.l2_loss(self.Y - pnnOut)
                reg = l1 * tf.nn.l2_loss(W) + l2 * tf.nn.l2_loss(B)
                reg += l3 * tf.nn.l2_loss(Z)
                loss = loss_0 + reg
        elif self.__lossType == 'xentropy':
            with tf.name_scope('protonn-xentropy-loss'):
                loss_0 = tf.nn.softmax_cross_entropy_with_logits_v2(logits=pnnOut,
                                                         labels=tf.stop_gradient(self.Y))
                loss_0 = tf.reduce_mean(loss_0)
                reg = l1 * tf.nn.l2_loss(W) + l2 * tf.nn.l2_loss(B)
                reg += l3 * tf.nn.l2_loss(Z)
                loss = loss_0 + reg
        return loss

    def __trainGraph(self):
        with tf.name_scope('protonn-gradient-adam'):
            trainStep = tf.train.AdamOptimizer(self.__lR)
            trainStep = trainStep.minimize(self.loss)
        return trainStep

    def __getHardThresholdOp(self):
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        self.W_th = tf.placeholder(tf.float32, name='W_th')
        self.B_th = tf.placeholder(tf.float32, name='B_th')
        self.Z_th = tf.placeholder(tf.float32, name='Z_th')
        with tf.name_scope('hard-threshold-assignments'):
            hard_thrsd_W = W.assign(self.W_th)
            hard_thrsd_B = B.assign(self.B_th)
            hard_thrsd_Z = Z.assign(self.Z_th)
            hard_thrsd_op = tf.group(hard_thrsd_W, hard_thrsd_B, hard_thrsd_Z)
        return hard_thrsd_op

    def train(self, batchSize, totalEpochs, sess,
              x_train, x_val, y_train, y_val, noInit=False,
              redirFile=None, printStep=10, valStep=3):
        '''
        Performs dense training of ProtoNN followed by iterative hard
        thresholding to enforce sparsity constraints.

        batchSize: Batch size per update
        totalEpochs: The number of epochs to run training for. One epoch is
            defined as one pass over the entire training data.
        sess: The Tensorflow session to use for running various graph
            operators.
        x_train, x_val, y_train, y_val: The numpy array containing train and
            validation data. x data is assumed to in of shape [-1,
            featureDimension] while y should have shape [-1, numberLabels].
        noInit: By default, all the tensors of the computation graph are
        initialized at the start of the training session. Set noInit=False to
        disable this behaviour.
        printStep: Number of batches between echoing of loss and train accuracy.
        valStep: Number of epochs between evolutions on validation set.
        '''
        d, d_cap, m, L, gamma = self.protoNNObj.getHyperParams()
        assert batchSize >= 1, 'Batch size should be positive integer'
        assert totalEpochs >= 1, 'Total epochs should be positive integer'
        assert x_train.ndim == 2, 'Expected training data to be of rank 2'
        assert x_train.shape[1] == d, 'Expected x_train to be [-1, %d]' % d
        assert x_val.ndim == 2, 'Expected validation data to be of rank 2'
        assert x_val.shape[1] == d, 'Expected x_val to be [-1, %d]' % d
        assert y_train.ndim == 2, 'Expected training labels to be of rank 2'
        assert y_train.shape[1] == L, 'Expected y_train to be [-1, %d]' % L
        assert y_val.ndim == 2, 'Expected validation labels to be of rank 2'
        assert y_val.shape[1] == L, 'Expected y_val to be [-1, %d]' % L

        # Numpy will throw asserts for arrays
        if sess is None:
            raise ValueError('sess must be valid Tensorflow session.')

        trainNumBatches = int(np.ceil(len(x_train) / batchSize))
        valNumBatches = int(np.ceil(len(x_val) / batchSize))
        x_train_batches = np.array_split(x_train, trainNumBatches)
        y_train_batches = np.array_split(y_train, trainNumBatches)
        x_val_batches = np.array_split(x_val, valNumBatches)
        y_val_batches = np.array_split(y_val, valNumBatches)
        if not noInit:
            sess.run(tf.global_variables_initializer())
        X, Y = self.X, self.Y
        W, B, Z, _ = self.protoNNObj.getModelMatrices()
        for epoch in range(totalEpochs):
            for i in range(len(x_train_batches)):
                batch_x = x_train_batches[i]
                batch_y = y_train_batches[i]
                feed_dict = {
                    X: batch_x,
                    Y: batch_y
                }
                sess.run(self.trainStep, feed_dict=feed_dict)
                if i % printStep == 0:
                    loss, acc = sess.run([self.loss, self.accuracy],
                                         feed_dict=feed_dict)
                    msg = "Epoch: %3d Batch: %3d" % (epoch, i)
                    msg += " Loss: %3.5f Accuracy: %2.5f" % (loss, acc)
                    print(msg, file=redirFile)

            # Perform Hard thresholding
            if self.sparseTraining:
                W_, B_, Z_ = sess.run([W, B, Z])
                fd_thrsd = {
                    self.W_th: hardThreshold(W_, self.__sW),
                    self.B_th: hardThreshold(B_, self.__sB),
                    self.Z_th: hardThreshold(Z_, self.__sZ)
                }
                sess.run(self.__hthOp, feed_dict=fd_thrsd)

            if (epoch + 1) % valStep  == 0:
                acc = 0.0
                loss = 0.0
                for j in range(len(x_val_batches)):
                    batch_x = x_val_batches[j]
                    batch_y = y_val_batches[j]
                    feed_dict = {
                        X: batch_x,
                        Y: batch_y
                    }
                    acc_, loss_ = sess.run([self.accuracy, self.loss],
                                           feed_dict=feed_dict)
                    acc += acc_
                    loss += loss_
                acc /= len(y_val_batches)
                loss /= len(y_val_batches)
                print("Test Loss: %2.5f Accuracy: %2.5f" % (loss, acc))



class ProtoNN:
    def __init__(self, inputDimension, projectionDimension, numPrototypes,
                 numOutputLabels, gamma,
                 W = None, B = None, Z = None):
        '''
        Forward computation graph for ProtoNN.

        inputDimension: Input data dimension or feature dimension.
        projectionDimension: hyperparameter
        numPrototypes: hyperparameter
        numOutputLabels: The number of output labels or classes
        W, B, Z: Numpy matrices that can be used to initialize
            projection matrix(W), prototype matrix (B) and prototype labels
            matrix (B).
            Expected Dimensions:
                W   inputDimension (d) x projectionDimension (d_cap)
                B   projectionDimension (d_cap) x numPrototypes (m)
                Z   numOutputLabels (L) x numPrototypes (m)
        '''
        with tf.name_scope('protoNN') as ns:
            self.__nscope = ns
        self.__d = inputDimension
        self.__d_cap = projectionDimension
        self.__m = numPrototypes
        self.__L = numOutputLabels

        self.__inW = W
        self.__inB = B
        self.__inZ = Z
        self.__inGamma = gamma
        self.W, self.B, self.Z = None, None, None
        self.gamma = None

        self.__validInit = False
        self.__initWBZ()
        self.__initGamma()
        self.__validateInit()
        self.protoNNOut = None
        self.predictions = None
        self.accuracy = None

    def __validateInit(self):
        self.__validInit = False
        errmsg = "Dimensions mismatch! Should be W[d, d_cap]"
        errmsg += ", B[d_cap, m] and Z[L, m]"
        d, d_cap, m, L, _ = self.getHyperParams()
        assert self.W.shape[0] == d, errmsg
        assert self.W.shape[1] == d_cap, errmsg
        assert self.B.shape[0] == d_cap, errmsg
        assert self.B.shape[1] == m, errmsg
        assert self.Z.shape[0] == L, errmsg
        assert self.Z.shape[1] == m, errmsg
        self.__validInit = True

    def __initWBZ(self):
        with tf.name_scope(self.__nscope):
            W = self.__inW
            if W is None:
                W = tf.random_normal_initializer()
                W = W([self.__d, self.__d_cap])
            self.W = tf.Variable(W, name='W', dtype=tf.float32)

            B = self.__inB
            if B is None:
                B = tf.random_uniform_initializer()
                B = B([self.__d_cap, self.__m])
            self.B = tf.Variable(B, name='B', dtype=tf.float32)

            Z = self.__inZ
            if Z is None:
                Z = tf.random_normal_initializer()
                Z = Z([self.__L, self.__m])
            Z = tf.Variable(Z, name='Z', dtype=tf.float32)
            self.Z = Z
        return self.W, self.B, self.Z

    def __initGamma(self):
        with tf.name_scope(self.__nscope):
            gamma = self.__inGamma
            self.gamma = tf.constant(gamma, name='gamma')

    def getHyperParams(self):
        '''
        Returns the model hyperparameters:
            [inputDimension, projectionDimension,
            numPrototypes, numOutputLabels, gamma]
        '''
        d = self.__d
        dcap = self.__d_cap
        m = self.__m
        L = self.__L
        return d, dcap, m, L, self.gamma

    def getModelMatrices(self):
        '''
        Returns Tensorflow tensors of the model matrices, which
        can then be evaluated to obtain corresponding numpy arrays.

        These can then be exported as part of other implementations of
        ProtonNN, for instance a C++ implementation or pure python
        implementation.
        Returns
            [ProjectionMatrix (W), prototypeMatrix (B),
             prototypeLabelsMatrix (Z), gamma]
        '''
        return self.W, self.B, self.Z, self.gamma

    def __call__(self, X, Y=None):
        '''
        This method is responsible for construction of the forward computation
        graph. The end point of the computation graph, or in other words the
        output operator for the forward computation is returned. Additionally,
        if the argument Y is provided, a classification accuracy operator with
        Y as target will also be created. For this, Y is assumed to in one-hot
        encoded format and the class with the maximum prediction score is
        compared to the encoded class in Y.  This accuracy operator is returned
        by getAccuracyOp() method. If a different accuracyOp is required, it
        can be defined by overriding the createAccOp(protoNNScoresOut, Y)
        method.

        X: Input tensor or placeholder of shape [-1, inputDimension]
        Y: Optional tensor or placeholder for targets (labels or classes).
            Expected shape is [-1, numOutputLabels].
        returns: The forward computation outputs, self.protoNNOut
        '''
        # This should never execute
        assert self.__validInit is True, "Initialization failed!"
        if self.protoNNOut is not None:
            return self.protoNNOut

        W, B, Z, gamma = self.W, self.B, self.Z, self.gamma
        with tf.name_scope(self.__nscope):
            WX = tf.matmul(X, W)
            # Convert WX to tensor so that broadcasting can work
            dim = [-1, WX.shape.as_list()[1], 1]
            WX = tf.reshape(WX, dim)
            dim = [1, B.shape.as_list()[0], -1]
            B = tf.reshape(B, dim)
            l2sim = B - WX
            l2sim = tf.pow(l2sim, 2)
            l2sim = tf.reduce_sum(l2sim, 1, keepdims=True)
            self.l2sim = l2sim
            gammal2sim = (-1 * gamma * gamma) * l2sim
            M = tf.exp(gammal2sim)
            dim = [1] + Z.shape.as_list()
            Z = tf.reshape(Z, dim)
            y = tf.multiply(Z, M)
            y = tf.reduce_sum(y, 2, name='protoNNScoreOut')
            self.protoNNOut = y
            self.predictions = tf.argmax(y, 1, name='protoNNPredictions')
            if Y is not None:
                self.createAccOp(self.protoNNOut, Y)
        return y

    def createAccOp(self, outputs, target):
        '''
        Define an accuracy operation on ProtoNN's output scores and targets.
        Here a simple classification accuracy operator is defined. More
        complicated operators (for multiple label problems and so forth) can be
        defined by overriding this method
        '''
        assert self.predictions is not None
        target = tf.argmax(target, 1)
        correctPrediction = tf.equal(self.predictions, target)
        acc = tf.reduce_mean(tf.cast(correctPrediction, tf.float32),
                             name='protoNNAccuracy')
        self.accuracy = acc

    def getPredictionsOp(self):
        '''
        The predictions operator is defined as argmax(protoNNScores) for each
        prediction.
        '''
        return self.predictions

    def getAccuracyOp(self):
        '''
        returns accuracyOp as defined by createAccOp. It defaults to
        multi-class classification accuracy.
        '''
        msg = "Accuracy operator not defined in graph. Did you provide Y as an"
        msg += " argument to _call_?"
        assert self.accuracy is not None, msg
        return self.accuracy

In [14]:
DATA_DIR = r"./experiments"
windowLen = 'data_w4'
out = preprocessData(DATA_DIR,windowLen)
dataDimension = out[0]
numClasses = out[1]
x_train, y_train = out[2], out[3]
x_test, y_test = out[4], out[5]
print("Feature Dimension: ", dataDimension)
print("Num classes: ", numClasses)

Feature Dimension:  423
Num classes:  2


In [15]:

DATA_DIR = r"./experiments"
train, test = np.load(DATA_DIR + '/ttrain_data_w4.npy'), np.load(DATA_DIR + '/ttest_data_w4.npy')
x_train, y_train = train[:, 1:], train[:, 0]
x_test, y_test = test[:, 1:], test[:, 0]

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.15, random_state=42)

numClasses = max(y_train) - min(y_train) + 1
numClasses = max(numClasses, max(y_test) - min(y_test) + 1)
numClasses = int(numClasses)

y_train = helper.to_onehot(y_train, numClasses)
y_test = helper.to_onehot(y_test, numClasses)
y_val = helper.to_onehot(y_val, numClasses)

dataDimension = x_train.shape[1]
numClasses = y_train.shape[1]

In [16]:
X = tf.placeholder(tf.float32, [None, dataDimension], name='X')
Y = tf.placeholder(tf.float32, [None, numClasses], name='Y')
protoNN = ProtoNN(dataDimension, PROJECTION_DIM,
                  NUM_PROTOTYPES, numClasses,
                  gamma, W=W, B=B)
trainer = ProtoNNTrainer(protoNN,  REG_W, REG_B, REG_Z,
                         SPAR_W, SPAR_B, SPAR_Z,
                        LEARNING_RATE, X, Y, lossType='l2')
sess = tf.Session()

trainer.train(2048, 800, sess, x_train, x_test, y_train, y_test,
              printStep=600, valStep=10)


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Epoch:   0 Batch:   0 Loss: 32765.28516 Accuracy: 0.49447
Epoch:   1 Batch:   0 Loss: 32096.01172 Accuracy: 0.49447
Epoch:   2 Batch:   0 Loss: 29436.13086 Accuracy: 0.49447
Epoch:   3 Batch:   0 Loss: 26945.33594 Accuracy: 0.49447
Epoch:   4 Batch:   0 Loss: 24635.99805 Accuracy: 0.49447
Epoch:   5 Batch:   0 Loss: 22509.29883 Accuracy: 0.49447
Epoch:   6 Batch:   0 Loss: 20560.07227 Accuracy: 0.49447
Epoch:   7 Batch:   0 Loss: 18779.18945 Accuracy: 0.49447
Epoch:   8 Batch:   0 Loss: 17155.16406 Accuracy: 0.49447
Epoch:   9 Batch:   0 Loss: 15675.44434 Accuracy: 0.49447
Test Loss: 13436.28101 Accuracy: 0.50000
Epoch:  10 Batch:   0 Loss: 14327.37305 Accuracy: 0.49447
Epoch:  11 Batch:   0 Loss: 13098.72559 Accuracy: 0.49447
Epoch:  12 Batch:   0 Loss: 11978.18359 Accuracy: 0.49447
Epoch:  13 Batch:   0 Loss: 10955.44922 Accuracy: 0.49

Test Loss: 439.69151 Accuracy: 0.65014
Epoch: 130 Batch:   0 Loss: 433.34543 Accuracy: 0.71248
Epoch: 131 Batch:   0 Loss: 432.78049 Accuracy: 0.71301
Epoch: 132 Batch:   0 Loss: 432.21405 Accuracy: 0.71143
Epoch: 133 Batch:   0 Loss: 431.64600 Accuracy: 0.71090
Epoch: 134 Batch:   0 Loss: 431.06546 Accuracy: 0.71143
Epoch: 135 Batch:   0 Loss: 430.50153 Accuracy: 0.71195
Epoch: 136 Batch:   0 Loss: 429.93594 Accuracy: 0.71143
Epoch: 137 Batch:   0 Loss: 429.35843 Accuracy: 0.71195
Epoch: 138 Batch:   0 Loss: 428.77936 Accuracy: 0.71353
Epoch: 139 Batch:   0 Loss: 428.20099 Accuracy: 0.71353
Test Loss: 436.00726 Accuracy: 0.65027
Epoch: 140 Batch:   0 Loss: 427.61618 Accuracy: 0.71301
Epoch: 141 Batch:   0 Loss: 427.02643 Accuracy: 0.71301
Epoch: 142 Batch:   0 Loss: 426.44025 Accuracy: 0.71301
Epoch: 143 Batch:   0 Loss: 425.85196 Accuracy: 0.71301
Epoch: 144 Batch:   0 Loss: 425.26089 Accuracy: 0.71248
Epoch: 145 Batch:   0 Loss: 424.66754 Accuracy: 0.71248
Epoch: 146 Batch:   0 Loss

Epoch: 267 Batch:   0 Loss: 316.48669 Accuracy: 0.80621
Epoch: 268 Batch:   0 Loss: 315.29065 Accuracy: 0.80779
Epoch: 269 Batch:   0 Loss: 314.08969 Accuracy: 0.80832
Test Loss: 357.09574 Accuracy: 0.69661
Epoch: 270 Batch:   0 Loss: 312.88812 Accuracy: 0.80937
Epoch: 271 Batch:   0 Loss: 311.68439 Accuracy: 0.81148
Epoch: 272 Batch:   0 Loss: 310.47885 Accuracy: 0.81253
Epoch: 273 Batch:   0 Loss: 309.27136 Accuracy: 0.81464
Epoch: 274 Batch:   0 Loss: 308.06223 Accuracy: 0.81464
Epoch: 275 Batch:   0 Loss: 306.84875 Accuracy: 0.81569
Epoch: 276 Batch:   0 Loss: 305.63635 Accuracy: 0.81833
Epoch: 277 Batch:   0 Loss: 304.42267 Accuracy: 0.81885
Epoch: 278 Batch:   0 Loss: 303.20776 Accuracy: 0.82043
Epoch: 279 Batch:   0 Loss: 301.99179 Accuracy: 0.82148
Test Loss: 348.70241 Accuracy: 0.71138
Epoch: 280 Batch:   0 Loss: 300.77136 Accuracy: 0.82201
Epoch: 281 Batch:   0 Loss: 299.55084 Accuracy: 0.82359
Epoch: 282 Batch:   0 Loss: 298.32980 Accuracy: 0.82675
Epoch: 283 Batch:   0 Loss

Epoch: 404 Batch:   0 Loss: 175.63692 Accuracy: 0.92786
Epoch: 405 Batch:   0 Loss: 174.97783 Accuracy: 0.92786
Epoch: 406 Batch:   0 Loss: 174.32475 Accuracy: 0.92786
Epoch: 407 Batch:   0 Loss: 173.67746 Accuracy: 0.92786
Epoch: 408 Batch:   0 Loss: 173.03603 Accuracy: 0.92786
Epoch: 409 Batch:   0 Loss: 172.40036 Accuracy: 0.92786
Test Loss: 288.02987 Accuracy: 0.78902
Epoch: 410 Batch:   0 Loss: 171.77049 Accuracy: 0.92891
Epoch: 411 Batch:   0 Loss: 171.14638 Accuracy: 0.92944
Epoch: 412 Batch:   0 Loss: 170.52792 Accuracy: 0.92944
Epoch: 413 Batch:   0 Loss: 169.91516 Accuracy: 0.92944
Epoch: 414 Batch:   0 Loss: 169.30811 Accuracy: 0.92944
Epoch: 415 Batch:   0 Loss: 168.70665 Accuracy: 0.92944
Epoch: 416 Batch:   0 Loss: 168.11072 Accuracy: 0.92944
Epoch: 417 Batch:   0 Loss: 167.52039 Accuracy: 0.92944
Epoch: 418 Batch:   0 Loss: 166.93558 Accuracy: 0.92944
Epoch: 419 Batch:   0 Loss: 166.35619 Accuracy: 0.92996
Test Loss: 287.57638 Accuracy: 0.78767
Epoch: 420 Batch:   0 Loss

Epoch: 542 Batch:   0 Loss: 125.26221 Accuracy: 0.94523
Epoch: 543 Batch:   0 Loss: 125.08860 Accuracy: 0.94576
Epoch: 544 Batch:   0 Loss: 124.91650 Accuracy: 0.94576
Epoch: 545 Batch:   0 Loss: 124.74584 Accuracy: 0.94629
Epoch: 546 Batch:   0 Loss: 124.57666 Accuracy: 0.94629
Epoch: 547 Batch:   0 Loss: 124.40889 Accuracy: 0.94629
Epoch: 548 Batch:   0 Loss: 124.24261 Accuracy: 0.94629
Epoch: 549 Batch:   0 Loss: 124.07771 Accuracy: 0.94629
Test Loss: 293.34890 Accuracy: 0.77737
Epoch: 550 Batch:   0 Loss: 123.91420 Accuracy: 0.94629
Epoch: 551 Batch:   0 Loss: 123.75206 Accuracy: 0.94629
Epoch: 552 Batch:   0 Loss: 123.59124 Accuracy: 0.94629
Epoch: 553 Batch:   0 Loss: 123.43182 Accuracy: 0.94576
Epoch: 554 Batch:   0 Loss: 123.27369 Accuracy: 0.94576
Epoch: 555 Batch:   0 Loss: 123.11685 Accuracy: 0.94576
Epoch: 556 Batch:   0 Loss: 122.96133 Accuracy: 0.94576
Epoch: 557 Batch:   0 Loss: 122.80708 Accuracy: 0.94576
Epoch: 558 Batch:   0 Loss: 122.65409 Accuracy: 0.94576
Epoch: 55

Test Loss: 284.63512 Accuracy: 0.77927
Epoch: 680 Batch:   0 Loss: 110.03160 Accuracy: 0.95050
Epoch: 681 Batch:   0 Loss: 109.95846 Accuracy: 0.95103
Epoch: 682 Batch:   0 Loss: 109.88566 Accuracy: 0.95103
Epoch: 683 Batch:   0 Loss: 109.81308 Accuracy: 0.95103
Epoch: 684 Batch:   0 Loss: 109.74083 Accuracy: 0.95103
Epoch: 685 Batch:   0 Loss: 109.66884 Accuracy: 0.95103
Epoch: 686 Batch:   0 Loss: 109.59713 Accuracy: 0.95103
Epoch: 687 Batch:   0 Loss: 109.52569 Accuracy: 0.95103
Epoch: 688 Batch:   0 Loss: 109.45448 Accuracy: 0.95103
Epoch: 689 Batch:   0 Loss: 109.38356 Accuracy: 0.95103
Test Loss: 283.43447 Accuracy: 0.77995
Epoch: 690 Batch:   0 Loss: 109.31296 Accuracy: 0.95103
Epoch: 691 Batch:   0 Loss: 109.24255 Accuracy: 0.95050
Epoch: 692 Batch:   0 Loss: 109.17238 Accuracy: 0.95050
Epoch: 693 Batch:   0 Loss: 109.10249 Accuracy: 0.95050
Epoch: 694 Batch:   0 Loss: 109.03287 Accuracy: 0.95103
Epoch: 695 Batch:   0 Loss: 108.96350 Accuracy: 0.95103
Epoch: 696 Batch:   0 Loss

In [17]:
acc = sess.run(protoNN.accuracy, feed_dict={X: x_test, Y: y_test})
pred = sess.run(protoNN.predictions, feed_dict={X: x_test, Y: y_test})
# W, B, Z are tensorflow graph nodes
W, B, Z, _ = protoNN.getModelMatrices()
matrixList = sess.run([W, B, Z])
sparcityList = [SPAR_W, SPAR_B, SPAR_Z]                       
nnz, size, sparse = getModelSize(matrixList, sparcityList)
print("Final test accuracy", acc)
print("Model size constraint (Bytes): ", size)
print("Number of non-zeros: ", nnz)

Final test accuracy 0.79363143
Model size constraint (Bytes):  9580
Number of non-zeros:  2395


In [18]:
from sklearn.metrics import confusion_matrix,classification_report
y_test = np.argmax(y_test,axis=1)
print (confusion_matrix(y_test,pred))
print (classification_report(y_test,pred,digits=5))

[[3016  674]
 [ 849 2841]]
              precision    recall  f1-score   support

           0    0.78034   0.81734   0.79841      3690
           1    0.80825   0.76992   0.78862      3690

    accuracy                        0.79363      7380
   macro avg    0.79429   0.79363   0.79352      7380
weighted avg    0.79429   0.79363   0.79352      7380



In [19]:
sensitivity = confusion_matrix(y_test,pred)[1][1]/(confusion_matrix(y_test,pred)[1][1] + confusion_matrix(y_test,pred)[1][0])
sensitivity

0.7699186991869919

In [20]:
specificity = confusion_matrix(y_test,pred)[0][0]/(confusion_matrix(y_test,pred)[0][0] + confusion_matrix(y_test,pred)[0][1])
specificity

0.8173441734417344