In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import keras
import umap

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import StratifiedShuffleSplit
# Converting labels to 1-Hot Vectors
from sklearn.preprocessing import OneHotEncoder
from mpl_toolkits.mplot3d import Axes3D


import sys
# sys.path.append("/Users/Work/Developer/interpretDL/interprettensor")
# Directory name for TensorFlow logs (not referenced by any other cell shown in this notebook).
root_logdir = "./tf_logs"

# To plot pretty figures
# The backend magic is issued before pyplot is imported below.
%matplotlib widget
import matplotlib
import matplotlib.pyplot as plt
# Font sizes applied to every figure created afterwards.
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start from a fresh default TensorFlow graph with seeded RNGs.

    Parameters
    ----------
    seed : int, default 42
        Seed handed to both TensorFlow (``tf.set_random_seed``) and NumPy
        (``np.random.seed``).
    """
    # Reset first, then seed: the TF seed is set after the default graph
    # has been replaced.
    tf.reset_default_graph()
    tf.set_random_seed(seed=seed)
    np.random.seed(seed=seed)

# Seed NumPy's global RNG once for the whole notebook (the noise cell below draws from it).
np.random.seed(seed=42) 
# Last expression of the cell: displays the installed TensorFlow version.
tf.__version__

Using TensorFlow backend.


'1.13.1'

In [2]:
# Helper Functions

from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

######### Taken from sklearn #######
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels, forwarded to ``confusion_matrix``.
    classes : array
        Display names; indexed with ``unique_labels(y_true, y_pred)``, so it
        must support array indexing by the label values.
    normalize : bool, default False
        When True each row is divided by its row sum.
    title : str or None
        Figure title; a default depending on ``normalize`` is used when falsy.
    cmap : matplotlib colormap
        Colormap passed to ``imshow``.

    Returns
    -------
    The matplotlib axes the matrix was drawn on.
    """
    # The same text serves as console banner and as fallback title.
    banner = ('Normalized confusion matrix' if normalize
              else 'Confusion matrix, without normalization')
    title = title or banner

    cm = confusion_matrix(y_true, y_pred)
    # Keep only the names of labels that actually occur in the data.
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals

    print(banner)
    print(cm)

    fig, ax = plt.subplots(figsize=[8,8])
    heatmap = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(heatmap, ax=ax)

    n_rows, n_cols = cm.shape
    # One tick per row/column, labelled with the class names.
    ax.set(xticks=np.arange(n_cols),
           yticks=np.arange(n_rows),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Slant the x tick labels so long names do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Write each cell's value on top of it; switch to white text on dark cells.
    cell_format = '.2f' if normalize else 'd'
    half_max = cm.max() / 2.
    for row, col in np.ndindex(n_rows, n_cols):
        value = cm[row, col]
        ax.text(col, row, format(value, cell_format),
                ha="center", va="center",
                color="white" if value > half_max else "black")
    fig.tight_layout()
    return ax


def get1hot(y_train,y_test):
    """One-hot encode train and test labels with one shared encoder.

    The encoder is fitted on ``y_train`` only and then applied to ``y_test``,
    so both outputs have the same columns in the same order even when the
    test set does not contain every class.

    Parameters
    ----------
    y_train, y_test : iterable of labels
        One label per sample.

    Returns
    -------
    (y_train_1hot, y_test_1hot) : tuple of dense 2-D arrays

    Raises
    ------
    ValueError
        Raised by the encoder if ``y_test`` contains a label that never
        occurs in ``y_train`` (scikit-learn's default ``handle_unknown``).
    """
    from sklearn.preprocessing import OneHotEncoder

    enc = OneHotEncoder(categories="auto", sparse=False)
    # The encoder expects a 2-D array of "features" per sample, hence [[label], ...].
    y_train_1hot = enc.fit_transform([[label] for label in y_train])
    # transform (not fit_transform): reuse the categories learned from the training labels.
    y_test_1hot = enc.transform([[label] for label in y_test])

    return y_train_1hot, y_test_1hot

def get_split(features, labels):
    """Yield one stratified 80/20 train/test split.

    Generator: with ``n_splits=1`` it produces a single tuple
    ``(X_train, y_train, X_test, y_test)``.

    Parameters
    ----------
    features : array-like
        Converted to a NumPy array and indexed by row position.
    labels : pandas Series
        Indexed positionally with ``.iloc``; also used for stratification,
        i.e. both parts keep the class proportions of ``labels``.
    """
    feature_matrix = np.array(features)
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    for train_rows, test_rows in splitter.split(feature_matrix, labels):
        yield (feature_matrix[train_rows], labels.iloc[train_rows],
               feature_matrix[test_rows], labels.iloc[test_rows])

def plot_history(history):
    """Plot accuracy and loss curves of a training run side by side.

    Parameters
    ----------
    history : object with a ``history`` dict
        Must contain the keys ``binary_accuracy``, ``val_binary_accuracy``,
        ``loss`` and ``val_loss`` (one value per epoch).

    Closes the current figure first, then shows a new two-panel figure.
    """
    plt.close()
    fig, axs = plt.subplots(1, 2, figsize=(12,6))

    # (training key, validation key, panel title, y-axis label) per panel.
    panels = (
        ('binary_accuracy', 'val_binary_accuracy', 'Model accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model loss', 'Loss'),
    )
    for axis, (train_key, val_key, heading, y_label) in zip(axs, panels):
        axis.grid(True)
        axis.plot(history.history[train_key])
        axis.plot(history.history[val_key])
        axis.set(title=heading, ylabel=y_label, xlabel='Epoch')
        axis.legend(['Train', 'Test'], loc='upper left')

    plt.show()


def remove_label(features, labels, label="MCI"):
    """Drop every sample whose label equals ``label``.

    Parameters
    ----------
    features : DataFrame or array
        One row per sample; must accept a boolean mask aligned with ``labels``.
    labels : sequence or pandas Series
        One label per sample; wrapped in a ``pd.Series``.
    label : default "MCI"
        The label value to remove.

    Returns
    -------
    (stripped_features, stripped_labels)
        The rows of ``features`` and entries of ``labels`` whose label differs
        from ``label``.
    """
    # Work on the argument that was passed in, not on any notebook-level variable.
    labels = pd.Series(labels)
    non_samples = labels != label

    stripped_features = features[non_samples]
    stripped_labels = labels[non_samples]

    return stripped_features, stripped_labels

In [3]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

class AttributeRemover(BaseEstimator, TransformerMixin):
    """
    Transformer that returns a copy of a DataFrame with the given columns dropped.

    Parameters
    ----------
    attribute_names : label or list of labels
        Column name(s) passed to ``DataFrame.drop(columns=...)``.
    """
    def __init__(self, attribute_names):
        self.attribute_names = attribute_names

    def fit(self, X, y=None):
        """Nothing to learn; returns ``self`` so ``fit(...).transform(...)``
        and ``fit_transform`` can chain on the result."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` without the configured columns (``X`` itself is not modified)."""
        return X.drop(columns=self.attribute_names)

class OverSampler(BaseEstimator, TransformerMixin):
    """
    Wraps SMOTE over-sampling in a transformer-style interface.

    Parameters
    ----------
    random_state : int, default 42
        Seed handed to the underlying ``SMOTE`` instance.

    Attributes
    ----------
    smote : SMOTE
        The sampler created at construction time.
    """
    def __init__(self, random_state=42):
        # Stored under the parameter's own name so BaseEstimator.get_params()
        # (used by clone() and repr) can read it back.
        self.random_state = random_state
        self.smote = SMOTE(random_state=random_state)

    def fit(self, X, y=None):
        """Nothing to learn up front; returns ``self`` so calls can be chained."""
        return self

    def transform(self, X, y=None):
        """Resample ``X`` and ``y`` with SMOTE.

        Returns whatever ``SMOTE.fit_resample(X, y)`` returns (the resampled
        features together with the resampled labels). ``y`` is needed by SMOTE
        even though it defaults to None here.
        """
        return self.smote.fit_resample(X,y)

class AddNoise(BaseEstimator, TransformerMixin):
    """
    SMOTE-based resampler.

    NOTE(review): despite its name this class adds no noise; its code is the
    same as ``OverSampler`` (it delegates to ``SMOTE.fit_resample``).
    Presumably a placeholder for a noise transformer - confirm before use.

    Parameters
    ----------
    random_state : int, default 42
        Seed handed to the underlying ``SMOTE`` instance.
    """
    def __init__(self, random_state=42):
        # Stored under the parameter's own name so BaseEstimator.get_params()
        # (used by clone() and repr) can read it back.
        self.random_state = random_state
        self.smote = SMOTE(random_state=random_state)

    def fit(self, X, y=None):
        """Nothing to learn up front; returns ``self`` so calls can be chained."""
        return self

    def transform(self, X, y=None):
        """Return the result of ``SMOTE.fit_resample(X, y)``; ``y`` is needed by SMOTE."""
        return self.smote.fit_resample(X,y)
    
# Not used
# SMOTE over-sampling followed by standardisation.
# NOTE(review): OverSampler.transform needs y and returns a (features, labels)
# pair; scikit-learn's Pipeline presumably does not pass y to transform nor
# unpack such a pair - confirm (imbalanced-learn ships its own Pipeline for
# samplers) before putting this pipeline to use.
train_pipeline = Pipeline([
                    ("smote", OverSampler()),
                    ("normalizer", StandardScaler()) ])

In [4]:
from sklearn import datasets

# Load the iris data set and keep features / targets as pandas objects.
iris = datasets.load_iris()

features = pd.DataFrame(iris["data"])
target = pd.Series(iris["target"])
flower_names = iris["target_names"]
feature_names = iris["feature_names"]
# DataFrame.info() writes its summary itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
features.info()
# Last expression of the cell: show the class names.
flower_names

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
0    150 non-null float64
1    150 non-null float64
2    150 non-null float64
3    150 non-null float64
dtypes: float64(4)
memory usage: 4.8 KB
None


array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

### Select the first two flower classes (setosa and versicolor)

In [5]:
# Boolean masks over the full iris target vector (0 = setosa, 1 = versicolor).
setosa = target.eq(0)
versicolor = target.eq(1)
# Number of versicolor rows.
class_size = int(versicolor.sum())

# Binary sub-problem: keep only the rows of the two selected classes.
samples = features.loc[setosa | versicolor]
labels = target.loc[setosa | versicolor]

# Per-class views used when building the modified data set.
setosa_samples = features.loc[setosa]
versicolor_samples = features.loc[versicolor]
versicolor_labels = target.loc[versicolor]

# versicolor_samples

### Splitting *versicolor* into two sub classes

In [6]:
# Per-feature maximum of the versicolor samples (the range computation is left commented out).
versicolor_samples.max() #- versicolor_samples.min()

0    7.0
1    3.4
2    5.1
3    1.8
dtype: float64

In [7]:
## Constructing different noise sources
# Gaussian offsets (mean 1, std 0.25), one row per sample of a half class and
# two columns; negative draws are clipped to 0. Only gauss_noise is used below.
gauss_noise = np.random.normal(loc=1,scale=0.25, size=[class_size//2,2])
gauss_noise[gauss_noise < 0] = 0
# NOTE: unif_noise (a single scalar draw) and constant are not used in this cell.
unif_noise = np.random.uniform(low=0,high=1)
constant = 1


# assumes class_size is even so both halves have class_size//2 rows and match
# gauss_noise's shape - TODO confirm for other data sets
split_size = class_size//2

# Sub-class B1: first half of versicolor, noise ADDED to the first two features

B1 = versicolor_samples.iloc[:split_size,:2] + gauss_noise
B1 = np.concatenate((B1, versicolor_samples.iloc[:split_size,2:]), axis=1)
B1_labels = versicolor_labels.iloc[:split_size]

# Sub-class B2: second half of versicolor, the SAME noise array ADDED to the
# last two features (the separate noise draw below is commented out, so nothing
# is subtracted here).
# gauss_noise = np.random.normal(loc=0.1,scale=0.1, size=[class_size//2,2])
# gauss_noise[gauss_noise < 0] = 0
# unif_noise = np.random.uniform(low=0,high=1)

B2 = versicolor_samples.iloc[split_size:,2:] + gauss_noise
B2 = np.concatenate((versicolor_samples.iloc[split_size:,:2],B2), axis=1)
# B2 gets its own label value (original versicolor label + 1).
B2_labels = versicolor_labels.iloc[split_size:] + 1
    
# Combining the two fake "subclasses"
noisy_samples = np.concatenate((B1, B2), axis=0)

# print(versicolor_samples)
# # np.std(samples[50:])
# Last expression of the cell: display the noise that was applied.
gauss_noise

array([[1.12417854, 0.96543392],
       [1.16192213, 1.38075746],
       [0.94146166, 0.94146576],
       [1.3948032 , 1.19185868],
       [0.8826314 , 1.13564001],
       [0.88414558, 0.88356756],
       [1.06049057, 0.52167994],
       [0.56877054, 0.85942812],
       [0.74679222, 1.07856183],
       [0.77299398, 0.64692407],
       [1.36641219, 0.94355592],
       [1.01688205, 0.64381295],
       [0.86390432, 1.02773065],
       [0.71225161, 1.0939245 ],
       [0.84984033, 0.92707656],
       [0.84957335, 1.46306955],
       [0.99662569, 0.73557227],
       [1.20563623, 0.69478909],
       [1.0522159 , 0.51008247],
       [0.66795349, 1.04921531],
       [1.18461664, 1.04284207],
       [0.97108793, 0.92472408],
       [0.6303695 , 0.82003895],
       [0.88484031, 1.26428056],
       [1.08590457, 0.55923996]])

In [8]:
# Stack the untouched setosa rows on top of the perturbed versicolor rows.
modded_samples = np.concatenate((setosa_samples, noisy_samples), axis=0)

# Sub-class labels: copy the binary labels and bump everything from position
# class_size + split_size onwards by one.
modded_labels = labels.copy()
modded_labels.iloc[class_size + split_size:] += 1

print(modded_samples.shape)
modded_samples[50:]
# modded_labels
# print(np.mean(modded_samples[50:], axis=0))
# np.std(modded_samples[50:],axis=0)

(100, 4)


array([[8.12417854, 4.16543392, 4.7       , 1.4       ],
       [7.56192213, 4.58075746, 4.5       , 1.5       ],
       [7.84146166, 4.04146576, 4.9       , 1.5       ],
       [6.8948032 , 3.49185868, 4.        , 1.3       ],
       [7.3826314 , 3.93564001, 4.6       , 1.5       ],
       [6.58414558, 3.68356756, 4.5       , 1.3       ],
       [7.36049057, 3.82167994, 4.7       , 1.6       ],
       [5.46877054, 3.25942812, 3.3       , 1.        ],
       [7.34679222, 3.97856183, 4.6       , 1.3       ],
       [5.97299398, 3.34692407, 3.9       , 1.4       ],
       [6.36641219, 2.94355592, 3.5       , 1.        ],
       [6.91688205, 3.64381295, 4.2       , 1.5       ],
       [6.86390432, 3.22773065, 4.        , 1.        ],
       [6.81225161, 3.9939245 , 4.7       , 1.4       ],
       [6.44984033, 3.82707656, 3.6       , 1.3       ],
       [7.54957335, 4.56306955, 4.4       , 1.4       ],
       [6.59662569, 3.73557227, 4.5       , 1.5       ],
       [7.00563623, 3.39478909,

In [9]:
# Unmodified rows from position 50 onwards, for comparison with modded_samples[50:] above.
samples[50:]

Unnamed: 0,0,1,2,3
50,7.0,3.2,4.7,1.4
51,6.4,3.2,4.5,1.5
52,6.9,3.1,4.9,1.5
53,5.5,2.3,4.0,1.3
54,6.5,2.8,4.6,1.5
55,5.7,2.8,4.5,1.3
56,6.3,3.3,4.7,1.6
57,4.9,2.4,3.3,1.0
58,6.6,2.9,4.6,1.3
59,5.2,2.7,3.9,1.4


### Train a DNN on the modified dataset

In [10]:
# get_split is a generator that yields a single split; next() pulls that one tuple.
# Note that the split uses the binary `labels`, not the sub-class `modded_labels`.
X_train, y_train, X_test, y_test = next(get_split(modded_samples, labels))
print("Train Size:", X_train.shape)
print("Test Size:", y_test.shape)


# The encoder expects an array of "features" per sample, hence the (-1, 1) reshape.
hot_encoder = OneHotEncoder(categories="auto", sparse=False)
hot_encoder.fit(labels.values.reshape(-1,1))
print("Categories:", hot_encoder.categories_)
X_test, y_test.values

Train Size: (80, 4)
Test Size: (20,)
Categories: [array([0, 1])]


(array([[5.46877054, 3.25942812, 3.3       , 1.        ],
        [6.7       , 3.        , 5.94146166, 2.64146576],
        [6.81225161, 3.9939245 , 4.7       , 1.4       ],
        [7.56192213, 4.58075746, 4.5       , 1.5       ],
        [5.        , 3.4       , 1.5       , 0.2       ],
        [5.8       , 4.        , 1.2       , 0.2       ],
        [5.4       , 3.4       , 1.7       , 0.2       ],
        [6.6       , 3.        , 5.52417854, 2.36543392],
        [5.7       , 3.8       , 1.7       , 0.3       ],
        [4.6       , 3.6       , 1.        , 0.2       ],
        [4.4       , 3.2       , 1.3       , 0.2       ],
        [7.34679222, 3.97856183, 4.6       , 1.3       ],
        [5.        , 3.3       , 1.4       , 0.2       ],
        [4.4       , 3.        , 1.3       , 0.2       ],
        [5.4       , 3.7       , 1.5       , 0.2       ],
        [5.        , 2.3       , 4.3522159 , 1.51008247],
        [6.26795349, 3.54921531, 3.9       , 1.1       ],
        [5.2  

In [11]:
# Input width of the network: number of feature columns in the training matrix.
NUM_FEATURES = X_train.shape[1]
# Number of distinct label values found by the one-hot encoder.
NUM_LABELS = len(hot_encoder.categories_[0])

In [12]:
def build_dnn(num_features, num_labels=3):
    """Build and compile a small binary classifier.

    Architecture: two hidden ELU layers of 12 units followed by one sigmoid
    output unit, all with He-uniform kernel initialisation. Compiled with
    binary cross-entropy, plain SGD and the ``binary_accuracy`` metric.

    Parameters
    ----------
    num_features : int
        Width of the input layer.
    num_labels : int, default 3
        Currently unused: the output layer always has a single unit.

    Returns
    -------
    The compiled ``keras`` Sequential model. The Keras session is cleared
    before the model is built.
    """
    keras.backend.clear_session()

    # One He-uniform initializer object shared by every layer.
    he_init = tf.keras.initializers.he_uniform()

    def hidden_layer(**extra):
        # 12-unit ELU block; `extra` carries input_dim for the first layer.
        return keras.layers.Dense(units=12, activation="elu",
                                  kernel_initializer=he_init, **extra)

    network = keras.models.Sequential([
        hidden_layer(input_dim=num_features),
        hidden_layer(),
        keras.layers.Dense(units=1, activation="sigmoid",
                           kernel_initializer=he_init),
    ])

    network.compile(loss="binary_crossentropy",
                    optimizer='sgd',
                    metrics=['binary_accuracy'])

    return network

def train_model(model, X, y, X_test=None, y_test=None, epochs=30, batch_size=20, verbose=1, plot=True):
    """Fit ``model`` on ``(X, y)`` and return the training history.

    Parameters
    ----------
    model : compiled Keras model
        Anything exposing ``fit(X, y, epochs=..., batch_size=...,
        validation_data=..., callbacks=..., verbose=...)``.
    X : array-like
        Training features. They are passed to the model unscaled.
    y : array-like or pandas Series
        Training targets.
    X_test, y_test : array-like, optional
        Validation data. When either is missing or empty the model is
        trained without validation instead of failing.
    epochs, batch_size, verbose
        Forwarded to ``model.fit``.
    plot : bool
        Currently ignored (plotting is done separately via ``plot_history``).

    Returns
    -------
    (history, ZScaler)
        The object returned by ``model.fit`` and a ``StandardScaler`` fitted
        on ``X``. The scaler is NOT applied during training; it is only
        returned for callers that want it.
    """
    # Fitted for the caller's benefit only - see the Returns note above.
    ZScaler = StandardScaler().fit(X)

    X_train = X
    # np.asarray accepts both pandas Series and plain arrays/lists.
    y_train = np.asarray(y)

    has_validation = (X_test is not None and y_test is not None
                      and len(X_test) > 0 and len(y_test) > 0)
    validation_data = (X_test, np.asarray(y_test)) if has_validation else None

    callback_list = []

    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_data=validation_data, callbacks=callback_list,
                        verbose=verbose)

    return history, ZScaler


In [13]:
# Build the classifier and train it on the binary (setosa vs. versicolor) task.
nn = build_dnn(num_features=NUM_FEATURES)
history, Zscaler = train_model(
    nn, X_train, y_train,
    X_test=X_test, y_test=y_test,
    epochs=30, batch_size=10,
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 80 samples, validate on 20 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [25]:
# Plotting results from history
# Requires `history` from the training cell above to exist.
plot_history(history)

FigureCanvasNbAgg()

In [15]:
# sess = tf.Session()
# predictions = nn.predict(Zscaler.transform(modded_samples))
# predictions = nn.predict(modded_samples)
# _labels = [np.float(x) for x in labels]

# sess.run(keras.metrics.binary_accuracy(_labels,predictions.flatten()))
# # predictions[0][0]
# # _labels
# # nn.evaluate(Zscaler.transform(modded_samples),labels.values)
# # nn.evaluate(modded_samples,labels.values)



# DON'T FORGET TO RENORMALIZE WHEN EVALUATING SAMPLES

In [16]:
# from sklearn.model_selection import StratifiedKFold as KFold
# # keras.backend.clear_session()

# def getKF(X,y, n_splits=10):
#     kf = KFold(n_splits=n_splits, shuffle=True, random_state=42 ) #Default = 10

#     for train_index, test_index in kf.split(X,y):
#         X_train = X[train_index]
#         y_train = y.iloc[train_index]
#         X_test = X[test_index]
#         y_test = y.iloc[test_index]
        
#         yield X_train, y_train, X_test, y_test, test_index

# histories = []
# testing_indxs =[]
# predictions = []
# true_labels = []
# zoo = []
# for X_train, y_train, X_test, y_test, test_index in getKF(modded_samples, labels):
#     print(test_index)
#     dnn = build_dnn(NUM_FEATURES, NUM_LABELS)
#     history, ZScaler = train_model(dnn,X_train, y_train, X_test, y_test, verbose=0, plot=False, epochs=20, batch_size=10)
    
#     # Updating all information arrays
#     histories.append(history)
#     testing_indxs.append(test_index)
#     zoo.append(dnn)
    
#     y_pred_probs = dnn.predict(ZScaler.transform(X_test))
#     y_pred = np.argmax(y_pred_probs, axis=1)
#     y_true = np.argmax(hot_encoder.transform(y_test.values.reshape(-1,1)), axis=1)
    
#     predictions.extend(y_pred)
#     true_labels.extend(y_true)
    
#     print("Scores on test set: loss={:0.3f} accuracy={:.4f}".format(history.history["binary_accuracy"][-1], history.history["val_binary_accuracy"][-1]))

In [17]:
# # Num is the figure number and clear tells it to clear the figure if it already exists
# plt.close()
# fig, axs = plt.subplots(num="KF Eval",
#                         nrows=len(histories)//2, ncols=2,
#                         figsize=(10,10), sharex=True, sharey=True)
# axs=axs.flatten()
# dfs = []

# for i,history in enumerate(histories):
#     df = pd.DataFrame(history.history)
#     dfs.append(df)
# #     axs[i].grid(True)
#     df[["binary_accuracy","val_binary_accuracy"]].plot(ax=axs[i], grid=True)

In [18]:
# sess = tf.Session()
# idx = -1
# best_dnn = zoo[idx]

# predictions = best_dnn.predict(Zscaler.transform(modded_samples))
# _labels = [np.float(x) for x in labels]
# sess.run(keras.metrics.binary_accuracy(_labels,predictions.flatten()))

# best_dnn.evaluate(Zscaler.transform(modded_samples),labels.values)
# # NOT bestdnn.evaluate(modded_samples,labels.values)


In [19]:
# modded_samples
# scaled_samples

# NOW PERFORM LRP

In [20]:
model = nn
# Inputs are used unscaled, matching how the model was trained.
scaled_samples = modded_samples #Zscaler.transform(modded_samples)


predictions = model.predict(scaled_samples)
# Round the single sigmoid output of every sample to a hard 0/1 prediction.
preds = np.round(predictions[:, 0])
# NumPy float array so that element-wise comparisons below work as intended.
true_labels = labels.values.astype(float)

correct = preds == true_labels
# Element-wise mask of the samples whose true label is 1 (versicolor).
# NOTE: this rebinds the name `versicolor` used by the class-selection cell.
versicolor = true_labels == 1

print("SANITY CHECK")
# Evaluated on the correctly predicted samples only, so the accuracy is
# expected to be 1.0; the interesting number here is the loss.
loss_and_metrics = model.evaluate(scaled_samples[correct], labels[correct])
print("Scores on test set: loss={:0.3f} accuracy={:.4f}".format(*loss_and_metrics))

SANITY CHECK
Scores on test set: loss=0.043 accuracy=1.0000


In [21]:
import innvestigate
import innvestigate.utils as iutils

def perform_analysis(model, analyzer, data, labels):
    """Run a relevance analyzer over ``data`` and return the result as a DataFrame.

    Parameters
    ----------
    model
        Kept for interface compatibility; not used (the analyzer is expected
        to hold its own reference to the model).
    analyzer : object with an ``analyze(data)`` method
        Its return value is wrapped in a ``pd.DataFrame``.
    data : array-like
        Samples to analyse.
    labels
        Kept for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        One row per analysed sample, as produced by ``analyzer.analyze``.
    """
    # No model.predict call here: its result was never used by this function.
    return pd.DataFrame(analyzer.analyze(data))


# The model is analysed as-is; the softmax-stripping helper stays disabled.
# model_wo_sm = iutils.keras.graph.model_wo_softmax(model)

# Two LRP variants built on the same trained model.
_relevance = innvestigate.analyzer.relevance_based.relevance_analyzer
lrp_E = _relevance.LRPEpsilon(model=model)
lrp = _relevance.LRPAlpha2Beta1(model=model)

# Restrict the analysis to the samples the model predicts correctly.
test_idx = correct
all_samples, all_labels, mod_labels = (
    scaled_samples[test_idx],
    labels[test_idx],
    modded_labels[test_idx],
)

# perform_analysis(nn,gradient_analyzer,flowers,types)
# One relevance table per analyzer: Alpha2Beta1 first, then Epsilon.
all_lrp = perform_analysis(model, lrp, all_samples, all_labels)
all_lrp_E = perform_analysis(model, lrp_E, all_samples, all_labels)

Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [22]:
# Summary statistics of the Alpha2Beta1 relevance scores, one column per input feature.
all_lrp.describe()
# mod_labels[51:50]

Unnamed: 0,0,1,2,3
count,100.0,100.0,100.0,100.0
mean,0.291284,-0.914311,0.998043,-0.042858
std,0.27249,0.896948,0.943505,0.029931
min,0.010299,-2.063652,0.00928,-0.110671
25%,0.048845,-1.794146,0.046353,-0.071593
50%,0.294323,-0.895944,0.952412,-0.026975
75%,0.412677,-0.013197,1.949077,-0.016178
max,0.838661,0.001285,2.074918,-0.003733


In [28]:
# plt.close()
# Rank the input features by their mean relevance over all analysed samples.
lrp_results = all_lrp
# # lrp_E_results = all_lrp_E
population = lrp_results.mean()
# Highest mean relevance first.
sorted_features = population.sort_values(ascending=False)

# sorted_features.plot(kind="bar")
# plt.xticks(rotation=65, fontsize="small")

# plt.show()

In [29]:
# Translate the ranked column positions back into the iris feature names.
[feature_names[x] for x in sorted_features.index]

['petal length (cm)',
 'sepal length (cm)',
 'petal width (cm)',
 'sepal width (cm)']

## Plotting UMAP projections

In [30]:
def plot_2d(X,labels, name="1"):
    """Scatter-plot the first two columns of ``X``, coloured by ``labels``.

    Parameters
    ----------
    X : 2-D array
        Column 0 is drawn on the x axis, column 1 on the y axis.
    labels : array-like
        Per-point colour values; a colour bar is added for them.
    name : str, default "1"
        Currently unused.

    Closes the current figure before opening a new 10x8 one.
    """
    plt.close()
    plt.figure(figsize=(10,8))
    xs, ys = X[:,0], X[:,1]
    plt.scatter(x=xs, y=ys, s=30, c=labels)
    plt.colorbar()

    
# Keep only positive relevance: negative scores are replaced by 0.
pos_only = all_lrp.mask(all_lrp < 0, 0)

plt.close()

# Two UMAP projections of the same relevance table (3-D and 2-D).
reducer_3d = umap.UMAP(n_components=3, n_neighbors=50,
                       min_dist=0, random_state=42)
reducer_2d = umap.UMAP(n_components=2, n_neighbors=25,
                       min_dist=0, random_state=42)

embedding_3d = reducer_3d.fit_transform(pos_only)
embedding_2d = reducer_2d.fit_transform(pos_only)

# 2-D view, coloured by the sub-class labels.
plot_2d(embedding_2d, mod_labels, name="Alpha")

# 3-D view of the same data: one embedding column per axis.
fig = plt.figure(figsize=(12,10))
ax = fig.add_subplot(111, projection='3d')
_im = ax.scatter(*embedding_3d.T, c=mod_labels, s=40)
plt.colorbar(_im)

plt.show()


  n_components
  n_components


FigureCanvasNbAgg()

FigureCanvasNbAgg()

## LRP-Epsilon

In [31]:
# Same projection as for Alpha2Beta1, this time on the LRP-Epsilon relevances.
# Keep only positive relevance: negative scores are set to 0.
pos_only = all_lrp_E.copy()
pos_only[pos_only < 0] = 0

plt.close()

# Two UMAP projections of the same relevance table (3-D and 2-D).
reducer_3d = umap.UMAP(random_state=42,
                    n_components = 3,
                    n_neighbors=50,
                    min_dist=0)

reducer_2d = umap.UMAP(random_state=42,
                    n_components = 2,
                    n_neighbors=25,
                    min_dist=0)

embedding_3d = reducer_3d.fit_transform(pos_only)
embedding_2d = reducer_2d.fit_transform(pos_only)

# Label matches the analyzer used in this section (plot_2d currently ignores `name`).
plot_2d(embedding_2d, mod_labels, name="Epsilon")

# 3-D view of the same data, coloured by the sub-class labels.
fig = plt.figure(figsize=(12,10))
ax = fig.add_subplot(111, projection='3d')
_im = ax.scatter(embedding_3d[:,0], embedding_3d[:,1], embedding_3d[:,2], c=mod_labels, s=40)
plt.colorbar(_im)

plt.show()


  n_components
  n_components


FigureCanvasNbAgg()

FigureCanvasNbAgg()