In [82]:
import tensorflow as tf
import numpy as np
import pandas as pd
import keras

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import StratifiedShuffleSplit
# Converting labels to 1-Hot Vectors
from sklearn.preprocessing import OneHotEncoder



import sys
# sys.path.append("/Users/Work/Developer/interpretDL/interprettensor")
root_logdir = "./tf_logs"

# To plot pretty figures
%matplotlib widget
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

np.random.seed(seed=42) 
tf.__version__

'1.13.1'

In [139]:
# Helper Functions

from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

######### Taken from sklearn #######
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots(figsize=[8,8])
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax


def get1hot(y_train,y_test):
    from sklearn.preprocessing import OneHotEncoder

    enc = OneHotEncoder(categories="auto", sparse=False)
    y_train_1hot = enc.fit_transform([[label] for label in y_train]) # Since the function expects an array of "features" per sample
    y_test_1hot = enc.fit_transform([[label] for label in y_test])

    return y_train_1hot, y_test_1hot

def get_split(features, labels):
    features = np.array(features)
    # The train set will have equal amounts of each target class
    split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    for train_index, test_index in split.split(features, labels):
        X_train = features[train_index]
        y_train = labels.iloc[train_index]
        X_test = features[test_index]
        y_test = labels.iloc[test_index]
        
        yield X_train, y_train, X_test, y_test

def plot_history(history):
    plt.close()
    fig, axs = plt.subplots(1, 2, figsize=(12,6))

    # Plot training & validation accuracy values
    axs[0].grid(True)
    axs[0].plot(history.history['binary_accuracy'])
    axs[0].plot(history.history['val_binary_accuracy'])
    axs[0].set(title='Model accuracy', ylabel='Accuracy', xlabel='Epoch')
    axs[0].legend(['Train', 'Test'], loc='upper left')

    # Plot training & validation loss values
    axs[1].grid(True)
    axs[1].plot(history.history['loss'])
    axs[1].plot(history.history['val_loss'])
    axs[1].set(title='Model loss',ylabel='Loss', xlabel='Epoch')
    axs[1].legend(['Train', 'Test'], loc='upper left')

    plt.show()


def remove_label(features, labels, label="MCI"):
    labels = pd.Series(fused_labels)
    non_samples = labels != label

    stripped_features = features[non_samples]
    stripped_labels = labels[non_samples]

    return stripped_features, stripped_labels

In [7]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

class AttributeRemover(BaseEstimator, TransformerMixin):
    """
    Returns a copy of matrix with attributes removed
    """
    def __init__(self, attribute_names):
        self.attribute_names = attribute_names
    
    def fit(self, X, y=None):
        return # Doesn't do anything
    
    def transform(self, X, y=None):
        return X.drop(columns=self.attribute_names)

class OverSampler(BaseEstimator, TransformerMixin):
    """
    Returns a copy of matrix with attributes removed
    """
    def __init__(self, random_state=42):
        self.smote = SMOTE(random_state=random_state)
    
    def fit(self, X, y=None):
        return None
    
    def transform(self, X, y=None):
        return self.smote.fit_resample(X,y)

class AddNoise(BaseEstimator, TransformerMixin):
    """
    Returns a copy of matrix with attributes removed
    """
    def __init__(self, random_state=42):
        self.smote = SMOTE(random_state=random_state)
    
    def fit(self, X, y=None):
        return None
    
    def transform(self, X, y=None):
        return self.smote.fit_resample(X,y)
    
# Not used
train_pipeline = Pipeline([
                    ("smote", OverSampler()),
                    ("normalizer", StandardScaler()) ])

In [19]:
from sklearn import datasets

iris = datasets.load_iris()

features = pd.DataFrame(iris["data"])
target = pd.Series(iris["target"])
flower_names = iris["target_names"]
feature_names = iris["feature_names"]
flower_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

### Get the first 2 flower samples

In [65]:
setosa = target == 0
versicolor = target == 1
samples = features[setosa | versicolor]
labels = target[setosa | versicolor]
class_size = sum(setosa)

### Splitting *versicolor* into two sub classes

In [77]:
# Adding Gaussian Noise

# Positive to first two features
gauss_noise = np.random.normal(loc=0.1,scale=0.1, size=[class_size,2])
gauss_noise[gauss_noise < 0] = 0
B1 = samples.iloc[50:,:2] + gauss_noise

# Negative to last two features
gauss_noise = np.random.normal(loc=0.1,scale=0.1, size=[class_size,2])
gauss_noise[gauss_noise < 0] = 0
B2 = samples.iloc[50:,2:] - gauss_noise

# Combining the two fake "subclasses"
noisy_samples = np.concatenate((B1, B2), axis=1)

# print(np.mean(samples[50:]))
# np.std(samples[50:])

0    5.936
1    2.770
2    4.260
3    1.326
dtype: float64


0    0.510983
1    0.310644
2    0.465188
3    0.195765
dtype: float64

In [81]:
modded_samples = np.concatenate((samples.iloc[:50,:],noisy_samples))

# modded_samples
# print(np.mean(modded_samples[50:], axis=0))
# np.std(modded_samples[50:],axis=0)

### Train a DNN on the modified dataset

In [202]:
# Get split returns a generator
# List comprehension is one way to evaluate a generator
X_train, y_train, X_test, y_test = list(get_split(modded_samples, labels))[0]
print("Train Size:", X_train.shape)
print("Test Size:", y_test.shape)


hot_encoder = OneHotEncoder(categories="auto", sparse=False)
hot_encoder.fit(labels.values.reshape(-1,1)) # Since the function expects an array of "features" per sample
print("Categories:", hot_encoder.categories_)
X_test, y_test.values

Train Size: (80, 4)
Test Size: (20,)
Categories: [array([0, 1])]


(array([[4.99047045, 2.52790215, 3.02640362, 0.88020892],
        [6.79178488, 3.21172958, 4.95503052, 1.66716234],
        [6.14410782, 3.03772119, 4.59619965, 1.28799687],
        [6.50287448, 3.42784519, 4.39403696, 1.46469368],
        [5.        , 3.4       , 1.5       , 0.2       ],
        [5.8       , 4.        , 1.2       , 0.2       ],
        [5.4       , 3.4       , 1.7       , 0.2       ],
        [6.67587639, 3.13520554, 4.36255627, 1.26041965],
        [5.7       , 3.8       , 1.7       , 0.3       ],
        [4.6       , 3.6       , 1.        , 0.2       ],
        [4.4       , 3.2       , 1.3       , 0.2       ],
        [6.76078965, 3.01866091, 4.5651418 , 1.24838858],
        [5.        , 3.3       , 1.4       , 0.2       ],
        [4.4       , 3.        , 1.3       , 0.2       ],
        [5.4       , 3.7       , 1.5       , 0.2       ],
        [5.25033983, 2.48773623, 3.09697165, 0.87612108],
        [5.72438007, 2.54359214, 3.77755482, 0.89529017],
        [5.2  

In [123]:
NUM_FEATURES = X_train.shape[1]
NUM_LABELS = len(hot_encoder.categories_[0])

In [236]:
def build_dnn(num_features, num_labels=3):

#     reset_graph()
    
    keras.backend.clear_session()

    nn = keras.models.Sequential()
    Dense = keras.layers.Dense
    
    # Using He initialization
    he_init = tf.contrib.layers.variance_scaling_initializer()
    
    nn.add(Dense(units = 12, activation="elu", input_dim=num_features,
                kernel_initializer=he_init))
    nn.add(Dense(units = 12, activation="elu",
                kernel_initializer=he_init))
    nn.add(Dense(units=1, activation= "sigmoid",
                kernel_initializer=he_init))

#     BCE = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    
    nn.compile(loss="binary_crossentropy",
                  optimizer='sgd',
                  metrics=['binary_accuracy'])
    
    return nn

def train_model(model, X, y, X_test=[], y_test=[], epochs=30, batch_size=20, verbose=1, plot=True):
    
    ZScaler = StandardScaler().fit(X)
    
    X_train = ZScaler.transform(X)
    X_test = ZScaler.transform(X_test)
    
    y_train = y.values
    y_test = y_test.values
    
#     lr_scheduler = keras.callbacks.LearningRateScheduler(exp_decay)
    callback_list = []
    
    history = model.fit(X_train, y_train, epochs=epochs, batch_size = batch_size,
                        validation_data=(X_test, y_test), callbacks=callback_list, verbose=verbose)
    
#     if plot: plot_history(history)
    
    return history, ZScaler


In [237]:
nn = build_dnn(NUM_FEATURES)
history, Zscaler = train_model(nn, X_train, y_train, X_test, y_test, epochs=20, batch_size=10)

Train on 80 samples, validate on 20 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [238]:
# Plotting results from history
plot_history(history)


FigureCanvasNbAgg()

In [240]:
sess = tf.Session()
predictions = nn.predict(Zscaler.transform(modded_samples))
_labels = [np.float(x) for x in labels]

sess.run(keras.metrics.binary_accuracy(_labels,predictions.flatten()))
# predictions[0][0]
# _labels
# nn.evaluate(Zscaler.transform(modded_samples),labels.values)
# nn.evaluate(modded_samples,labels.values)



1.0

# DONT FORGET TO RENORMALIZE WHEN EVALUATING SAMPLES

In [128]:
from sklearn.model_selection import StratifiedKFold as KFold
# keras.backend.clear_session()

def getKF(X,y, n_splits=10):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42 ) #Default = 10

    for train_index, test_index in kf.split(X,y):
        X_train = X[train_index]
        y_train = y.iloc[train_index]
        X_test = X[test_index]
        y_test = y.iloc[test_index]
        
        yield X_train, y_train, X_test, y_test, test_index

histories = []
testing_indxs =[]
predictions = []
true_labels = []
zoo = []
for X_train, y_train, X_test, y_test, test_index in getKF(modded_samples, labels):
    print(test_index)
    dnn = build_dnn(NUM_FEATURES, NUM_LABELS)
    history, ZScaler = train_model(dnn,X_train, y_train, X_test, y_test, verbose=0, plot=False, epochs=20, batch_size=10)
    
    # Updating all information arrays
    histories.append(history)
    testing_indxs.append(test_index)
    zoo.append(dnn)
    
    y_pred_probs = dnn.predict(ZScaler.transform(X_test))
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(hot_encoder.transform(y_test.values.reshape(-1,1)), axis=1)
    
    predictions.extend(y_pred)
    true_labels.extend(y_true)
    
    print("Scores on test set: loss={:0.3f} accuracy={:.4f}".format(history.history["binary_accuracy"][-1], history.history["val_binary_accuracy"][-1]))

[13 17 30 39 45 63 67 80 89 95]
Scores on test set: loss=0.989 accuracy=1.0000
[19 25 26 32 48 69 75 76 82 98]
Scores on test set: loss=1.000 accuracy=1.0000
[ 3  4  8 12 37 53 54 58 62 87]
Scores on test set: loss=1.000 accuracy=1.0000
[ 6 15 41 46 47 56 65 91 96 97]
Scores on test set: loss=1.000 accuracy=0.9000
[ 9 16 24 31 34 59 66 74 81 84]
Scores on test set: loss=1.000 accuracy=1.0000
[ 0  5 27 33 44 50 55 77 83 94]
Scores on test set: loss=1.000 accuracy=1.0000
[ 1 11 21 29 36 51 61 71 79 86]
Scores on test set: loss=1.000 accuracy=1.0000
[ 2 23 35 40 43 52 73 85 90 93]
Scores on test set: loss=1.000 accuracy=1.0000
[10 18 20 22 49 60 68 70 72 99]
Scores on test set: loss=1.000 accuracy=1.0000
[ 7 14 28 38 42 57 64 78 88 92]
Scores on test set: loss=1.000 accuracy=1.0000


In [131]:
# Num is the figure number and clear tells it to clear the figure if it already exists
plt.close()
fig, axs = plt.subplots(num="KF Eval",
                        nrows=len(histories)//2, ncols=2,
                        figsize=(10,10), sharex=True, sharey=True)
axs=axs.flatten()
dfs = []

for i,history in enumerate(histories):
    df = pd.DataFrame(history.history)
    dfs.append(df)
#     axs[i].grid(True)
    df[["binary_accuracy","val_binary_accuracy"]].plot(ax=axs[i], grid=True)

FigureCanvasNbAgg()

In [242]:
sess = tf.Session()
idx = -1
best_dnn = zoo[idx]

predictions = best_dnn.predict(Zscaler.transform(modded_samples))
_labels = [np.float(x) for x in labels]
sess.run(keras.metrics.binary_accuracy(_labels,predictions.flatten()))

best_dnn.evaluate(Zscaler.transform(modded_samples),labels.values)
# NOT bestdnn.evaluate(modded_samples,labels.values)




[0.031785084158182146, 1.0]

# NOW PERFORM LRP