In [1]:
import numpy as np
from bqplot import *
from bqplot.marks import Graph
from ipywidgets import IntSlider, Dropdown, RadioButtons, HBox, VBox, Button, Layout
from bqplot import pyplot as plt
from bqplot import OrdinalScale

from IPython.display import display

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG. train_test_split below is called without a
# random_state, so the split presumably depends on this seed -- TODO confirm.
np.random.seed(7)
# The first CSV column is used as the row index.
data_df_total = pd.read_csv('./data_files/credit-training.csv', index_col=0)
# Name of the target column that the model predicts.
result_column = 'SeriousDlqin2yrs'

# Split the row labels (not the frame itself) 70/30, stratified on the target,
# then select the corresponding rows by label.
train_idx, test_idx = train_test_split(data_df_total.index.values, test_size=0.3,
                                       stratify=data_df_total[result_column])
train_data = data_df_total.loc[train_idx]
test_data = data_df_total.loc[test_idx]

In [3]:
# Utility functions for cleaning the data and adding features.

# Past-due counter columns; values >= 90 in these columns are replaced by the
# cleaning helpers below.
overdue_cols = ['NumberOfTime30-59DaysPastDueNotWorse', 'NumberOfTime60-89DaysPastDueNotWorse', 'NumberOfTimes90DaysLate']
# Revolving-utilization column; values >= 4.0 are replaced by the cleaning helpers below.
rev_lines_col = 'RevolvingUtilizationOfUnsecuredLines'

def clean_train_data(train_df):
    """Replace extreme values in a copy of the training frame with column medians.

    For every column in ``overdue_cols`` rows with a value >= 90 are set to
    that column's median; rows of ``rev_lines_col`` with a value >= 4.0 are
    set to that column's median. The input frame is not modified.

    Returns:
        (cleaned_frame, fill_values) where ``fill_values`` maps
        'MonthlyIncome', each overdue column and ``rev_lines_col`` to the
        median used, so the same values can be applied to other data.
    """
    cleaned = train_df.copy()

    fill_values = {'MonthlyIncome': cleaned.median()['MonthlyIncome']}

    for overdue_col in overdue_cols:
        # The median is taken before the extreme rows are overwritten.
        outlier_rows = cleaned.index[cleaned[overdue_col] >= 90]
        col_median = cleaned[overdue_col].median()
        fill_values[overdue_col] = col_median
        cleaned.loc[outlier_rows, overdue_col] = col_median

    # Revolving utilization: overwrite first, then record the median of the
    # already-cleaned column.
    rev_outlier_rows = cleaned.index[cleaned[rev_lines_col] >= 4.0]
    cleaned.loc[rev_outlier_rows, rev_lines_col] = cleaned[rev_lines_col].median()
    fill_values[rev_lines_col] = cleaned[rev_lines_col].median()
    return cleaned, fill_values


def clean_test_data(test_df, fill_values, fill_values_other):
    """Apply previously computed replacement values to a copy of ``test_df``.

    Args:
        test_df: frame to clean; it is not modified.
        fill_values: mapping from column name to the value that replaces
            extreme entries (>= 90 for ``overdue_cols``, >= 4.0 for
            ``rev_lines_col``).
        fill_values_other: passed to ``DataFrame.fillna`` to fill the
            remaining missing values.

    Returns:
        The cleaned copy.
    """
    cleaned = test_df.copy()

    # (column, threshold) pairs, processed in the same order as before:
    # the overdue counters first, the revolving-utilization column last.
    capped_columns = [(name, 90) for name in overdue_cols] + [(rev_lines_col, 4.0)]
    for column, threshold in capped_columns:
        outlier_rows = cleaned.index[cleaned[column] >= threshold]
        cleaned.loc[outlier_rows, column] = fill_values[column]

    return cleaned.fillna(fill_values_other)

def add_features(data_frame):
    """Return a copy of ``data_frame`` with two boolean indicator columns added.

    ``rev_lines_col + 'ind'`` is True where the revolving-utilization value is
    exactly 0, and ``'overdue_ind'`` is True where the values in
    ``overdue_cols`` sum to 0 for the row.
    """
    augmented = data_frame.copy()
    zero_utilization = augmented[rev_lines_col] == 0.
    never_overdue = augmented[overdue_cols].sum(axis=1) == 0
    augmented[rev_lines_col+'ind'] = zero_utilization
    augmented['overdue_ind'] = never_overdue
    return augmented

In [4]:
## Data cleaning and adding additional features.
# Replace extreme values in the training split; fill_dict keeps the
# replacement values so the test split is cleaned with training statistics.
train_data_clean, fill_dict = clean_train_data(train_data)
# Column medians of the cleaned training split, used to fill missing values
# in both splits. (The earlier median of the raw split was overwritten
# before being used, so it is no longer computed.)
data_median = train_data_clean.median()

## fill in the remaining values with the median
train_data_clean = train_data_clean.fillna(data_median)
train_data_clean = add_features(train_data_clean)

test_data_cleaned = clean_test_data(test_data, fill_dict, data_median)
test_data_cleaned = add_features(test_data_cleaned)

# Separate the feature columns from the target column.
X_train = train_data_clean.drop(result_column, axis=1)
y_train = train_data_clean[result_column]

X_test = test_data_cleaned.drop(result_column, axis=1)
y_test = test_data_cleaned[result_column]

In [5]:
# Preview the first rows of the training features, including the two added indicator columns.
X_train.head()

Unnamed: 0,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,RevolvingUtilizationOfUnsecuredLinesind,overdue_ind
120934,0.146785,57,0.0,0.710736,2253.0,5,0.0,1,0.0,2.0,False,True
118416,0.016886,43,0.0,18.0,5400.0,4,0.0,0,0.0,1.0,False,True
33276,0.0,64,0.0,0.033605,10176.0,6,0.0,0,0.0,0.0,True,True
67994,2.143095,51,1.0,0.287061,3500.0,3,3.0,0,0.0,0.0,False,False
65964,1.0,66,0.0,42.0,5400.0,0,0.0,0,0.0,0.0,False,True


In [6]:
from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score
## Utility functions for model evaluation.
def eval_preds(y_true, y_probs, y_preds):
    """Score one set of predictions.

    Args:
        y_true: true labels.
        y_probs: predicted scores, used only for the AUC.
        y_preds: predicted class labels, used for precision, accuracy and recall.

    Returns:
        dict with the keys 'precision', 'accuracy', 'recall' and 'auc'.
    """
    label_metrics = (('precision', precision_score),
                     ('accuracy', accuracy_score),
                     ('recall', recall_score))
    scores = {name: metric(y_true, y_preds) for name, metric in label_metrics}
    scores['auc'] = roc_auc_score(y_true, y_probs)
    return scores

def get_model_eval(true_train, train_predictions, true_test=None, test_predictions=None):
    """Evaluate training predictions and, optionally, test predictions.

    ``train_predictions`` and ``test_predictions`` are unpacked into
    ``eval_preds`` after the true labels, i.e. they hold (probabilities,
    class predictions).

    Returns:
        A ``pd.Series`` of training metrics when ``true_test`` is None,
        otherwise a ``pd.DataFrame`` with the rows 'Train' and 'Test'.
    """
    train_scores = eval_preds(true_train, *train_predictions)
    if true_test is not None:
        test_scores = eval_preds(true_test, *test_predictions)
        return pd.DataFrame([train_scores, test_scores], index=['Train', 'Test'])
    return pd.Series(train_scores)

def probas_to_classes(probas, threshold=0.5):
    """Convert predicted probabilities to 0.0 / 1.0 class labels.

    Args:
        probas: array-like of probabilities. NumPy arrays behave as before;
            plain Python sequences are now accepted as well.
        threshold: probabilities greater than or equal to this value map to
            1.0, all others to 0.0. Defaults to 0.5.

    Returns:
        Float-typed labels with the same shape as ``probas``.
    """
    # np.greater_equal also accepts lists, which the bare `>=` operator does not.
    return np.greater_equal(probas, threshold).astype(float)

In [7]:
## Training the model.
# Number of epochs passed to model.fit; also the upper bound of the epoch slider further down.
num_epochs = 15
# Mini-batch size passed to model.fit.
batch_size = 5000

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras import regularizers

import keras
import pandas as pd
import keras.backend as K

import tensorflow as tf
# Open a TensorFlow session and register it as the session used by the Keras backend.
sess = tf.InteractiveSession()
K.set_session(sess)

# One weight of 1.0 per training row; fed to the gradient function inside
# WeightsGradientsCallback.
sample_weights = np.ones(X_train.shape[0])

## callback to compute the gradients
class WeightsGradientsCallback(keras.callbacks.Callback):
    """Record gradients, weights and train/test AUC at the end of every epoch.

    The callback reads several module-level names that must exist before
    training starts: ``X_train_norm``, ``X_test_norm``, ``y_train``,
    ``y_test``, ``sample_weights``, ``compute_gradients`` and ``auc_line``.

    Attributes (one entry appended per epoch):
        gradients: output of ``compute_gradients`` on the training data.
        train_auc / test_auc: ROC AUC of the model on each split.
        weights: list with ``layer.get_weights()`` for every model layer.
    """
    def __init__(self):
        # Let the Keras base class set up its own attributes first.
        super(WeightsGradientsCallback, self).__init__()
        self.gradients = []
        self.train_auc = []
        self.test_auc = []
        self.weights = []

    def on_epoch_end(self, epoch, logs=None):
        """Store gradients, AUC values and weights, and refresh the AUC plot."""
        # Inputs in the order expected by compute_gradients; the trailing 0 is
        # the value fed for the Keras learning phase.
        input_values = [X_train_norm, sample_weights, y_train.values.reshape(-1, 1), 0]
        self.gradients.append(compute_gradients(input_values))

        self.train_auc.append(roc_auc_score(y_train.values.flatten(),
                                            self.model.predict(X_train_norm)))
        self.test_auc.append(roc_auc_score(y_test.values.flatten(),
                                           self.model.predict(X_test_norm)))

        # Push the AUC history so far into the live plot.
        auc_line.x = np.arange(0, epoch + 1)
        auc_line.y = [self.train_auc, self.test_auc]

        # Snapshot the weights of every layer (including layers without weights).
        self.weights.append([layer.get_weights() for layer in self.model.layers])
        
class LayerEvalsCallsback(keras.callbacks.Callback):
    """Record the output of every layer on the train and test data after each epoch.

    Reads the module-level names ``layer_evaluator``, ``X_train_norm`` and
    ``X_test_norm``, which must exist before training starts.

    Attributes (one entry appended per epoch):
        train_activs / test_activs: result of ``layer_evaluator`` on each split.
    """
    def __init__(self):
        # Let the Keras base class set up its own attributes first.
        super(LayerEvalsCallsback, self).__init__()
        self.train_activs = []
        self.test_activs = []

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate all layer outputs on both splits and store them."""
        # NOTE(review): the second input is the value fed for the Keras
        # learning phase. 1. is used here while the gradient callback feeds 0;
        # presumably 1 means training mode, so Dropout would be active during
        # this evaluation -- confirm this is intended.
        self.train_activs.append(layer_evaluator([X_train_norm, 1.]))
        self.test_activs.append(layer_evaluator([X_test_norm, 1.]))

# Callback instances handed to model.fit; their recorded history is read by
# the visualisation cells further down.
weights_callback = WeightsGradientsCallback()
activs_callback = LayerEvalsCallsback()

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Fit the scaler on the training features only and reuse it for the test features.
X_train_norm = scaler.fit_transform(X_train.values.astype('float'))
X_test_norm = scaler.transform(X_test.values.astype('float'))
# Rate passed to the Dropout layer of the model.
dropout_prob = 0.2

# Network: Dense(20, relu) -> Dropout -> Dense(10, tanh) -> Dense(1, sigmoid).
model = Sequential()
model.add(Dense(20, input_dim=X_train_norm.shape[1], activation='relu'))
model.add(Dropout(dropout_prob))

model.add(Dense(10, activation='tanh'))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy loss with the Adam optimizer; accuracy is reported during training.
model.compile(loss='binary_crossentropy', optimizer='adam', 
              metrics=['accuracy'])

## variables for WeightsGradientsCallback:
# Symbolic gradients of the total loss with respect to every trainable weight.
gradients = model.optimizer.get_gradients(model.model.total_loss, 
                                          model.trainable_weights)
# Inputs of the gradient function: features, sample weights, targets and the
# learning phase -- the same order as the values built in the callback.
input_tensors = [model.model.inputs[0], model.model.sample_weights[0], 
                 model.model.targets[0], K.learning_phase()]
compute_gradients = K.function(inputs=input_tensors, outputs=gradients)   

## display figure for WeightsGradientsCallback
# auc_line starts with a single placeholder point; the callback overwrites
# its x and y after every epoch.
auc_fig = plt.figure(title='Train and Test AUC vs epoch', legend_location='top-left')
auc_line = plt.plot([0], [0], marker='circle', marker_size=32, colors=['DeepSkyBlue', 'Red'], 
                              labels=['Training', 'Test'], display_legend=True)
display(auc_fig)

## variables for LayerEvalsCallback
# Backend function returning the output of every layer (the Dropout layer
# included) for a given input batch and learning-phase value.
inp = model.input                                          
outputs = [layer.output for layer in model.layers]         
layer_evaluator = K.function([inp]+ [K.learning_phase()], outputs) 

# Fit the model
# Both callbacks run at the end of every epoch and record their history.
model.fit(X_train_norm, y_train.values, verbose=2,
          epochs=num_epochs, batch_size=batch_size,
           callbacks=[weights_callback, activs_callback])

# Predicted probabilities, flattened to 1-D, and the class labels derived from them.
train_probs = model.predict(X_train_norm).flatten()
train_preds = probas_to_classes(train_probs)

test_probs = model.predict(X_test_norm).flatten()
test_preds = probas_to_classes(test_probs)


# Metrics table with one row for the training split and one for the test split.
model_eval = get_model_eval(y_train, [train_probs, train_preds],
                            y_test, [test_probs, test_preds])
print(model_eval)
# NOTE(review): the session is closed here; using `model` after this cell
# presumably fails -- later cells only read the data stored in the callbacks.
sess.close()    

Using TensorFlow backend.


A Jupyter Widget

Epoch 1/15
5s - loss: 0.5777 - acc: 0.7807
Epoch 2/15
3s - loss: 0.4434 - acc: 0.9150
Epoch 3/15
3s - loss: 0.3591 - acc: 0.9309
Epoch 4/15
3s - loss: 0.3059 - acc: 0.9330
Epoch 5/15
4s - loss: 0.2703 - acc: 0.9338
Epoch 6/15
3s - loss: 0.2457 - acc: 0.9340
Epoch 7/15
5s - loss: 0.2301 - acc: 0.9335
Epoch 8/15
4s - loss: 0.2196 - acc: 0.9336
Epoch 9/15
3s - loss: 0.2121 - acc: 0.9340
Epoch 10/15
7s - loss: 0.2074 - acc: 0.9344
Epoch 11/15
3s - loss: 0.2036 - acc: 0.9342
Epoch 12/15
3s - loss: 0.2008 - acc: 0.9344
Epoch 13/15
3s - loss: 0.1984 - acc: 0.9348
Epoch 14/15
3s - loss: 0.1974 - acc: 0.9346
Epoch 15/15
3s - loss: 0.1955 - acc: 0.9350
       accuracy       auc  precision    recall
Train  0.935971  0.832089   0.579258  0.153605
Test   0.937067  0.831639   0.602088  0.172540


In [8]:
def get_activations_hist(epoch, layer, node, data='Train'):
    """Return the recorded outputs of one node for one epoch.

    Args:
        epoch: 1-based epoch number.
        layer: index into the recorded layer outputs; any value >= 1 is
            shifted by one before the lookup.
        node: index of the node within the layer.
        data: 'Train' selects the training activations, any other value the
            test activations.
    """
    if data == 'Train':
        recorded = activs_callback.train_activs
    else:
        recorded = activs_callback.test_activs

    # Layers at position 1 and above are shifted by one slot.
    output_idx = layer + 1 if layer >= 1 else layer
    return recorded[epoch - 1][output_idx].T[node]

In [9]:
def get_cleaned_weights(weights_mat):
    """Drop the entries of layers that have no weights.

    Args:
        weights_mat: list with one entry per layer, each entry being the list
            returned by ``layer.get_weights()``.

    Returns:
        A new list with only the non-empty entries, in the original order.
    """
    # Layers such as dropout report an empty weight list. len() is used
    # instead of np.shape() because a [kernel, bias] pair is a ragged
    # sequence that NumPy may refuse to convert to an array.
    return [w for w in weights_mat if len(w) != 0]

def get_weights_for_node_at_layer(weights, epoch_num, layer_num, node_num):
    """Return ``(bias, incoming_weights)`` of one node.

    ``weights[epoch_num][layer_num]`` must hold the layer's weight matrix at
    position 0 and its bias vector at position 1; the node's incoming weights
    are column ``node_num`` of the matrix.
    """
    params = weights[epoch_num][layer_num]
    kernel = params[0]
    bias = params[1]

    incoming = kernel[:, node_num]
    return (bias[node_num], incoming)

def get_gradients_for_node_at_layer(gradients, epoch_num, layer_num, node_num):
    """Return ``(bias_gradient, weight_gradients)`` of one node.

    ``gradients[epoch_num]`` is a flat list in which position ``2 * layer_num``
    holds a layer's weight-matrix gradient and position ``2 * layer_num + 1``
    its bias gradient.
    """
    epoch_grads = gradients[epoch_num]
    kernel_grad = epoch_grads[2 * layer_num]
    bias_grad = epoch_grads[2 * layer_num + 1]

    node_kernel_grad = kernel_grad[:, node_num]
    return (bias_grad[node_num], node_kernel_grad)

# Per-epoch weight snapshots with the entries of weightless layers removed.
cleaned_weights = [get_cleaned_weights(epoch_weights)
                   for epoch_weights in weights_callback.weights]

In [10]:
from itertools import chain, product
class NeuralNet(Figure):
    """Figure subclass that draws a fully connected feed-forward network as a Graph mark.

    Keyword arguments read in ``__init__``:
        num_inputs: number of input nodes (required).
        num_hidden_layers: sequence with one node count per hidden layer (required).
        num_outputs: number of output nodes (required).
        height, width: figure size in pixels (defaults 800 and 900).
        directed_links: passed to the Graph mark as ``directed`` (default False).
        layer_colors: one color per layer; defaults to 'Orange' for every layer.

    All keyword arguments are also forwarded unchanged to the parent ``__init__``.
    """
    def __init__(self, **kwargs):
        self.height = kwargs.get('height', 800)
        self.width = kwargs.get('width', 900)
        self.directed_links = kwargs.get('directed_links', False)
        
        # Required arguments: a missing key raises KeyError.
        self.num_inputs = kwargs['num_inputs']
        self.num_hidden_layers = kwargs['num_hidden_layers']
        self.nodes_output_layer = kwargs['num_outputs']
        # Default: one 'Orange' entry for the input layer, each hidden layer and the output layer.
        self.layer_colors = kwargs.get('layer_colors', 
                                       ['Orange'] * (len(self.num_hidden_layers) + 2))
        
        # The graph, title and layout are set up before the parent initializer runs.
        self.build_net()
        super(NeuralNet, self).__init__(**kwargs)
    
    def build_net(self):
        """Create node labels, links, positions and colors, and install the Graph mark."""
        # create nodes
        # Labels: 'x1', 'x2', ... for inputs, 'h<layer>,<node>' for hidden
        # nodes and 'y1', ... for outputs (all 1-based).
        self.layer_nodes = []
        self.layer_nodes.append(['x' + str(i+1) for i in range(self.num_inputs)])
        
        for i, h in enumerate(self.num_hidden_layers):
            self.layer_nodes.append(['h' + str(i+1) + ',' + str(j+1) for j in range(h)])
        self.layer_nodes.append(['y' + str(i+1) for i in range(self.nodes_output_layer)])
        
        self.flattened_layer_nodes = list(chain(*self.layer_nodes))
        
        # build link matrix
        # Map every node label to its position in the flattened node list.
        i = 0
        node_indices = {}
        for layer in self.layer_nodes:
            for node in layer:
                node_indices[node] = i
                i += 1

        # n x n matrix filled with NaN; an entry of 1 marks a link.
        n = len(self.flattened_layer_nodes)
        self.link_matrix = np.empty((n,n))
        self.link_matrix[:] = np.nan

        # Connect every node of a layer to every node of the following layer.
        for i in range(len(self.layer_nodes) - 1):
            curr_layer_nodes_indices = [node_indices[d] for d in self.layer_nodes[i]]
            next_layer_nodes = [node_indices[d] for d in self.layer_nodes[i+1]]
            for s, t in product(curr_layer_nodes_indices, next_layer_nodes):
                self.link_matrix[s, t] = 1
        
        # set node x locations
        # One evenly spaced x value per layer strictly inside (0, 100),
        # repeated once for every node of that layer.
        self.nodes_x = np.repeat(np.linspace(0, 100, 
                                             len(self.layer_nodes) + 1, 
                                             endpoint=False)[1:], 
                                 [len(n) for n in self.layer_nodes])

        # set node y locations
        # Within a layer the y values are evenly spaced inside (0, 100) and
        # reversed, so the first node of a layer gets the largest y.
        self.nodes_y = np.array([])
        for layer in self.layer_nodes:
            n = len(layer)
            ys = np.linspace(0, 100, n+1, endpoint=False)[1:]
            self.nodes_y = np.append(self.nodes_y, ys[::-1])
        
        # set node colors
        # Each layer's color is repeated for every node of that layer.
        n_layers = len(self.layer_nodes)
        self.node_colors = np.repeat(np.array(self.layer_colors[:n_layers]), 
                                     [len(layer) for layer in self.layer_nodes]).tolist()
        
        # Both axes use a fixed 0..100 scale matching the node coordinates above.
        xs = LinearScale(min=0, max=100)
        ys = LinearScale(min=0, max=100)
        
        self.graph = Graph(node_data=[{'label': d, 
                                       'label_display': 'none'} for d in self.flattened_layer_nodes], 
                           link_matrix=self.link_matrix, link_type='line',
                           colors=self.node_colors, directed=self.directed_links,
                           scales={'x': xs, 'y': ys}, x=self.nodes_x, y=self.nodes_y)
        # NOTE(review): 'stroke' is given a numeric-looking value here;
        # possibly 'stroke-width' was intended -- confirm.
        self.graph.hovered_style = {'stroke': '1.5'}
        self.graph.unhovered_style = {'opacity': '0.1'}
        self.graph.selected_style = {'opacity': '1',
                                     'stroke': 'red',
                                     'stroke-width': '2.5'}
        self.marks = [self.graph]
        self.title = 'Analyzing the Trained Neural Network'
        self.layout.width = str(self.width) + 'px'
        self.layout.height = str(self.height) + 'px'

In [11]:
# Layer sizes mirror the trained model: 12 input features, hidden layers of
# 20 and 10 nodes, one output node.
# NOTE(review): 12 is hard-coded; it has to match the number of feature columns in X_train.
nn = NeuralNet(num_inputs=12, num_hidden_layers=[20, 10], num_outputs=1)

# Controls: epoch selector (1-based) and the quantity shown in the tooltip.
epoch_slider = IntSlider(description='Epoch:', min=1, max=num_epochs, value=1)
mode_dd = Dropdown(description='View', options=['Weights', 'Gradients', 'Activations'], value='Weights')
# NOTE(review): no click handler is attached to this button in the visible code.
update_btn = Button(description='Update')

# Bar chart used as tooltip for weights and gradients.
bar_figure = plt.figure()
bar_plot = plt.bar([], [], scales={'x': OrdinalScale()})

# Histogram used as tooltip for activations.
hist_figure = plt.figure(title='Histogram of Activations')
hist_plot = plt.hist([], bins=20)

controls = HBox([epoch_slider, mode_dd, update_btn])
# The bar chart is the initial tooltip of the network graph.
nn.graph.tooltip = bar_figure

In [12]:
def update_bar_chart(layer, node):
    """Refresh the tooltip figure for the hovered node.

    Args:
        layer: layer index of the node as counted in the drawn network, where
            0 is the input layer; ``layer - 1`` is passed to the lookup helpers.
        node: 0-based index of the node within its layer.

    Depending on the 'View' dropdown, the tooltip shows a histogram of the
    node's activations or a bar chart with the node's bias followed by its
    incoming weights (or their gradients) for the epoch selected on the slider.
    """
    epoch = epoch_slider.value
    mode = mode_dd.value
    title = mode + ' for layer:' + str(layer) + ' node: ' + str(node) + ' at epoch: ' + str(epoch)

    if mode == 'Activations':
        # get_activations_hist expects the 1-based epoch number.
        hist_plot.sample = get_activations_hist(epoch, layer-1, node)
        nn.graph.tooltip = hist_figure
        hist_figure.title = title
        return

    # The slider is 1-based, but the per-epoch weight and gradient lists are
    # 0-based: using the slider value directly would show the following epoch
    # and raise IndexError for the last one.
    epoch_idx = epoch - 1
    if mode == 'Weights':
        display_vals = get_weights_for_node_at_layer(cleaned_weights, epoch_idx, layer-1, node)
    elif mode == 'Gradients':
        display_vals = get_gradients_for_node_at_layer(weights_callback.gradients, epoch_idx, layer-1, node)
    else:
        # Unknown view mode: nothing to display.
        return

    # First bar is the bias, the remaining bars are the incoming weights.
    return_vals = np.append([display_vals[0]], display_vals[1])

    bar_figure.title = title
    bar_plot.x = np.arange(len(return_vals))
    bar_plot.y = return_vals
    # Switch the tooltip back in case the histogram was shown before.
    nn.graph.tooltip = bar_figure
    
# Number of nodes per drawn layer (inputs, hidden layers, outputs), in the
# same order as the flattened node list of the graph.
node_counts = [nn.num_inputs] + nn.num_hidden_layers + [nn.nodes_output_layer]

def hovered_change(change):
    """Observer callback: translate the hovered node into (layer, node) and update the tooltip.

    ``change['new']`` is the hovered node's position in the flattened node
    list, or None when nothing is hovered. Input-layer nodes (layer 0) do not
    trigger an update.
    """
    flat_index = change['new']
    if flat_index is None:
        return

    # Walk through the layers, subtracting their sizes until the remaining
    # offset falls inside the current layer.
    offset = flat_index
    for layer_idx, layer_size in enumerate(node_counts):
        if offset < layer_size:
            break
        offset = offset - layer_size

    if layer_idx > 0:
        update_bar_chart(layer_idx, offset)
    
# Call hovered_change whenever the graph's hovered_point value changes.
nn.graph.observe(hovered_change, 'hovered_point')

# Stack the controls above the network figure; the last expression renders the widget.
VBox([controls, nn], layout=Layout(min_height='1000px'))    

A Jupyter Widget