In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import networkx as nx
import os
from sklearn.decomposition import PCA
import scipy
import csv
import pandas as pd

In [2]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input, Activation, BatchNormalization
from keras import backend as K
from keras.callbacks import Callback, CSVLogger
from keras import regularizers

Using TensorFlow backend.


In [31]:
# Parameters
# Single configuration dictionary read by the model-building, plotting and
# weight-reloading cells below.
params = {
    'epochs': 20,
    'neurons_per_layer': 100,
    'pca_components': 100,
    'number_of_layers': 5,
    'save_folder': '500-Nodes',
    'regularisation': 0.001,  # L1 regulariser weight, or None to disable
    'threshold_type': 'value',  # 'fraction' or 'value'
    'threshold': 0.08,  # cut-off for the chosen threshold type (e.g. 1E-2), or None
    'activation': 'elu',
    'dropout': False,
    'batch_norm': False,
}

# Save params
os.makedirs(params['save_folder'], exist_ok = True)
# newline = '' is required by the csv module; without it every row is followed
# by an empty row on platforms that translate '\n' to '\r\n'.
with open(os.path.join(params['save_folder'], 'params.csv'), 'w', newline = '') as f:
    writer = csv.writer(f)
    for key, val in params.items():
        writer.writerow([key, val])

In [32]:
# Prepare data
batch_size = 128
num_classes = 10

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image to one row and scale pixel values by 1/255.
# The number of samples is taken from the arrays themselves instead of being
# hard-coded, so the cell keeps working if the split sizes change.
x_train = x_train.reshape(x_train.shape[0], -1).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], -1).astype('float32') / 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

60000 train samples
10000 test samples


In [33]:
# Reduce input dimensions
# The PCA projection is fitted on the training set only and then applied
# unchanged to the test set.
pca = PCA(n_components = params['pca_components'])
x_train_pca = pca.fit_transform(x_train)
x_test_pca = pca.transform(x_test)

# Computed once so the printed and the saved value cannot drift apart.
variance_explained = round(sum(pca.explained_variance_ratio_), 3)
print("Variance explained:", variance_explained)

# newline = '' is required by the csv module to avoid blank rows on platforms
# that translate line endings.
with open(os.path.join(params['save_folder'], 'pca.csv'), 'w', newline = '') as f:
    writer = csv.writer(f)
    writer.writerow(['pca_components', params['pca_components']])
    writer.writerow(['variance_explained', variance_explained])

Variance explained: 0.914


In [34]:
def getModel(hidden_layers = 1, neurons_per_layer = 512, input_dimension = 784, reg = None,
             dropout = False, batch_norm = False, activation = 'elu'):
    """Build an (uncompiled) fully connected Sequential classifier.

    The Keras session is cleared first, so any previously built model is discarded.

    Parameters
    ----------
    hidden_layers : int
        The network gets ``hidden_layers + 1`` Dense layers of width
        ``neurons_per_layer`` followed by one softmax output layer.
    neurons_per_layer : int
        Width of every Dense layer except the output layer.
    input_dimension : int
        Number of input features.
    reg : float or None
        Weight of the L1 kernel regulariser applied to every Dense layer;
        ``None`` disables regularisation.
    dropout : bool
        If true, a ``Dropout(0.2)`` layer follows every activation.
    batch_norm : bool
        If true, a ``BatchNormalization`` layer precedes every activation and
        the output layer is given a bias term.
    activation : str
        Name of the activation applied after every non-output Dense layer.

    Returns
    -------
    keras.models.Sequential
        The model, with ``num_classes`` (module-level) output units.
    """
    K.clear_session()

    model = Sequential()

    reg = None if reg is None else keras.regularizers.l1(reg)

    # `use_bias` is the Keras 2 name of this argument; the legacy `bias` keyword
    # only works through a deprecation shim and triggers a warning per layer.
    for i in range(hidden_layers + 1):
        if i == 0:
            # Only the first layer needs to be told the input size.
            model.add(Dense(neurons_per_layer, input_shape = (input_dimension,), use_bias = False,
                            kernel_regularizer = reg))
        else:
            model.add(Dense(neurons_per_layer, use_bias = False,
                            kernel_regularizer = reg))
        if batch_norm:
            model.add(BatchNormalization())
        model.add(Activation(activation))
        if dropout:
            model.add(Dropout(0.2))

    # NOTE(review): the output layer has a bias only when batch_norm is on —
    # presumably to match what export_utils.copy_remove_batchnorm produces; confirm.
    model.add(Dense(num_classes, activation = 'softmax', use_bias = batch_norm,
                    kernel_regularizer = reg))
    return model

In [35]:
# Define model
# Translate the notebook-level `params` into getModel's keyword arguments.
# getModel adds one more Dense layer than `hidden_layers` plus the output
# layer, hence the "- 2" to end up with `number_of_layers` Dense layers.
model_kwargs = dict(
    hidden_layers = params['number_of_layers'] - 2,
    neurons_per_layer = params['neurons_per_layer'],
    input_dimension = params['pca_components'],
    reg = params['regularisation'],
    dropout = params['dropout'],
    batch_norm = params['batch_norm'],
    activation = params['activation'],
)
model = getModel(**model_kwargs)

model.summary()

model.compile(optimizer = 'adam',
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               10000     
_________________________________________________________________
activation_1 (Activation)    (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10000     
_________________________________________________________________
activation_2 (Activation)    (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10000     
_________________________________________________________________
activation_3 (Activation)    (None, 100)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10000     
__________

  if sys.path[0] == '':
  from ipykernel import kernelapp as app


In [36]:
class GraphSaver(Callback):
    """Keras callback that renders the network as a graph and dumps its weights.

    A snapshot (plot + per-layer weight CSVs) is taken before training and
    after every epoch. The unweighted adjacency matrices of the thresholded
    graph before and after training are written to ``ANN_Adj`` via
    ``scipy.io.savemat`` when training ends.

    Relies on the module-level ``params`` and ``num_classes`` and on the
    graph helper functions defined elsewhere in this notebook.
    """

    def __init__(self, save_folder = 'G1', **other_params):
        """Create ``save_folder/weights`` and initialise the adjacency store.

        Parameters
        ----------
        save_folder : str
            Directory that receives plots, weight CSVs and the adjacency file.
        **other_params
            Forwarded unchanged to ``Callback.__init__``.
        """
        super().__init__(**other_params)
        self.save_folder = save_folder
        os.makedirs(os.path.join(self.save_folder, 'weights'), exist_ok = True)
        self.ANNadj = {}

    def on_train_begin(self, logs = None):
        """Snapshot the untrained network (stored as epoch 0)."""
        self.G, self.edge_list = self.draw(0)
        adj = self.get_subgraph_adj(self.G, self.edge_list, weight = None)
        self.ANNadj["ANN_Adj_Mat_Untrained"] = adj

    def on_epoch_end(self, epoch, logs = None):
        """Snapshot the network after an epoch; Keras counts epochs from 0, files from 1."""
        self.G, self.edge_list = self.draw(epoch + 1)

    def on_train_end(self, logs = None):
        """Store the adjacency of the last snapshot and write both matrices to disk."""
        # `import scipy` alone does not guarantee that the `io` submodule is
        # loaded, so import it explicitly where it is needed.
        import scipy.io

        adj = self.get_subgraph_adj(self.G, self.edge_list, weight = None)
        self.ANNadj["ANN_Adj_Mat_Trained"] = adj
        scipy.io.savemat(os.path.join(self.save_folder, "ANN_Adj"), self.ANNadj)

    def draw(self, epoch):
        """Build the graph of the current model, save weights and plot for ``epoch``.

        Returns
        -------
        (G, edge_list)
            The full graph and the list of edges that drawPlot kept after
            thresholding.
        """
        G = nx.Graph()
        # NOTE(review): presumably returns a copy of the model with the
        # BatchNormalization layers removed — confirm in export_utils.
        new_model = export_utils.copy_remove_batchnorm(self.model)
        layers = getLayers(new_model)
        weights, bias = getLayerWeights(layers)
        self.save_weights(weights, epoch)
        initialiseGraphNodes(layers, G, bias = bias)
        initialiseGraphEdges(weights, G)
        # Spread the output nodes vertically so they span the same height as
        # the wider layers.
        pos = getNodePositions(G, last_layer_multiplier = params['neurons_per_layer'] // num_classes,
                               last_layer = params['number_of_layers'])
        save_name = os.path.join(self.save_folder, '{:02}'.format(epoch))
        edge_list = drawPlot(G, pos, save_name = save_name)
        return G, edge_list

    def get_subgraph_adj(self, G, edge_list, weight = None):
        """Return ``nx.adjacency_matrix`` of the subgraph induced by ``edge_list``.

        ``weight`` is passed through to networkx; the callers in this class
        pass ``None``.
        """
        G_subgraph = G.edge_subgraph(edge_list)
        adj = nx.adjacency_matrix(G_subgraph, weight = weight)
        return adj

    def save_weights(self, weights, epoch):
        """Write every layer's weight matrix to ``weights/epochXX-layerN.csv``."""
        for l, w in weights.items():
            file_name = "epoch{:02}-layer{}.csv".format(epoch, l)
            np.savetxt(os.path.join(self.save_folder, 'weights', file_name), w, delimiter = ",")

In [37]:
import export_utils

In [38]:
def getLayers(model):
    """Return the model's Dense layers keyed by their 1-based position.

    Non-Dense layers are skipped and do not consume a number; the order of
    ``model.layers`` is preserved.
    """
    dense_layers = (candidate for candidate in model.layers if isinstance(candidate, Dense))
    return {position: dense for position, dense in enumerate(dense_layers, start = 1)}

In [39]:
def getLayerWeights(layers):
    """Split each layer's ``get_weights()`` result into kernel and bias.

    Parameters
    ----------
    layers : dict
        Maps consecutive integers ``1..len(layers)`` to objects exposing
        ``get_weights()``.

    Returns
    -------
    (weights, bias) : tuple of dict
        ``weights[i]`` is the first element returned by layer ``i``;
        ``bias[i]`` is the second element and exists only for layers that
        returned more than one. A non-list result is stored as-is in
        ``weights``.
    """
    weights, bias = {}, {}
    for index in range(1, len(layers) + 1):
        layer_params = layers[index].get_weights()
        if not isinstance(layer_params, list):
            # Not expected in practice, kept as a fallback.
            weights[index] = layer_params
            continue
        weights[index] = layer_params[0]
        if len(layer_params) > 1:
            bias[index] = layer_params[1]
    return weights, bias

In [40]:
def initialiseGraphNodes(layers, G, bias = None):
    """Add one node per neuron to ``G``, labelled ``"<layer>-<neuron>"``.

    Layer 0 holds the inputs of layer 1 (sized from ``input_shape[1]``).
    When ``bias`` has an entry for a layer, that layer's nodes carry a
    ``'bias'`` attribute and their count follows the bias vector; otherwise
    the count comes from ``layer.units``.
    """
    for index, layer in layers.items():
        if index == 1:
            # The input nodes exist only in front of the first layer.
            input_nodes = ["{}-{}".format(index - 1, n) for n in range(layer.input_shape[1])]
            G.add_nodes_from(input_nodes)

        has_bias = bias is not None and index in bias
        if has_bias:
            layer_nodes = [("{}-{}".format(index, n), {'bias': b})
                           for n, b in enumerate(bias[index])]
        else:
            layer_nodes = ["{}-{}".format(index, n) for n in range(layer.units)]
        G.add_nodes_from(layer_nodes)

In [41]:
def initialiseGraphEdges(weights, G):
    """Add one weighted edge per kernel entry to ``G``.

    For layer ``k``, entry ``[i, j]`` of its matrix becomes the edge
    ``"<k-1>-<i>" -> "<k>-<j>"`` with attribute ``'weight'``. All edges are
    handed to ``G.add_edges_from`` in a single call.
    """
    edge_bunch = []
    for layer_idx, matrix in weights.items():
        n_in, n_out = matrix.shape
        edge_bunch.extend(
            ("{}-{}".format(layer_idx - 1, src),
             "{}-{}".format(layer_idx, dst),
             {'weight': matrix[src, dst]})
            for src in range(n_in)
            for dst in range(n_out)
        )

    G.add_edges_from(edge_bunch)

In [42]:
def getNodePositions(G, last_layer_multiplier = 5, last_layer = None):
    """Compute plot coordinates for nodes labelled ``"<layer>-<neuron>"``.

    x is the layer number and y the neuron number; for nodes in
    ``last_layer`` the y coordinate is multiplied by
    ``last_layer_multiplier``.

    Returns
    -------
    dict
        Maps every node of ``G`` to a 2-element numpy array ``[x, y]``.
    """
    def _coordinates(label):
        parts = label.split('-')
        layer_idx = int(parts[0])
        neuron_idx = int(parts[1])
        stretch = last_layer_multiplier if layer_idx == last_layer else 1
        return np.array([layer_idx, neuron_idx * stretch])

    return {label: _coordinates(label) for label in G.nodes()}

In [43]:
def getEdgeColour(G, threshold_by_value = None, threshold_by_fraction = None):
    """Select the edges to draw and return them with their weights.

    Parameters
    ----------
    G : graph
        Must provide ``G.edges.data('weight')`` yielding ``(u, v, weight)``.
    threshold_by_value : float or None
        If given, only edges with ``abs(weight) > threshold_by_value`` are kept.
    threshold_by_fraction : float or None
        If given, only this fraction of the (already value-filtered) edges
        with the largest absolute weight is kept. The count is rounded and
        clamped to ``[0, number of edges]``.

    Returns
    -------
    (edge_list, edge_color) : tuple of list
        ``edge_list`` holds ``(u, v)`` pairs and ``edge_color`` the matching
        weights. With a fraction threshold both are ordered by ascending
        absolute weight; otherwise they follow the graph's edge order.
    """
    edge_list = []
    edge_color = []
    for (u, v, w) in G.edges.data('weight'):
        if threshold_by_value is None or abs(w) > threshold_by_value:
            edge_list.append((u, v))
            edge_color.append(w)
    if threshold_by_fraction is not None:
        total = len(edge_list)
        n = min(max(int(round(total * threshold_by_fraction)), 0), total)
        edge_color = np.array(edge_color)
        sort_order = np.argsort(np.abs(edge_color))
        # Slice from the front offset rather than with [-n:]: for n == 0 the
        # latter is [-0:], which selects every edge instead of none.
        keep = sort_order[total - n:]
        edge_color = edge_color[keep].tolist()
        edge_list = [edge_list[i] for i in keep]
    return edge_list, edge_color

In [44]:
def getNodeColour(G):
    """Return one colour value per node: its ``'bias'`` attribute, or 0 if it has none.

    The values follow the order of ``G.nodes.data('bias')``.
    """
    return [0 if node_bias is None else node_bias
            for _node, node_bias in G.nodes.data('bias')]

In [45]:
def drawPlot(G, pos, save_name = None, show_plot = False):
    """Draw the thresholded network graph and optionally save and/or show it.

    Edges are selected by ``getEdgeColour`` using the module-level
    ``params['threshold_type']`` / ``params['threshold']`` and coloured by
    weight on a symmetric RdBu scale; nodes are coloured by bias on a
    symmetric viridis scale. A bias colour bar is added only when
    ``params['batch_norm']`` is true.

    Parameters
    ----------
    G : networkx graph
        Graph with ``'weight'`` edge attributes and optional ``'bias'`` node
        attributes.
    pos : dict
        Node -> coordinates, as produced by ``getNodePositions``.
    save_name : str or None
        Output path; ``'.png'`` is appended when missing. ``None`` skips saving.
    show_plot : bool
        If true, ``plt.show()`` is called before the figure is closed.

    Returns
    -------
    list
        The ``(u, v)`` edges that were kept after thresholding.
    """
    fig, ax = plt.subplots(figsize = [18, 8])
    threshold_option = {'threshold_by_{}'.format(params['threshold_type']): params['threshold']}
    edge_list, edge_color = getEdgeColour(G, **threshold_option)
    edge_cmap = plt.cm.RdBu
    # max() of an empty sequence raises; fall back to a tiny symmetric range
    # when the threshold removed every edge.
    edge_vmax = max(np.abs(edge_color)) if len(edge_color) > 0 else 1E-12
    edge_vmin = -edge_vmax
    node_color = getNodeColour(G)
    node_cmap = plt.cm.viridis  # plt.cm.PRGn
    # The epsilon keeps the range non-degenerate when every bias is zero.
    node_vmax = max(np.abs(node_color)) + 1E-12
    node_vmin = -node_vmax

    # Draw on the axes created above instead of relying on pyplot's "current axes".
    nx.draw_networkx(G, pos, ax = ax, with_labels = False, node_size = 50, edgelist = edge_list,
                     edge_color = edge_color, edge_cmap = edge_cmap, edge_vmin = edge_vmin,
                     edge_vmax = edge_vmax, node_color = node_color,
                     cmap = node_cmap, vmax = node_vmax, vmin = node_vmin)

    # The mappables are not attached to any artist, so the colour bars must be
    # told explicitly which axes to take space from.
    sm = plt.cm.ScalarMappable(cmap = edge_cmap, norm = plt.Normalize(vmin = edge_vmin, vmax = edge_vmax))
    sm.set_array([])
    fig.colorbar(sm, ax = ax, label = 'Edge weight')
    if params['batch_norm']:
        sm_1 = plt.cm.ScalarMappable(cmap = node_cmap, norm = plt.Normalize(vmin = node_vmin, vmax = node_vmax))
        sm_1.set_array([])
        fig.colorbar(sm_1, ax = ax, label = 'Bias')
    ax.set_title("Threshold by {} = abs({})".format(params['threshold_type'], params['threshold']))

    if save_name is not None:
        if not save_name.endswith('.png'):
            save_name += '.png'
        fig.savefig(save_name, dpi = 200)

    if show_plot:
        plt.show()

    # Close this specific figure; the function is called once per epoch and
    # open figures would otherwise accumulate.
    plt.close(fig)

    return edge_list

In [46]:
# Fit and test
# GraphSaver snapshots the network every epoch; CSVLogger writes the
# per-epoch metrics to history.csv (overwriting any previous file).
graphSaver = GraphSaver(params['save_folder'])
history_path = os.path.join(params['save_folder'], 'history.csv')
csv_logger = CSVLogger(history_path, append = False)
training_callbacks = [graphSaver, csv_logger]

# Note: the test set is also used as validation data during training.
history = model.fit(
    x_train_pca,
    y_train,
    validation_data = (x_test_pca, y_test),
    callbacks = training_callbacks,
    epochs = params['epochs'],
    batch_size = batch_size,
    verbose = 1,
)

score = model.evaluate(x_test_pca, y_test, verbose = 0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Train on 60000 samples, validate on 10000 samples


  if cb.is_numlike(alpha):


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss: 0.4158904040336609
Test accuracy: 0.9524


In [49]:
# Test with loaded weights
# Reload the weight matrices saved after the final epoch, zero every entry
# whose magnitude is below `threshold`, and collect them in `weights`, keyed
# by the layer number as a string.
folder = os.path.join(params['save_folder'], 'weights')
weights = {}
# Test out thresholding
threshold = 0.08
# Files are written by GraphSaver.save_weights as "epochXX-layerN.csv".
final_epoch_prefix = 'epoch{:02}-layer'.format(params['epochs'])
# Sorted so the report below comes out in a stable order (os.listdir's order is arbitrary).
for f in sorted(os.listdir(folder)):
    if f.endswith('.csv') and f.startswith(final_epoch_prefix):
        # Take everything between the prefix and the extension, so layer
        # numbers with more than one digit are read correctly.
        layer = f[len(final_epoch_prefix):-len('.csv')]
        w = np.loadtxt(os.path.join(folder, f), delimiter = ',')
        print("Removing fraction {} from layer {}".format(np.mean(np.abs(w) < threshold), layer))
        w[np.abs(w) < threshold] = 0
        weights[layer] = w
        
def activation(x, type):
    """Apply an activation function element-wise and return a new array.

    Parameters
    ----------
    x : array-like
        Input values. The input is never modified; non-floating input is
        converted to float so the result is not truncated.
    type : str
        Activation name. Only ``'elu'`` (with alpha = 1) is implemented:
        ``f(x) = x`` for ``x >= 0`` and ``exp(x) - 1`` for ``x < 0``.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x``.

    Raises
    ------
    ValueError
        If ``type`` is not a known activation.
    """
    if type != 'elu':
        raise ValueError("Activation type unknown")

    # Work on a copy: assigning into `x` itself would silently change the
    # caller's array.
    out = np.array(x, copy = True)
    if not np.issubdtype(out.dtype, np.floating):
        out = out.astype(float)
    negative = out < 0
    out[negative] = np.exp(out[negative]) - 1  # alpha = 1
    return out

Removing fraction 0.9683 from layer 1
Removing fraction 0.9865 from layer 2
Removing fraction 0.9925 from layer 3
Removing fraction 0.9929 from layer 4
Removing fraction 0.911 from layer 5


In [50]:
# Manual forward pass through the thresholded weight matrices loaded above.
# The configured activation is applied after every layer, including the last
# one; only the arg-max of the final output is used for the prediction.
X = x_test_pca
layer_keys = [str(layer_number) for layer_number in range(1, params['number_of_layers'] + 1)]
for layer_key in layer_keys:
    pre_activation = np.dot(X, weights[layer_key])
    X = activation(pre_activation, type = params['activation'])

y = np.argmax(X, axis = 1)
true_labels = np.argmax(y_test, axis = 1)
print('Accuracy = {:.4f}'.format(np.mean(y == true_labels)))

Accuracy = 0.9386
