In [3]:
import numpy as np
import random

input_nodes = 10
hidden_nodes = 5
output_nodes = 7

wih = np.random.randint(-10, 10,(hidden_nodes, input_nodes))
wih

array([[ -7,   6,   7,   7,  -4,   4,   1,  -2,   9,   7],
       [ -2,  -9,   0, -10,  -5,   6,   6,   2,  -2,   7],
       [  1,  -1,   4,  -5,   7,  -1,   8,  -1,   6,  -5],
       [ -3,  -3,  -5,   3,  -5,  -7,  -6,   6,   3,   4],
       [  3,  -3,   2,   0,   5,   4,  -3,   6,   1,   8]])

We will choose now the active nodes for the input layer. We calculate random indices for the active nodes:

In [4]:
active_input_percentage = 0.7
active_input_nodes = int(input_nodes * active_input_percentage)
active_input_indices = sorted(random.sample(range(0, input_nodes), active_input_nodes))

active_input_indices

[0, 1, 4, 5, 7, 8, 9]

We learned above that we have to remove the column j, if the node ij is removed. We can easily accomplish
this for all deactived nodes by using the slicing operator with the active nodes:

In [5]:
wih_old = wih.copy()
wih_old

array([[ -7,   6,   7,   7,  -4,   4,   1,  -2,   9,   7],
       [ -2,  -9,   0, -10,  -5,   6,   6,   2,  -2,   7],
       [  1,  -1,   4,  -5,   7,  -1,   8,  -1,   6,  -5],
       [ -3,  -3,  -5,   3,  -5,  -7,  -6,   6,   3,   4],
       [  3,  -3,   2,   0,   5,   4,  -3,   6,   1,   8]])

In [6]:
wih = wih[:, active_input_indices]
wih

array([[-7,  6, -4,  4, -2,  9,  7],
       [-2, -9, -5,  6,  2, -2,  7],
       [ 1, -1,  7, -1, -1,  6, -5],
       [-3, -3, -5, -7,  6,  3,  4],
       [ 3, -3,  5,  4,  6,  1,  8]])

As we have mentioned before, we will have to modify both the 'wih' and the 'who' matrix:

In [7]:
who = np.random.randint(-10,10, (output_nodes, hidden_nodes))

print(who)
active_hidden_percentage = 0.7
active_hidden_nodes = int(hidden_nodes * active_hidden_percentage)
active_input_indices = sorted(random.sample(range(0, hidden_nodes), active_hidden_nodes))

print(active_input_indices)

who_old = who.copy()
who = who[:, active_input_indices]
print(who)

[[ -4   0 -10   7 -10]
 [  0   3  -8  -2  -4]
 [ -3  -3  -5  -4   1]
 [ -3   3  -6  -5   6]
 [  1   8   1  -4  -3]
 [  8   9   8   1  -7]
 [  6   4   8   1   2]]
[0, 2, 3]
[[ -4 -10   7]
 [  0  -8  -2]
 [ -3  -5  -4]
 [ -3  -6  -5]
 [  1   1  -4]
 [  8   8   1]
 [  6   8   1]]


### Summery

In [11]:
import numpy as np
import random

input_nodes = 10
hidden_nodes = 5
output_notes = 7

wih = np.random.randint(-10, 10, (hidden_nodes, input_nodes))
print("wih: \n", wih)
who = np.random.randint(-10, 10,(output_nodes, hidden_nodes))
print("who: \n", who)

active_input_percentage = 0.7
active_hidden_percentage = 0.7

active_input_nodes = int(input_nodes * active_input_percentage)
active_input_indices = sorted(random.sample(range(0, input_nodes), active_input_nodes))

print("\nactive input indices: ", active_input_indices)
active_hidden_nodes = int(hidden_nodes * active_hidden_percentage)
active_hidden_indices = sorted(random.sample(range(0, hidden_nodes),active_hidden_nodes))

print("active hidden indices: ", active_input_indices)

wih_old = wih.copy()
wih = wih[:, active_input_indices]
print("\nwih after deactivating input nodes:\n", wih)
wih = wih[active_hidden_indices]
print("\nwih after deactivating hidden nodes:\n", wih)

who_old = who.copy()
who = who[:, active_hidden_indices]
print("\nwih after deactivating hidden nodes:\n", who)

wih: 
 [[ -2  -4 -10   6   7   9   4  -4  -9  -3]
 [ -3   1   4   1  -7 -10  -7  -3   0  -4]
 [ -8   5  -9   9  -8  -6   9   3   1   4]
 [ -3  -6  -4  -4  -1   8  -1  -4   4   1]
 [  5 -10  -1  -1   0  -5  -9  -9   2  -8]]
who: 
 [[-9  3  0  5  9]
 [-8  6  7  9  4]
 [-2  8  8  9 -3]
 [-2  5  0 -9 -1]
 [-6  8 -5 -3  5]
 [-7  8 -2 -7  4]
 [ 4  6 -7 -5 -3]]

active input indices:  [0, 1, 2, 3, 5, 6, 7]
active hidden indices:  [0, 1, 2, 3, 5, 6, 7]

wih after deactivating input nodes:
 [[ -2  -4 -10   6   9   4  -4]
 [ -3   1   4   1 -10  -7  -3]
 [ -8   5  -9   9  -6   9   3]
 [ -3  -6  -4  -4   8  -1  -4]
 [  5 -10  -1  -1  -5  -9  -9]]

wih after deactivating hidden nodes:
 [[ -2  -4 -10   6   9   4  -4]
 [ -3   1   4   1 -10  -7  -3]
 [ -3  -6  -4  -4   8  -1  -4]]

wih after deactivating hidden nodes:
 [[-9  3  5]
 [-8  6  9]
 [-2  8  9]
 [-2  5 -9]
 [-6  8 -3]
 [-7  8 -7]
 [ 4  6 -5]]


In [21]:
import numpy as np
import random
from scipy.special import expit as activation_function
from scipy.stats import truncnorm

def truncated_normal(mean=0, sd=1, low=0, upp=10):
    return truncnorm((low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)


class NeuralNetwork:
    
    def __init__(self,
                no_of_in_nodes,
                no_of_out_nodes,
                no_of_hidden_nodes,
                learning_rate,
                bias=None
                ):
        self.no_of_in_nodes = no_of_in_nodes
        self.no_of_out_nodes = no_of_out_nodes
        self.no_of_hidden_nodes = no_of_hidden_nodes
        self.learning_rate = learning_rate
        self.bias = bias
        self.create_weight_matrices()

    def create_weight_matrices(self):
        X = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        bias_node = 1 if self.bias else 0
        
        n = (self.no_of_in_nodes + bias_node) * self.no_of_hidden_nodes
        X = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        
        self.wih = X.rvs(n).reshape((self.no_of_hidden_nodes,self.no_of_in_nodes + bias_node))
        n = (self.no_of_hidden_nodes + bias_node) * self.no_of_out_nodes
        X = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        self.who = X.rvs(n).reshape((self.no_of_out_nodes,(self.no_of_hidden_nodes + bias_node)))

    def dropout_weight_matrices(self,
        active_input_percentage=0.70,
        active_hidden_percentage=0.70):
        # restore wih array, if it had been used for dropout
        self.wih_orig = self.wih.copy()
        self.no_of_in_nodes_orig = self.no_of_in_nodes
        self.no_of_hidden_nodes_orig = self.no_of_hidden_nodes
        self.who_orig = self.who.copy()
        
        active_input_nodes = int(self.no_of_in_nodes * active_input_percentage)
        active_input_indices = sorted(random.sample(range(0, self.no_of_in_nodes),active_input_nodes))
        active_hidden_nodes = int(self.no_of_hidden_nodes * active_hidden_percentage)
        active_hidden_indices = sorted(random.sample(range(0, self.no_of_hidden_nodes),active_hidden_nodes))
        self.wih = self.wih[:, active_input_indices][active_hidden_indices]
        self.who = self.who[:, active_hidden_indices]
        self.no_of_hidden_nodes = active_hidden_nodes
        self.no_of_in_nodes = active_input_nodes
        
        return active_input_indices, active_hidden_indices
    
    def weight_matrices_reset(self,
                                active_input_indices,
                                active_hidden_indices):
        """
        self.wih and self.who contain the newly adapted values fro
        m the active nodes.
        We have to reconstruct the original weight matrices by ass
        igning the new values
        from the active nodes
        """
        temp = self.wih_orig.copy()[:,active_input_indices]
        temp[active_hidden_indices] = self.wih

        self.wih_orig[:, active_input_indices] = temp
        self.wih = self.wih_orig.copy()
        self.who_orig[:, active_hidden_indices] = self.who
        self.who = self.who_orig.copy()
        self.no_of_in_nodes = self.no_of_in_nodes_orig
        self.no_of_hidden_nodes = self.no_of_hidden_nodes_orig

    def train_single(self, input_vector, target_vector):
        """
        input_vector and target_vector can be tuple, list or ndarray
        """
        if self.bias:
            # adding bias node to the end of the input_vector
            input_vector = np.concatenate( (input_vector, [self.bias]) )
        
        input_vector = np.array(input_vector, ndmin=2).T
        target_vector = np.array(target_vector, ndmin=2).T
        
        output_vector1 = np.dot(self.wih, input_vector)
        output_vector_hidden = activation_function(output_vector1)
        
        if self.bias:
            output_vector_hidden = np.concatenate( (output_vector_hidden, [[self.bias]]) )
        
        output_vector2 = np.dot(self.who, output_vector_hidden)
        output_vector_network = activation_function(output_vector2)
        output_errors = target_vector - output_vector_network
       
        # update the weights:
        tmp = output_errors * output_vector_network * (1.0 - output_vector_network)
        tmp = self.learning_rate * np.dot(tmp, output_vector_hidden.T)
        
        self.who += tmp 
        
        # calculate hidden errors:
        hidden_errors = np.dot(self.who.T, output_errors)
        # update the weights:
        tmp = hidden_errors * output_vector_hidden * (1.0 - output_vector_hidden)
        if self.bias:
            x = np.dot(tmp, input_vector.T)[:-1,:]
        else:
            x = np.dot(tmp, input_vector.T)
            
        self.wih += self.learning_rate * x
        
    def train(self, data_array,
            labels_one_hot_array,
            epochs=1,
            active_input_percentage=0.70,
            active_hidden_percentage=0.70,
            no_of_dropout_tests = 10):
        partition_length = int(len(data_array) / no_of_dropout_tests)
        for epoch in range(epochs):
            print("epoch: ", epoch)
            for start in range(0, len(data_array), partition_length):
                active_in_indices, active_hidden_indices = self.dropout_weight_matrices(active_input_percentage,active_hidden_percentage)
                for i in range(start, start + partition_length):
                    self.train_single(data_array[i][active_in_indices],labels_one_hot_array[i])
                self.weight_matrices_reset(active_in_indices, active_hidden_indices)
                
    def confusion_matrix(self, data_array, labels):
        cm = {}
        for i in range(len(data_array)):
            res = self.run(data_array[i])
            res_max = res.argmax()
            target = labels[i][0]
            if (target, res_max) in cm:
                cm[(target, res_max)] += 1
            else:
                cm[(target, res_max)] = 1
        return cm
    
    def run(self, input_vector):
        # input_vector can be tuple, list or ndarray
        if self.bias:
            # adding bias node to the end of the input_vector
            input_vector = np.concatenate( (input_vector, [self.bias]) )
        
        input_vector = np.array(input_vector, ndmin=2).T
        output_vector = np.dot(self.wih, input_vector)
        output_vector = activation_function(output_vector)
        
        if self.bias:
            output_vector = np.concatenate( (output_vector, [[self.bias]]) )
        
        output_vector = np.dot(self.who, output_vector)
        output_vector = activation_function(output_vector)
        
        return output_vector
    
    def evaluate(self, data, labels):
        corrects, wrongs = 0, 0
        for i in range(len(data)):
            res = self.run(data[i])
            res_max = res.argmax()
            if res_max == labels[i]:
                corrects += 1
            else:
                wrongs += 1
        return corrects, wrongs

In [13]:
import pickle

with open("data/mnist/pickled_mnist.pkl", "br") as fh:
    data = pickle.load(fh)
    
train_imgs = data[0]
test_imgs = data[1]
train_labels = data[2]
test_labels = data[3]
train_labels_one_hot = data[4]
test_labels_one_hot = data[5]

image_size = 28 # width and length
no_of_different_labels = 10 # i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size

parts = 10
partition_length = int(len(train_imgs) / parts)
print(partition_length)

start = 0
for start in range(0, len(train_imgs), partition_length):
    print(start, start + partition_length)

6000
0 6000
6000 12000
12000 18000
18000 24000
24000 30000
30000 36000
36000 42000
42000 48000
48000 54000
54000 60000


In [22]:
epochs = 3
simple_network = NeuralNetwork(no_of_in_nodes = image_pixels,
                            no_of_out_nodes = 10,
                            no_of_hidden_nodes = 100,
                            learning_rate = 0.1)

simple_network.train(train_imgs,
                    train_labels_one_hot,
                    active_input_percentage=1,
                    active_hidden_percentage=1,
                    no_of_dropout_tests = 100,
                     epochs = epochs)

epoch:  0
epoch:  1
epoch:  2


In [23]:
corrects, wrongs = simple_network.evaluate(train_imgs, train_labels)
print("accuracy train: ", corrects / ( corrects + wrongs))
corrects, wrongs = simple_network.evaluate(test_imgs, test_labels)
print("accuracy: test", corrects / ( corrects + wrongs))

accuracy train:  0.9093333333333333
accuracy: test 0.9095
