# Dropout Neural Networks in Python

### Modifying the Weight Arrays

In [65]:
import numpy as np
import random

input_nodes = 10
hidden_nodes = 5
output_nodes = 7

wih = np.random.randint(-10, 10, (hidden_nodes, input_nodes))
wih

array([[ -2,  -3,   8,   5,   8,   1,  -8,   3,  -2,   3],
       [-10,   3,  -9,   7,   2,   5,  -9,   1,   5,  -1],
       [  1,  -9,  -3,  -7,  -7,   2,   5,   3,  -8,   4],
       [ -1,   4,  -9,   7,  -7,  -5,  -1,  -5,   7,  -6],
       [  8,   2,  -3,  -8,   3, -10,  -2,   6, -10,   6]])

In [66]:
active_input_percentage = 0.7
active_input_nodes = int(input_nodes * active_input_percentage)
active_input_indices = sorted(random.sample(range(0, input_nodes), active_input_nodes))

active_input_indices

[0, 2, 3, 4, 6, 8, 9]

In [67]:
wih_old = wih.copy()
wih = wih[:, active_input_indices]
wih

array([[ -2,   8,   5,   8,  -8,  -2,   3],
       [-10,  -9,   7,   2,  -9,   5,  -1],
       [  1,  -3,  -7,  -7,   5,  -8,   4],
       [ -1,  -9,   7,  -7,  -1,   7,  -6],
       [  8,  -3,  -8,   3,  -2, -10,   6]])

In [68]:
who = np.random.randint(-10, 10, (output_nodes, hidden_nodes))

print(who)
active_hidden_percentage = 0.7
active_hidden_nodes = int(hidden_nodes * active_hidden_percentage)
active_hidden_indices = sorted(random.sample(range(0, hidden_nodes),
                                             active_hidden_nodes))

print(active_hidden_indices)

who_old = who.copy()
who = who[:, active_hidden_indices]

print(who)

[[ 5  2 -7  2  8]
 [-5 -6  5  7  6]
 [ 6 -5 -6 -4  4]
 [-7 -1  8  3  5]
 [ 1  8 -2  6 -5]
 [-2 -1  6 -6  2]
 [-4 -6 -4 -3 -4]]
[0, 1, 4]
[[ 5  2  8]
 [-5 -6  6]
 [ 6 -5  4]
 [-7 -1  5]
 [ 1  8 -5]
 [-2 -1  2]
 [-4 -6 -4]]


In [69]:
wih = wih[active_hidden_indices]
wih

array([[ -2,   8,   5,   8,  -8,  -2,   3],
       [-10,  -9,   7,   2,  -9,   5,  -1],
       [  8,  -3,  -8,   3,  -2, -10,   6]])

In [70]:
input_nodes = 10
hidden_nodes = 5
output_nodes = 7

wih = np.random.randint(-10, 10, (hidden_nodes, input_nodes))
print('wih: \n', wih)

who = np.random.randint(-10, 10, (output_nodes, hidden_nodes))
print('who: \n', who)

active_input_percentage = 0.7
active_hidden_percentage = 0.7

active_input_nodes = int(input_nodes * active_input_percentage)
active_input_indices = sorted(random.sample(range(0, hidden_nodes),
                                            active_hidden_nodes))

print('active hidden indices: ', active_hidden_indices)

wih_old = wih.copy()
wih = wih[:, active_input_indices]
print('\nwih after deactivating input nodes:\n', wih)

wih = wih[active_hidden_indices]
print('\nwih after deactivating hidden nodes:\n', wih)

who_old = who.copy()
who = who[:, active_hidden_indices]
print('\nwho after deactivating hidden nodes:\n', who)

wih: 
 [[ -6  -6   1   3   9  -7   1   1  -7  -4]
 [  2   3  -9   9   9   1   2   2   9   8]
 [  6   8   0   4   3   0   2  -8   0  -3]
 [  8  -6   9  -8 -10  -5   5 -10  -3   3]
 [ -2  -6  -4   4   0   7   4   5  -9   9]]
who: 
 [[  4  -4   3 -10   1]
 [  4 -10   6   4  -6]
 [ -4   5   7  -6   6]
 [ -9  -9  -4  -7   7]
 [  6  -5  -1  -3   2]
 [ -3  -6 -10  -5   6]
 [  3   8  -6   3   9]]
active hidden indices:  [0, 1, 4]

wih after deactivating input nodes:
 [[-6  1  3]
 [ 3 -9  9]
 [ 8  0  4]
 [-6  9 -8]
 [-6 -4  4]]

wih after deactivating hidden nodes:
 [[-6  1  3]
 [ 3 -9  9]
 [-6 -4  4]]

who after deactivating hidden nodes:
 [[  4  -4   1]
 [  4 -10  -6]
 [ -4   5   6]
 [ -9  -9   7]
 [  6  -5   2]
 [ -3  -6   6]
 [  3   8   9]]


In [71]:
from scipy.special import expit as activation_function
from scipy.stats import truncnorm

def truncated_normal(mean=0, sd=1, low=0, upp=10):
    return truncnorm(
        (low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)

class NeuralNetwork:
    def __init__(self, no_of_in_nodes, no_of_out_nodes,
                 no_of_hidden_nodes, learning_rate, bias=None):
        self.no_of_in_nodes = no_of_in_nodes
        self.no_of_out_nodes = no_of_out_nodes
        self.no_of_hidden_nodes = no_of_hidden_nodes
        self.learning_rate = learning_rate
        self.bias = bias
        self.create_weight_matrices()


    def create_weight_matrices(self):
        bias_node = 1 if self.bias else 0

        n = (self.no_of_in_nodes + bias_node) * self.no_of_hidden_nodes

        X = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        self.wih = X.rvs(n).reshape((self.no_of_hidden_nodes,
                                     self.no_of_in_nodes + bias_node))
        

        n = (self.no_of_hidden_nodes + bias_node) * self.no_of_out_nodes
        X = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        self.who = X.rvs(n).reshape((self.no_of_out_nodes, (self.no_of_hidden_nodes + bias_node)))

    def dropout_weight_matrices(self, active_input_percentage=0.70,
                                active_hidden_percentage=0.70):
        self.wih_orig = self.wih.copy()
        self.no_of_in_nodes_orig = self.no_of_in_nodes
        self.no_of_hidden_nodes_orig = self.no_of_hidden_nodes
        self.who_orig = self.who.copy()

        active_input_nodes = int(self.no_of_in_nodes * active_input_percentage)
        active_input_indices = sorted(random.sample(range(0, self.no_of_in_nodes), active_input_nodes))

        active_hidden_nodes = int(self.no_of_hidden_nodes * active_hidden_percentage)
        active_hidden_indices = sorted(random.sample(range(0, self.no_of_hidden_nodes), active_hidden_nodes))

        self.wih = self.wih[:, active_input_indices][active_hidden_indices]
        self.who = self.who[:, active_hidden_indices]

        self.no_of_hidden_nodes = active_hidden_nodes
        self.no_of_in_nodes = active_input_nodes

        return active_input_indices, active_hidden_indices
    
    def weight_matrices_reset(self, active_input_indices,
                                active_hidden_indices):
        temp = self.wih_orig.copy()[:, active_input_indices]
        temp[active_hidden_indices] = self.wih

        self.wih_orig[:, active_input_indices] = temp
        self.wih = self.wih_orig.copy()

        self.who_orig[:, active_hidden_indices] = self.who
        self.who = self.who_orig.copy()

        self.no_of_in_nodes = self.no_of_in_nodes_orig
        self.no_of_hidden_nodes = self.no_of_hidden_nodes_orig


    def train_single(self, input_vector, target_vector):
        if self.bias:
            input_vector = np.concatenate((input_vector, [self.bias]))

        input_vector = np.array(input_vector, ndmin=2).T
        target_vector = np.array(target_vector, ndmin=2).T

        output_vector1 = np.dot(self.wih, input_vector)
        output_vector_hidden = activation_function(output_vector1)

        if self.bias:
            output_vector_hidden = np.concatenate((output_vector_hidden, [[self.bias]]))

        output_vector2 = np.dot(self.who, output_vector_hidden)
        output_vector_network = activation_function(output_vector2)

        output_errors = target_vector - output_vector_network

        tmp = output_errors * output_vector_network * (1.0 - output_vector_network)
        tmp = self.learning_rate * np.dot(tmp, output_vector_hidden.T)
        
        self.who += tmp

        hidden_errors = np.dot(self.who.T, output_errors)

        tmp = hidden_errors * output_vector_hidden * (1.0 - output_vector_hidden)

        if self.bias:
            x = np.dot(tmp, input_vector.T)[:-1, :]
        else:
            x = np.dot(tmp, input_vector.T)

        self.wih += self.learning_rate * x

    def train(self, data_array, labels_one_hot_array, epochs=1, 
              active_input_percentage=0.70, active_hidden_percentage=0.70,
              no_of_dropout_tests = 10):
       
        partition_length = int(len(data_array)/ no_of_dropout_tests)

        for epoch in range(epochs):
            print('epoch: ', epoch)
            
            for start in range(0, len(data_array), partition_length):
                active_in_indices, active_hidden_indices = self.dropout_weight_matrices(active_input_percentage, active_hidden_percentage)

            for i in range(start, start + partition_length):
                self.train_single(data_array[i][active_in_indices], labels_one_hot_array[i])
                self.weight_matrices_reset(active_in_indices, active_hidden_indices)


    def confusion_matrix(self, data_array, labels):
        cm = {}

        for i in range(len(data_array)):
            res = self.run(data_array[i])
            res_max = res.argmax()

            target = labels[i][0]

            if (target, res_max) in cm:
                cm[(target, res_max)] += 1
            else:
                cm[(target, res_max)] = 1

        return cm
    

    def run(self, input_vector):
        if self.bias:
            input_vector = np.concatenate((input_vector, [self.bias]))

        input_vector = np.array(input_vector, ndmin=2).T

        output_vector = np.dot(self.wih, input_vector)
        output_vector = activation_function(output_vector)

        if self.bias:
            output_vector = np.concatenate((output_vector, [[self.bias]]))

        output_vector = np.dot(self.who, output_vector)
        output_vector = activation_function(output_vector)

        return output_vector

    def evaluate(self, data, labels):
        corrects, wrongs = 0, 0

        for i in range(len(data)):
            res = self.run(data[i])
            res_max = res.argmax()

            if res_max == labels[i]:
                corrects += 1
            else:
                wrongs += 1

        return corrects, wrongs


In [1]:
import pickle


with open('pickled_mnist.pkl', 'br') as fh:
    data = pickle.load(fh)

train_imgs, test_imgs = data[0], data[1]
train_labels, test_labels = data[2], data[3]

no_of_different_labels = 10
lr = np.arange(no_of_different_labels)


train_labels_one_hot = (lr==train_labels).astype(np.float64)
test_labels_one_hot = (lr==test_labels).astype(np.float64)

image_size = 28
no_of_different_labels = 10
image_pixels = image_size * image_size

NameError: name 'np' is not defined

In [73]:
parts = 10
partition_length = int(len(train_imgs) / parts)
print(partition_length)

start = 0
for start in range(0, len(train_imgs), partition_length):
    print(start, start + partition_length)

6000
0 6000
6000 12000
12000 18000
18000 24000
24000 30000
30000 36000
36000 42000
42000 48000
48000 54000
54000 60000


In [74]:
epochs = 3
simple_network = NeuralNetwork(no_of_in_nodes = image_pixels,
                               no_of_out_nodes = 10,
                               no_of_hidden_nodes=100,
                               learning_rate=0.1)

simple_network.train(train_imgs, train_labels_one_hot,
                     active_input_percentage=0.7, active_hidden_percentage=0.7,
                     no_of_dropout_tests=100, epochs=epochs)

epoch:  0
epoch:  1
epoch:  2


In [75]:
corrects, wrongs = simple_network.evaluate(train_imgs, train_labels)
print('accuracy train: ', corrects / (corrects + wrongs))

corrects, wrongs = simple_network.evaluate(test_imgs, test_labels)
print('accuracy test: ', corrects / (corrects + wrongs))

ValueError: shapes (0,0) and (784,1) not aligned: 0 (dim 1) != 784 (dim 0)