1. Design a 3-layer feedforward neural network consisting of a hidden-layer of 10 neurons having logistic activation function and an output softmax layer. Assume a learning rate 𝛼 = 0.01 and decay parameter 𝛽 = 10−6. Use appropriate scaling of input features

In [1]:
import numpy as np
import theano
import theano.tensor as T

# by specifying [10] as the hidden_layer_neuron implies using 1 hidden layer with 10 neurons
# respectively by specifying [100, 100] -> 2 hidden layers each layer 100 neurons


class SoftmaxNeuralNetwork:

    def __init__(self, train_x, train_y, num_features=6, list_of_neuron_on_hidden_layer=list([10]), decay=1e-6):

        self.train_x = train_x
        self.train_y = train_y

        self.train_cost = []
        self.train_prediction = []

        self.test_cost = []
        self.test_prediction = []

        weights = []
        biases = []

        # first layer which connect to the input layer
        weights.append(
            self.init_weight(len(train_x[0]), list_of_neuron_on_hidden_layer[0]) )
        biases.append(
            self.init_bias(list_of_neuron_on_hidden_layer[0]))

        previous_layer = list_of_neuron_on_hidden_layer[0]

        for layer in range(1, len(list_of_neuron_on_hidden_layer)):
            weights.append(
                self.init_weight(previous_layer, list_of_neuron_on_hidden_layer[layer]))

            biases.append(
                self.init_bias(list_of_neuron_on_hidden_layer[layer]))

        # for output layer
        weights.append(
            self.init_weight(previous_layer, num_features)
        )

        biases.append(
            self.init_bias(num_features)
        )

        # construct neural network

        layers = []

        x_input = T.matrix('X')
        y_output = T.matrix('Y')

        prev_input = x_input

        for i in range(len(weights)-1):
            calculation = T.nnet.sigmoid(T.dot(prev_input, weights[i]) + biases[i])
            layers.append(calculation)
            prev_input = calculation

        # last output layer, use softmax function
        calculation = T.nnet.softmax(T.dot(prev_input, weights[len(weights)-1]) +
                                     biases[len(biases) - 1])
        layers.append(calculation)

        y_prediction = T.argmax(calculation, axis=1)

        sum_sqr_weights = T.sqr(weights[0])

        for i in range(1, len(weights)):
            sum_sqr_weights += T.sum(T.sqr(weights[i]))

        cost = T.mean(T.nnet.categorical_crossentropy(calculation, y_output)) + decay*T.sum(sum_sqr_weights)
        params = list(weights+biases)
        updates = self.sgd(cost=cost, params=params)

        self.computation = theano.function(
            inputs=[x_input, y_output],
            updates=updates,
            outputs=cost
        )

        self.prediction = theano.function(
            inputs=[x_input],
            outputs=y_prediction
        )

        return

    def init_bias(self, n):
        return theano.shared(np.zeros(n), theano.config.floatX)

    def init_weight(self, n_in, n_out, is_logistic_function=False):

        weight = np.random.uniform(
            size=(n_in, n_out),
            low=-np.sqrt(6. / (n_in + n_out)),
            high=np.sqrt(6. / (n_in + n_out)),
        )

        if is_logistic_function:
            weight = weight*4

        return theano.shared(weight, theano.config.floatX)

    def sgd(self, cost, params, lr=0.01):

        # return list of gradients
        grads = T.grad(cost=cost, wrt=params)

        updates = []
        for p, g in zip(params, grads):
            updates.append([p, p - g * lr])
        return updates

    def start_train(self, test_x, test_y, epochs=1000, batch_size=100):

        for i in range(epochs):

            cost = 0
            prediction_batch = []

            for cnt in range(0, len(self.train_x), batch_size):

                end = cnt + batch_size

                if end > len(self.train_x):
                    end = len(self.train_x)

                train_x_batch = self.train_x[cnt:end]
                train_y_batch = self.train_y[cnt:end]

                cost += self.computation(train_x_batch, train_y_batch)
                prediction = self.prediction(self.train_x)
                predict_in_percentage = np.mean(np.argmax(self.train_y, axis=1) == prediction)
                prediction_batch.append(predict_in_percentage)

            # predictions of train data
            prediction = np.mean(prediction_batch)

            self.train_cost.append(cost)
            self.train_prediction.append(prediction)

            if i % 5*batch_size == 0 or i == epochs-1:

                print ('epoch: %d, train cost: %s, train predictions: %s \n' % (i, cost, prediction))
                self.start_test(test_x=test_x, test_y=test_y)
                print('------------------------------------\n')

    def start_test(self, test_x, test_y):

        cost = self.computation(test_x, test_y)
        prediction = self.prediction(test_x)

        self.test_cost.append(cost)
        self.test_prediction.append(prediction)

        print ('test cost: %s, test predictions: %s \n' % (cost, np.mean(np.argmax(test_y, axis=1) == prediction)))

    def get_train_result(self):

        return self.train_cost, self.train_prediction

    def get_test_result(self):

        return self.test_cost, self.test_prediction


Above code is the class to instanstiate the softmax neural network, Below is the implementation of data preprocessing

In [2]:
import pandas as pd
import numpy as np


class DataCollector:

    def __init__(self):
        self.df_train = pd.read_csv("./data/sat_train.txt", delimiter=' ')
        self.df_test = pd.read_csv("./data/sat_test.txt", delimiter=' ')

        # change the index
        self.df_train.columns = range(self.df_train.shape[1])
        self.df_test.columns = range(self.df_test.shape[1])

        self.x_train = self.df_train[range(36)]
        self.y_train = self.df_train[36]

        self.x_test = self.df_test[range(36)]
        self.y_test = self.df_test[36]

        return

    def get_train_data(self):

        return self.x_train, self.one_hot_encoding_data(self.y_train)

    def get_test_data(self):

        return self.x_test, self.one_hot_encoding_data(self.y_test)

    def one_hot_encoding_data(self, df, limit_number=6):

        # in this case data 6 is missing so, 7 we assume to be 6
        df[df == 7] = 6
        df_return = np.zeros((df.shape[0], limit_number))
        df_return[np.arange(df.shape[0]), df - 1] = 1
        return df_return

Start training the simple multi layer softmax neural network

In [3]:
data_collector = DataCollector()
train_x, train_y = data_collector.get_train_data()
test_x, test_y = data_collector.get_test_data()

number_data = train_x.shape[0]

softmax_nn = SoftmaxNeuralNetwork(train_x=train_x.as_matrix(), train_y=train_y, list_of_neuron_on_hidden_layer=[10])
softmax_nn.start_train(batch_size=number_data, test_x=test_x, test_y=test_y)

train cost: 1.81570196997, train predictions: 0.233423545332 

test cost: 1.80494240925, test predictions: 0.233616808404 

------------------------------------

train cost: 1.58030678257, train predictions: 0.452413170952 

test cost: 1.59783264093, test predictions: 0.443221610805 

------------------------------------

train cost: 1.50772475648, train predictions: 0.554127198917 

test cost: 1.52944113557, test predictions: 0.52376188094 

------------------------------------

train cost: 1.44530774248, train predictions: 0.536535859269 

test cost: 1.46584239605, test predictions: 0.547273636818 

------------------------------------

train cost: 1.38753911742, train predictions: 0.5599909788 

test cost: 1.40975716199, test predictions: 0.541270635318 

------------------------------------

train cost: 1.33654208885, train predictions: 0.596301308074 

test cost: 1.36069419182, test predictions: 0.592796398199 

------------------------------------

train cost: 1.29282180176, trai

2. Find the optimal batch size for mini-batch gradient descent while training the neural network by evaluating the performances for different batch sizes. Set this as the batch size for the rest of the experiments

a) Plot the training error and test accuracy against number of epochs for the 3-layer
network for each batch size. Limit search space to:{4,8,16,32,64}. 

As the helper class to do plotting stuff, we create another class to do such task.

In [4]:
import matplotlib.pyplot as plt


class DataVisualization:

    def __init__(self):
        return

    def show_plot(self, list_x_point, list_y_point, x_label, y_label, title, figure_name):
        plt.figure()
        plt.plot(list_x_point, list_y_point)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.title(title)
        plt.savefig(figure_name)
        plt.show()

a.Plot the training error and test accuracy against number of epochs for the 3-layer
network for each batch size. Limit search space to:{4,8,16,32,64}. 

train with batch size 4

In [None]:
data_collector = DataCollector()
train_x, train_y = data_collector.get_train_data()
test_x, test_y = data_collector.get_test_data()

# number_data = train_x.shape[0]
number_data = 4

number_epoch = 100

softmax_nn = SoftmaxNeuralNetwork(train_x=train_x.as_matrix(), train_y=train_y, list_of_neuron_on_hidden_layer=[10])
softmax_nn.start_train(batch_size=number_data, test_x=test_x, test_y=test_y, epochs=number_epoch)

cost_train, prediction_train = softmax_nn.get_train_result()
cost_test, prediction_test = softmax_nn.get_test_result()

%matplotlib inline
# visualize
data_visualization = DataVisualization()
data_visualization.show_plot(
    list_x_point=range(number_epoch), list_y_point=cost_train,
    x_label="epochs", y_label="costs", title="Cross Entropy", figure_name="cross_entropy_cost.png"
)

Train with 8 batch size

In [None]:
data_collector = DataCollector()
train_x, train_y = data_collector.get_train_data()
test_x, test_y = data_collector.get_test_data()

# number_data = train_x.shape[0]
number_data = 8

number_epoch = 100

softmax_nn = SoftmaxNeuralNetwork(train_x=train_x.as_matrix(), train_y=train_y, list_of_neuron_on_hidden_layer=[10])
softmax_nn.start_train(batch_size=number_data, test_x=test_x, test_y=test_y, epochs=number_epoch)

cost_train, prediction_train = softmax_nn.get_train_result()
cost_test, prediction_test = softmax_nn.get_test_result()

%matplotlib inline
# visualize
data_visualization = DataVisualization()
data_visualization.show_plot(
    list_x_point=range(number_epoch), list_y_point=cost_train,
    x_label="epochs", y_label="costs", title="Cross Entropy", figure_name="cross_entropy_cost.png"
)

Train with 16 batch size

In [None]:
data_collector = DataCollector()
train_x, train_y = data_collector.get_train_data()
test_x, test_y = data_collector.get_test_data()

# number_data = train_x.shape[0]
number_data = 16

number_epoch = 100

softmax_nn = SoftmaxNeuralNetwork(train_x=train_x.as_matrix(), train_y=train_y, list_of_neuron_on_hidden_layer=[10])
softmax_nn.start_train(batch_size=number_data, test_x=test_x, test_y=test_y, epochs=number_epoch)

cost_train, prediction_train = softmax_nn.get_train_result()
cost_test, prediction_test = softmax_nn.get_test_result()

%matplotlib inline
# visualize
data_visualization = DataVisualization()
data_visualization.show_plot(
    list_x_point=range(number_epoch), list_y_point=cost_train,
    x_label="epochs", y_label="costs", title="Cross Entropy", figure_name="cross_entropy_cost.png"
)

Train with 32 batch size 

In [None]:
data_collector = DataCollector()
train_x, train_y = data_collector.get_train_data()
test_x, test_y = data_collector.get_test_data()

# number_data = train_x.shape[0]
number_data = 32

number_epoch = 100

softmax_nn = SoftmaxNeuralNetwork(train_x=train_x.as_matrix(), train_y=train_y, list_of_neuron_on_hidden_layer=[10])
softmax_nn.start_train(batch_size=number_data, test_x=test_x, test_y=test_y, epochs=number_epoch)

cost_train, prediction_train = softmax_nn.get_train_result()
cost_test, prediction_test = softmax_nn.get_test_result()

%matplotlib inline
# visualize
data_visualization = DataVisualization()
data_visualization.show_plot(
    list_x_point=range(number_epoch), list_y_point=cost_train,
    x_label="epochs", y_label="costs", title="Cross Entropy", figure_name="cross_entropy_cost.png"
)

Train with 64 batch size

In [None]:
data_collector = DataCollector()
train_x, train_y = data_collector.get_train_data()
test_x, test_y = data_collector.get_test_data()

# number_data = train_x.shape[0]
number_data = 64

number_epoch = 100

softmax_nn = SoftmaxNeuralNetwork(train_x=train_x.as_matrix(), train_y=train_y, list_of_neuron_on_hidden_layer=[10])
softmax_nn.start_train(batch_size=number_data, test_x=test_x, test_y=test_y, epochs=number_epoch)

cost_train, prediction_train = softmax_nn.get_train_result()
cost_test, prediction_test = softmax_nn.get_test_result()

%matplotlib inline
# visualize
data_visualization = DataVisualization()
data_visualization.show_plot(
    list_x_point=range(number_epoch), list_y_point=cost_train,
    x_label="epochs", y_label="costs", title="Cross Entropy", figure_name="cross_entropy_cost.png"
)