Working with Neural Network Models

© Hans Nieminen, Satakunta University of Applied Sciences

# Exercise 5.1

In [None]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder

def cross_entropy(y, y_pred, eps=1e-15):
    """Return the mean categorical cross-entropy loss over all samples.

    Parameter ``y`` contains the target labels as
    one-hot encoded values, e.g. np.array([[1,0,0], [0,0,1], [0,1,0]])
    Parameter ``y_pred`` contains the predictions as probability spaces,
    e.g. np.array([[0.8,0.1,0.1],[0.2,0.1,0.7],[0.3,0.4,0.3]])
    Parameter ``eps`` is the smallest probability that is passed to the
    logarithm; smaller predictions are raised to ``eps`` first.

    Returns the summed per-sample loss divided by the number of samples
    (``len(y_pred)``) as a NumPy float.

    Raises ValueError if ``y`` and ``y_pred`` have different shapes or
    contain no samples.
    """
    y = np.asarray(y, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    if y.shape != y_pred.shape:
        raise ValueError(
            "y and y_pred must have the same shape, got %s and %s"
            % (y.shape, y_pred.shape))
    if len(y_pred) == 0:
        raise ValueError("y and y_pred must contain at least one sample")

    # log(0) is -inf and 0 * -inf is nan, so an exact 0 in y_pred would turn
    # the whole loss into nan (even for a perfect prediction). Flooring the
    # probabilities keeps every term finite.
    log_probs = np.log(np.maximum(y_pred, eps))

    # Element-wise product + sum is the same as the row-wise dot products.
    return -np.sum(y * log_probs) / len(y_pred)

In [None]:
# Integer class labels, one per sample, stored as a single-column 2-D array.
y = np.array([0, 2, 2, 1, 0]).reshape(-1, 1)

# Predicted class probabilities: one row per sample, one column per class.
y_pred = np.array([
    [0.6, 0.15, 0.25],
    [0.1, 0.2, 0.7],
    [0.2, 0.35, 0.45],
    [0.1, 0.5, 0.4],
    [0.5, 0.2, 0.3],
])

In [None]:
# One-hot encode the integer labels and convert the result to a dense array.
y_ohe = OneHotEncoder().fit(y).transform(y).toarray()
y_ohe

array([[1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [None]:
# Loss of the first prediction set against the one-hot targets.
loss_value = cross_entropy(y=y_ohe, y_pred=y_pred)
print(np.round(loss_value, 3))

0.61


In [None]:
# Second set of predicted class probabilities for the same five samples.
y_pred2 = np.array([
    [0.8, 0.15, 0.05],
    [0.1, 0.15, 0.75],
    [0.1, 0.15, 0.75],
    [0.1, 0.7, 0.2],
    [0.6, 0.2, 0.2],
])

In [None]:
# Loss of the second prediction set against the same one-hot targets.
loss_value2 = cross_entropy(y=y_ohe, y_pred=y_pred2)
print(np.round(loss_value2, 3))

0.333


In [None]:
# Relative drop from loss_value to loss_value2: the fraction is rounded to
# two decimals first and then scaled to a percentage.
print(
    'decrease of the loss value in percents',
    round((loss_value - loss_value2) / loss_value, 2) * 100,
)

decrease of the loss value in percents 45.0


# Exercise 5.2

In [None]:
import numpy as np

In [None]:
class ANN(object):
    """Fully connected feed-forward network with randomly initialised
    weights. Only the forward pass (``predict``) is implemented."""

    def __init__(self, sizes, hidden_act = "none", output_act = "none",
                 random_state = None):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network.
        For example, if the list was [2, 3, 1] then it would be a
        three-layer network, with the first layer containing 2 neurons,
        the second layer 3 neurons, and the third layer 1 neuron.
        Note that the first layer is assumed to be an input layer.

        The biases and weights of each layer are stored together in one
        matrix of shape ``(sizes[i] + 1, sizes[i + 1])``; the extra row
        holds the biases. The values are drawn from a standard normal
        distribution and then divided by ``sqrt(sizes[i])``.

        Parameter ``hidden_act`` contains the activation function name used
        in hidden layers ("sigmoid", "tanh", "ReLU" or "none",
        case-insensitive).
        Parameter ``output_act`` contains the activation function name used
        in output layer ("sigmoid", "softmax" or "none", case-insensitive).
        Parameter ``random_state`` is passed to ``np.random.RandomState``
        to seed the weight initialisation.

        Raises ValueError if ``sizes`` has fewer than two layers or if an
        activation name is not recognised."""
        if len(sizes) < 2:
            raise ValueError('Parameter sizes has to contain at least two layers (input and output).')

        self.num_layers = len(sizes)
        self.sizes = sizes
        self.input_size = sizes[0]
        self.output_size = sizes[-1]

        # biases and weights together: every layer (hidden and output alike)
        # gets one extra input row for its bias
        self.W = []
        rng = np.random.RandomState(random_state)
        for i in range(self.num_layers - 1):
            fan_in = self.sizes[i]
            w = rng.randn(fan_in + 1, self.sizes[i + 1])
            self.W.append(w / np.sqrt(fan_in))

        # set activation functions for hidden layers
        hidden_choices = {
            "sigmoid": self.sigmoid,
            "tanh": self.tanh,
            "relu": self.ReLU,
            "none": self.no_activation,
        }
        hidden_key = hidden_act.lower()
        if hidden_key not in hidden_choices:
            raise ValueError('Parameter hidden_act has to be one of these: "sigmoid", "tanh", "ReLU", "none".')
        self.hidden_act = hidden_choices[hidden_key]

        # set activation function for output layer
        output_choices = {
            "none": self.no_activation,
            "sigmoid": self.sigmoid,
            "softmax": self.softmax,
        }
        output_key = output_act.lower()
        if output_key not in output_choices:
            raise ValueError('Parameter output_act has to be one of these: "sigmoid", "softmax", "none".')
        self.output_act = output_choices[output_key]

    def predict(self, X, addBias = True):
        """Run a forward pass and return the output-layer activations.

        Parameter ``X`` is a single sample (1-D) or a batch of samples
        (2-D, one sample per row).
        Parameter ``addBias`` tells whether a column of ones has to be
        appended to ``X``. Pass ``False`` only when ``X`` already carries
        that bias column as its last column.

        Returns a 2-D array with one row per sample and one column per
        output neuron.

        Raises ValueError if the number of input columns (including the
        bias column) does not match the first weight matrix."""
        p = np.atleast_2d(X)

        if addBias:
            p = np.c_[p, np.ones((p.shape[0]))]

        expected_cols = self.W[0].shape[0]
        if p.shape[1] != expected_cols:
            raise ValueError(
                "Input has %d columns (bias included) but the network expects %d."
                % (p.shape[1], expected_cols))

        # loop over layers
        last_layer = len(self.W) - 1
        for layer, w in enumerate(self.W):
            if layer == last_layer:
                p = self.output_act(np.dot(p, w))
            else:
                p = self.hidden_act(np.dot(p, w))
                # Every weight matrix has a bias row, so the bias column is
                # needed after each hidden layer regardless of ``addBias``
                # (which only describes the input X).
                p = np.c_[p, np.ones((p.shape[0]))]
        return p

    # Activation functions
    def sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
        return 1/(1+np.exp(-x))

    def tanh(self, x):
        """Hyperbolic tangent, applied element-wise."""
        return np.tanh(x)

    def ReLU(self, x):
        """Rectified linear unit: keeps positive values, zeroes the rest."""
        return (x > 0) * x

    def no_activation(self, x):
        """Identity activation: returns ``x`` unchanged."""
        return x

    def softmax(self, x):
        """Row-wise softmax of a 2-D array ``x``."""
        # subtracting the row maximum does not change the result but keeps
        # the exponentials from overflowing
        exp_scores = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

In [None]:
# 12 inputs -> hidden layers of 10 and 8 neurons -> 5 outputs; seeded so the
# random weights are reproducible.
ann = ANN(
    sizes=[12, 10, 8, 5],
    hidden_act="ReLU",
    output_act="softmax",
    random_state=123,
)

In [None]:
# Display the list of weight matrices created in ANN.__init__ (one per layer
# transition; each matrix has one extra row for the bias).
ann.W

[array([[-0.31339456,  0.28790883,  0.08168886, -0.43482983, -0.16702751,
          0.47672866, -0.70052196, -0.12381641,  0.36544432, -0.2502064 ],
        [-0.19597755, -0.02734012,  0.4305271 , -0.18443512, -0.12816655,
         -0.12538641,  0.63679716,  0.63127077,  0.28984539,  0.11148241],
        [ 0.21285997,  0.43033727, -0.27015197,  0.33943261, -0.36196417,
         -0.184103  ,  0.26185871, -0.41242459, -0.04043436, -0.24876721],
        [-0.07379096, -0.80788309, -0.51139756, -0.20203715,  0.26773534,
         -0.0501243 ,  0.00082155,  0.19867278, -0.25390027,  0.08187616],
        [-0.23248929, -0.49873522, -0.11284305,  0.16564348,  0.09774224,
         -0.00341517,  0.69061637,  0.11919747,  0.28253675,  0.64609633],
        [-0.37357025, -0.29987233,  0.50336636, -0.23038087,  0.00856881,
          0.30868493,  0.25712479,  0.506592  ,  0.43175527,  0.30870707],
        [-0.22306179,  0.22945709,  0.09072251, -0.38285986,  0.40913899,
          0.23302912,  0.0131318

In [None]:
# One sample with 12 feature values, matching the 12 input neurons of ``ann``.
X = np.array([
    0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
    0.7, 0.8, 0.1, 0.2, 0.3, 0.4,
])

In [None]:
# Forward pass for the single sample X; the result has one row per sample.
# NOTE: this rebinds ``y_pred``, which Exercise 5.1 above used for its
# prediction matrix.
y_pred = ann.predict(X)
y_pred

array([[0.12021312, 0.46228489, 0.15625628, 0.17063779, 0.09060792]])

In [None]:
# The predicted class is the column index of the largest output in each row.
print('Predicted value is', np.argmax(y_pred, axis=1))

Predicted value is [1]
