In [1]:
import numpy as np
import matplotlib.pyplot as plt

from random import random
from random import randrange
from sklearn.model_selection import train_test_split

In [2]:

class MLP(object):
    """A small fully connected multilayer perceptron with sigmoid units.

    Every layer (including the output layer) uses the logistic sigmoid, so
    outputs are confined to the open interval (0, 1). Training is done one
    sample at a time (online gradient descent) on a squared-error signal.

    Note: the network holds a single random scalar bias that is added to the
    pre-activation of every layer and is never updated during training.
    """

    def __init__(self, num_inputs=3, hidden_layers=None, num_outputs=2):
        """Build the network and allocate its per-layer buffers.

        Args:
            num_inputs (int): Number of inputs
            hidden_layers (list): A list of ints, one per hidden layer.
                Defaults to ``[3, 3]`` when omitted.
            num_outputs (int): Number of outputs
        """
        # A list literal as the default value would be one object shared by
        # every instance; build a fresh list per instance instead.
        if hidden_layers is None:
            hidden_layers = [3, 3]

        self.num_inputs = num_inputs
        self.hidden_layers = list(hidden_layers)
        self.num_outputs = num_outputs

        # sizes of all layers, input first and output last
        layers = [num_inputs] + self.hidden_layers + [num_outputs]
        layer_pairs = list(zip(layers[:-1], layers[1:]))

        # one weight matrix per pair of consecutive layers, uniform in [0, 1)
        self.weights = [np.random.rand(n_in, n_out) for n_in, n_out in layer_pairs]

        # single scalar bias shared by all layers (see class docstring)
        self.bias = [np.random.rand(1)]

        # per-layer update directions, filled in by back_propagate()
        self.derivatives = [np.zeros((n_in, n_out)) for n_in, n_out in layer_pairs]

        # per-layer activations, filled in by forward_propagate()
        self.activations = [np.zeros(size) for size in layers]

    def forward_propagate(self, inputs):
        """Computes forward propagation of the network based on input signals.

        The activation of every layer is stored in ``self.activations`` so
        that ``back_propagate`` can use it afterwards.

        Args:
            inputs (array-like): Input signals for one sample
        Returns:
            activations (ndarray): Output layer activation
        """
        # back_propagate() reshapes the stored input, so it must be an ndarray
        # even when the caller hands in a plain list.
        activations = np.asarray(inputs)
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            # weighted sum of the previous layer plus the shared bias
            net_inputs = np.dot(activations, w) + self.bias[0]
            activations = self._sigmoid(net_inputs)
            self.activations[i + 1] = activations

        return activations

    def back_propagate(self, error):
        """Backpropagates an error signal and stores the weight updates.

        ``self.derivatives[i]`` receives the outer product of the inputs of
        layer ``i`` and that layer's delta. With ``error = target - output``
        this is the direction that reduces the squared error, which is why
        ``gradient_descent`` adds it to the weights.

        Args:
            error (ndarray): The error at the output layer
                (``target - output``) for the sample last passed to
                ``forward_propagate``.
        Returns:
            error (ndarray): The error propagated back to the input layer
        """
        for i in reversed(range(len(self.derivatives))):
            # output of layer i (already passed through the sigmoid)
            layer_output = self.activations[i + 1]
            delta = error * self._sigmoid_derivative(layer_output)

            # row vector (1, n_out) and column vector (n_in, 1) so that their
            # product has the same shape as the weight matrix
            delta_row = delta.reshape(delta.shape[0], -1).T
            layer_input = self.activations[i]
            layer_input_col = layer_input.reshape(layer_input.shape[0], -1)

            self.derivatives[i] = np.dot(layer_input_col, delta_row)

            # error seen by the previous layer
            error = np.dot(delta, self.weights[i].T)

        return error

    def train(self, X_train, y_train, x_test, y_test, epochs, learning_rate, weight_dacay=0):
        """Trains the model with per-sample forward prop, backprop and update.

        After every epoch the network is also evaluated (without updating)
        on the test set.

        Args:
            X_train (ndarray): Training inputs, one sample per row
            y_train (ndarray): Training targets, aligned with ``X_train``
            x_test (ndarray): Test inputs, one sample per row
            y_test (ndarray): Test targets, aligned with ``x_test``
            epochs (int): Num. epochs we want to train the network for
            learning_rate (float): Step to apply to gradient descent
            weight_dacay (float): Weight decay coefficient; 0 disables it
        Returns:
            dict: ``{'train': [...], 'test': [...]}`` with the mean MSE of
                each epoch for the training and the test set.
        """
        all_mse = {'train': [], 'test': []}
        num_train = len(X_train)

        for _ in range(epochs):
            train_sum_errors = 0

            for sample, target in zip(X_train, y_train):
                output = self.forward_propagate(sample)
                error = target - output
                self.back_propagate(error)

                # update the weights; the decay is scaled by the real size of
                # the training set
                self.gradient_descent(learning_rate, weight_dacay, num_train)

                # loss is measured with the pre-update output of this sample
                train_sum_errors += self._mse(target, output)

            # evaluation pass over the test data, no weight updates
            test_sum_errors = 0
            for sample, target in zip(x_test, y_test):
                output = self.forward_propagate(sample)
                test_sum_errors += self._mse(target, output)

            all_mse['train'].append(train_sum_errors / len(X_train))
            all_mse['test'].append(test_sum_errors / len(x_test))

        print("Training complete!")
        print("=====")

        return all_mse

    def gradient_descent(self, learning_rate=0, wd_rate=0, num_samples=1000):
        """Applies one update step to every weight matrix, in place.

        Each matrix is first shrunk towards zero by the factor
        ``1 - learning_rate * wd_rate / num_samples`` (L2 weight decay) and
        then moved along the stored derivatives.

        Args:
            learning_rate (float): How fast to learn.
            wd_rate (float): Weight decay coefficient; 0 disables decay.
            num_samples (int): Number of training samples used to scale the
                decay coefficient.
        """
        decay = wd_rate / num_samples

        for i in range(len(self.weights)):
            weights = self.weights[i]

            # weight decay acts on the weights themselves, not on the
            # gradient; scaling the gradient would only change the step size
            if wd_rate != 0:
                weights *= 1.0 - learning_rate * decay

            weights += learning_rate * self.derivatives[i]

    def _sigmoid(self, x):
        """Sigmoid activation function
        Args:
            x (float or ndarray): Value to be processed
        Returns:
            y (float or ndarray): 1 / (1 + exp(-x))
        """
        return 1.0 / (1 + np.exp(-x))

    def _sigmoid_derivative(self, x):
        """Sigmoid derivative expressed through the sigmoid's output.

        Args:
            x (float or ndarray): A value that is already a sigmoid output
        Returns:
            y (float or ndarray): x * (1 - x)
        """
        return x * (1.0 - x)

    def _mse(self, target, output):
        """Mean Squared Error loss function
        Args:
            target (ndarray): The ground truth
            output (ndarray): The predicted values
        Returns:
            (float): Mean of the squared differences
        """
        return np.average((target - output) ** 2)



In [3]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), evaluated with NumPy."""
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator


In [4]:
def generate_points(n_points=200, noise_std=0.5):
    """Draw a noisy synthetic regression dataset on the unit square.

    Both features are sampled uniformly from [0, 1). The target is

        y = sigmoid(x1 + 2*x2) + 0.5 * (x1 - x2)**2 + noise_std * eps

    with ``eps`` drawn from a standard normal distribution.

    Args:
        n_points (int): Number of samples to generate.
        noise_std (float): Scale applied to the standard-normal noise.
    Returns:
        tuple: ``(x1p, x2p, y)`` where ``x1p`` and ``x2p`` are ndarrays of
            shape ``(n_points,)`` and ``y`` is a list of ``n_points`` floats.
    """
    x1p = np.random.uniform(size=n_points)
    x2p = np.random.uniform(size=n_points)
    noise = np.random.standard_normal(size=n_points)

    # evaluated on whole arrays instead of one sample at a time
    y = sigmoid(x1p + 2 * x2p) + 0.5 * (x1p - x2p) ** 2 + noise_std * noise

    # callers receive the targets as a list, as before
    return x1p, x2p, list(y)


In [5]:
x1, x2, y = generate_points()

# One row per generated point: [x1, x2, y].
x = np.array(list(zip(x1, x2, y)))

# Draw 1000 rows from the generated points, with replacement.
# NOTE(review): the resampling happens before the split, so the same original
# point can end up in both the train and the test set - confirm this is intended.
df = [x[randrange(len(x1))] for _ in range(1000)]

# 70% / 30% train / test split of the resampled rows.
train, test = train_test_split(df, test_size=0.3)

train_rows = np.array(train)
test_rows = np.array(test)

# First two columns are the features; the last column is the target, kept 2-D
# so that every target is a length-1 vector.
x_train, train_examples = train_rows[:, :2], train_rows[:, 2:]
x_test, test_examples = test_rows[:, :2], test_rows[:, 2:]

## Sem Weight Decay

In [6]:
# Baseline run: 2 inputs, one hidden layer of 10 units, 1 output.
# No weight decay argument is passed, so train() uses its default of 0.
epochs, learning_rate = 1000, 0.05
mlp = MLP(2, [10], 1)

# per-epoch train/test MSE history
all_mse = mlp.train(
    x_train, train_examples,
    x_test, test_examples,
    epochs, learning_rate,
)

KeyboardInterrupt: 

In [None]:
# Learning curves of the last training run: mean MSE per epoch.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(12, 5))
ax.set_title("Loss - Mean Squared Error")

epoch_axis = list(range(1, epochs + 1))
for split, label in (('train', 'Train'), ('test', 'Test')):
    ax.plot(epoch_axis, all_mse[split], label=label)

ax.set_xlabel('epochs')
ax.set_ylabel('MSE')
ax.legend()
plt.show()

## Usando Weight Decay 0.01

In [None]:

# Fresh network (2 inputs, one hidden layer of 10 units, 1 output),
# trained with a weight decay coefficient of 0.01.
epochs, learning_rate, weight_decay = 1000, 0.05, 0.01
mlp = MLP(2, [10], 1)

# per-epoch train/test MSE history
all_mse = mlp.train(
    x_train, train_examples,
    x_test, test_examples,
    epochs, learning_rate, weight_decay,
)

In [None]:
# Learning curves of the last training run: mean MSE per epoch.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(12, 5))
ax.set_title("Loss - Mean Squared Error")

epoch_axis = list(range(1, epochs + 1))
for split, label in (('train', 'Train'), ('test', 'Test')):
    ax.plot(epoch_axis, all_mse[split], label=label)

ax.set_xlabel('epochs')
ax.set_ylabel('MSE')
ax.legend()
plt.show()

## Usando Weight Decay 0.05

In [None]:
# Fresh network (2 inputs, one hidden layer of 10 units, 1 output),
# trained with a weight decay coefficient of 0.05.
epochs, learning_rate, weight_decay = 1000, 0.05, 0.05
mlp = MLP(2, [10], 1)

# per-epoch train/test MSE history
all_mse = mlp.train(
    x_train, train_examples,
    x_test, test_examples,
    epochs, learning_rate, weight_decay,
)

In [None]:
# Learning curves of the last training run: mean MSE per epoch.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(12, 5))
ax.set_title("Loss - Mean Squared Error")

epoch_axis = list(range(1, epochs + 1))
for split, label in (('train', 'Train'), ('test', 'Test')):
    ax.plot(epoch_axis, all_mse[split], label=label)

ax.set_xlabel('epochs')
ax.set_ylabel('MSE')
ax.legend()
plt.show()

## Usando Weight Decay 0.001

In [None]:
# Fresh network (2 inputs, one hidden layer of 10 units, 1 output),
# trained with a weight decay coefficient of 0.001.
epochs, learning_rate, weight_decay = 1000, 0.05, 0.001
mlp = MLP(2, [10], 1)

# per-epoch train/test MSE history
all_mse = mlp.train(
    x_train, train_examples,
    x_test, test_examples,
    epochs, learning_rate, weight_decay,
)

In [None]:
# Learning curves of the last training run: mean MSE per epoch.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(12, 5))
ax.set_title("Loss - Mean Squared Error")

epoch_axis = list(range(1, epochs + 1))
for split, label in (('train', 'Train'), ('test', 'Test')):
    ax.plot(epoch_axis, all_mse[split], label=label)

ax.set_xlabel('epochs')
ax.set_ylabel('MSE')
ax.legend()
plt.show()

## Usando Weight Decay 0.005

In [None]:
# Fresh network (2 inputs, one hidden layer of 10 units, 1 output),
# trained with a weight decay coefficient of 0.005.
epochs, learning_rate, weight_decay = 1000, 0.05, 0.005
mlp = MLP(2, [10], 1)

# per-epoch train/test MSE history
all_mse = mlp.train(
    x_train, train_examples,
    x_test, test_examples,
    epochs, learning_rate, weight_decay,
)

In [None]:
# Learning curves of the last training run: mean MSE per epoch.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(12, 5))
ax.set_title("Loss - Mean Squared Error")

epoch_axis = list(range(1, epochs + 1))
for split, label in (('train', 'Train'), ('test', 'Test')):
    ax.plot(epoch_axis, all_mse[split], label=label)

ax.set_xlabel('epochs')
ax.set_ylabel('MSE')
ax.legend()
plt.show()