In [1]:
import pandas as pd
import numpy as np

import matplotlib as m_plt
from matplotlib import pyplot as plt

from copy import copy
from pprint import pprint

import sys
import os

import personal_plotter as p_plt

# Autoasociator

## 1. Data Generation

In [2]:
dummy_shape = (10)
dummy_data = np.identity(dummy_shape)
print(dummy_data)

[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


### 1.1 Data Sampling

We are gonna use our dummy data (before generated) as a template, being the input and objective data equal is not necessary to create another dummy data for the objective one.

In [3]:
input_data = dummy_data.copy()
temp_shape = copy(input_data.shape)
input_data = list(map(lambda x : np.matrix(x), np.split(input_data.flatten(), temp_shape[1])))

objetive_data = dummy_data.copy()
temp_shape = copy(objetive_data.shape)
objetive_data = list(map(lambda x : np.matrix(x), np.split(objetive_data.flatten(), temp_shape[1])))

## 2. Neural Network

### 2.1 Arquitecture

The autoencoder is symmetric MLP, where the for every input, the target output is himself,  why symmetric?; because the architecture looks like:

$$ n_{(x)} , a_{(1)}, a_{(2)}, ..., a_{(m)}, a_{(m-1)}, ..., a_{(1)},n_{(y)} $$

#### 2.1.1 Input Layer

In this case, the input layer has 10 neurons ***(10 cases with 10 values each one)***

#### 2.1.2 Hiden Layer

By problem condition, it has a single hidden layer, with $ \large{\log_{2}^{\eta{(n_{(x)})}}} $ neurons, considering $ \eta{(n_{(x)})} = 10$ , the hidden layer is $\log_{2}^{10} = 3.219 $ , but $ \eta{(x_{i})} \in \mathbb{N}$ , then $ \lceil 3.219 \rceil = 4 $.

**Note:** $ \eta{(x_{i})} = n_{i} $ is the number of neurons in the layer $x_{i}$

#### 2.1.3 Output Layer

Considering the autoencoder is symmetric, the output layer also has to be 10 neurons in size.

---
___
---

### 2.2 Functions ( Training Procedure)



#### 2.2.1 Activation Function

Being an MLP, we are gonna use a classic architecture (see \[2.1.2\] Section), thus, the activation function is gonna be the Sigmoid function :

$$ f(x) = \frac{1}{1 + e^{-x}} $$


In [4]:
def sigmoid(x_val):
    return 1 / (1 + np.exp(-x_val))

#### 2.2.2 Forward




In [5]:
def forward_engine(input_data, objective_data, weigth_data_container, 
                   theta_container,kernel=sigmoid, 
                   verbose=True, *args):
    
    output = [ None for _ in range(len(weigth_data_container)) ]
    
    relative_layer = input_data.copy()
    for index in range(len(weigth_data_container)):
            
            if verbose:
                print(relative_layer.shape, weigth_data_container[index].shape, theta_container[index].shape)
            
            carry =  kernel((relative_layer *  weigth_data_container[index]) + theta_container[index])
            output[index] = carry.copy()
            
            relative_layer = carry.copy()
    
    return output

#### 2.2.3 Gradient Derivates
bla ble

In [6]:
def d_sigmoid( x_val, mode="layer"):
    if mode == "layer":
        return np.multiply(x_val , (x_val - 1))
    elif mode == "input":
        temp = sigmoid(x_val)
        return np.multiply(temp, (temp - 1))        

#### 2.2.4 Backward
bla ble

In [7]:
def backward_engine(input_data, objective_data, weigth_data_container, theta_container,
                    forward_data_container, derivate=d_sigmoid,
                    learning_rate=0.05, *args):
    
    output = copy(weigth_data_container)

    for index in range( len(weigth_data_container) - 1, -1, -1 ):
            if index == len(weigth_data_container) - 1:
                e = np.multiply((forward_data_container[index] - objective_data) , d_sigmoid(forward_data_container[index]))
                output[index] -= learning_rate * np.multiply( e , forward_data_container[index])
            else:
                e = np.multiply(e, d_sigmoid(forward_data_container[index + 1]))
                output[index] -= learning_rate * np.multiply( e , forward_data_container[index] )
    
    return output

#### 2.2.5 Engine
bla ble

In [8]:
def MLP_engine(input_data, objetive_data,
               layer_set_up = [4],
               theta = False,
               function_set=[sigmoid, d_sigmoid],
               learning_rate=0.05, epochs=int(1e3), threashold=0.05):
    
    shape_container = [len(input_data)] + layer_set_up + [len(objetive_data)]
    
    weigth_data_container = [ np.random.sample((shape_container[ii], shape_container[ii + 1]))
                             for ii in range(len(shape_container[:-1])) ]
    
    if theta:
        theta_container = list(map(lambda x : np.matrix(x), [ np.random.sample(ii) for ii in shape_container[1:]]))
    else:
        theta_container = list(map(lambda x : np.matrix(x), [ np.zeros(ii) for ii in shape_container[1:] ]))

    while epochs != 0 :
        
        for id_data in range(len(input_data)):
            
            forward_data_container = forward_engine(input_data[id_data], objetive_data[id_data],
                                                    weigth_data_container, theta_container,
                                                    kernel=function_set[0])
            
            print(forward_data_container)
            
            weigth_data_container = backward_engine(input_data[id_data], objetive_data[id_data],
                            weigth_data_container, theta_container,
                            forward_data_container, derivate=function_set[1],
                            learning_rate=learning_rate)
            
        epochs -= 1
            
    return weigth_data_container, theta_container, shape_container

## 2.3 Test

bla bla

In [9]:
w_d, t_d, s_d = MLP_engine(input_data, objetive_data)

print(input_data[0])
print(objetive_data[0])
print(forward_engine(input_data[0], objetive_data[0], w_d, t_d))

(1, 10) (10, 4) (1, 4)
(1, 4) (4, 10) (1, 10)
[matrix([[0.53838508, 0.66985921, 0.54228164, 0.61828576]]), matrix([[0.71953794, 0.7151201 , 0.7907564 , 0.76900069, 0.76102436,
         0.78237125, 0.72035707, 0.81704982, 0.83088811, 0.74635181]])]


ValueError: operands could not be broadcast together with shapes (1,10) (1,4) 