In [1]:
import pandas as pd
import numpy as np

import matplotlib as m_plt
from matplotlib import pyplot as plt

from copy import copy
from pprint import pprint

import sys
import os

import personal_plotter as p_plt

# Autoasociator

## 1. Data Generation

In [2]:
dummy_shape = (10)
dummy_data = np.identity(dummy_shape)
print(dummy_data)

[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


### 1.1 Data Sampling

We are gonna use our dummy data (before generated) as a template, being the input and objective data equal is not necessary to create another dummy data for the objective one.

In [3]:
input_data = dummy_data.copy()
temp_shape = copy(input_data.shape)
input_data = list(map(lambda x : np.matrix(x), np.split(input_data.flatten(), temp_shape[1])))

objetive_data = dummy_data.copy()
temp_shape = copy(objetive_data.shape)
objetive_data = list(map(lambda x : np.matrix(x), np.split(objetive_data.flatten(), temp_shape[1])))

## 2. Neural Network

### 2.1 Arquitecture

The autoencoder is symmetric MLP, where the for every input, the target output is himself,  why symmetric?; because the architecture looks like:

$$ n_{(x)} , a_{(1)}, a_{(2)}, ..., a_{(m)}, a_{(m-1)}, ..., a_{(1)},n_{(y)} $$

#### 2.1.1 Input Layer

In this case, the input layer has 10 neurons ***(10 cases with 10 values each one)***

#### 2.1.2 Hiden Layer

By problem condition, it has a single hidden layer, with $ \large{\log_{2}^{\eta{(n_{(x)})}}} $ neurons, considering $ \eta{(n_{(x)})} = 10$ , the hidden layer is $\log_{2}^{10} = 3.219 $ , but $ \eta{(x_{i})} \in \mathbb{N}$ , then $ \lceil 3.219 \rceil = 4 $.

**Note:** $ \eta{(x_{i})} = n_{i} $ is the number of neurons in the layer $x_{i}$

#### 2.1.3 Output Layer

Considering the autoencoder is symmetric, the output layer also has to be 10 neurons in size.

---
___
---

### 2.2 Functions ( Training Procedure)



#### 2.2.1 Activation Function

Being an MLP, we are gonna use a classic architecture (see \[2.1.2\] Section), thus, the activation function is gonna be the Sigmoid function :

$$ f(x) = \frac{1}{1 + e^{-x}} $$


In [4]:
def sigmoid(x_val):
    return 1 / (1 + np.exp(-x_val))

#### 2.2.2 Forward

The ***feed forward*** is the transmission of inputs (layer to layer), considering the architecture, is possible to make it vectorize  :

$$ \Large{ {x_{i+1}}_{(1,n)}= F_{net}( {x_{i}}_{(1,m)} * {w_{i}}_{(m,n)} + {\theta_{i}}_{(1,l)}) } $$

Note : $$ F_{net} = \frac{1}{1 + e^{-x}} $$

In [5]:
def forward_engine(input_data, objective_data, weigth_data_container, 
                   theta_container,kernel=sigmoid, 
                   verbose=False, *args):
    
    output = [ None for _ in range(len(weigth_data_container)) ]
    
    relative_layer = input_data.copy()
    for index in range(len(weigth_data_container)):
            
            if verbose:
                print(relative_layer.shape, weigth_data_container[index].shape, theta_container[index].shape)
            
            carry =  kernel((relative_layer *  weigth_data_container[index]) + theta_container[index])
            output[index] = carry.copy()
            
            relative_layer = carry.copy()
    
    return output

#### 2.2.3 Gradient Derivates

$$ \large{f(x) = \frac{1}{1 + e^{-x}} \,:\, \rightarrow \frac{\mathop{d (f(x))}}{\mathop{d(x)}} = f(x) \cdot ( 1 - f(x) )} $$

In [6]:
def d_sigmoid( x_val, mode="layer"):
    if mode == "layer":
        return np.multiply(x_val , (x_val - 1))
    elif mode == "input":
        temp = sigmoid(x_val)
        return np.multiply(temp, (temp - 1))        

#### 2.2.4 Backward

The ***feed backward*** process tries to adjust the **weight matrix** by transmitting the error obtained in the last layer  $ E(t_j - o_j) $ using the gradient descent to minimize the error :

    $$ \Large{ \nabla \cdot E = 0 } $$

In [7]:
def backward_engine(input_data, objective_data, weigth_data_container, theta_container,
                    forward_data_container, derivate=d_sigmoid,
                    learning_rate=0.05, verbose=False, *args):
    
    output = copy(weigth_data_container)
    t_output = copy(theta_container)
    
    last_layer = True
    for index in range( len(weigth_data_container) - 1, -1, -1 ):
        
            if last_layer:
                
                e = np.multiply((forward_data_container[index] - objective_data) , derivate(forward_data_container[index]))
                
                output[index] += learning_rate * ( e.T * forward_data_container[index - 1]).T
                
                t_output[index] += learning_rate * e
                
                last_layer = False
                
                if verbose:
                    print("output layer")
            else:
                
                if index > 0:
                    e = np.multiply( (e * weigth_data_container[index + 1].T) , derivate(forward_data_container[index]))
                                        
                    output[index] += learning_rate * np.multiply( e.T , forward_data_container[index - 1]).T
                    
                    t_output[index] += learning_rate * e
                    
                    if verbose:
                        print("in between layers ", index)
                    
                else:
                    e = np.multiply( (e * weigth_data_container[index + 1].T) , derivate(forward_data_container[index]))
                    
                    output[index] += learning_rate * np.multiply( e.T , input_data).T
                    
                    t_output[index] += learning_rate * e
                    
                    if  verbose:
                        print("input layer")
                        
            if verbose:
                print("e \t")
                print(e)
                print(e.T.shape)
                
                print("Forwared Shape \t")
                print(forward_data_container[index - 1].shape)
                
                print("Weigth \t")
                print (output[index].shape)
                print(output[index])
                
                print("Theta \t")
                print (t_output[index].shape)
                print(t_output[index])
    
    return output, t_output

#### 2.2.5 Engine

Iterates over the cases (inputs), trying to minimize the error in the obtained result (more close to the objective result). This iteration is called ***Epochs***, one single epoch is the processing (forward) and subsequent weight adjust (backward) of every data. 

In [12]:
def MLP_engine(input_data, objetive_data,
               layer_set_up = [10,4,10],
               theta = False,
               function_set=[sigmoid, d_sigmoid],
               learning_rate=0.05, epochs=int(1e3), threshold=0.05,
               verbose=False):
    
    shape_container = layer_set_up
    
    weigth_data_container = [ np.random.sample((shape_container[ii], shape_container[ii + 1])) + 1
                             for ii in range(len(shape_container[:-1])) ]

    if theta:
        theta_container = list(map(lambda x : np.matrix(x), [ np.random.sample(ii)  + 1 for ii in shape_container[1:]]))
    else:
        theta_container = list(map(lambda x : np.matrix(x), [ np.zeros(ii) for ii in shape_container[1:] ]))
    
    if verbose:
        print("weigth", weigth_data_container)
        print("theta", theta_container)
    
    iters = 0
    while epochs != 0 :
        epoch_cum = 0
        
        for id_data in range(len(input_data)):
            
            forward_data_container = forward_engine(input_data[id_data], objetive_data[id_data],
                                                    weigth_data_container, theta_container,
                                                    kernel=function_set[0],
                                                    verbose=verbose)
            
            if  verbose:
                print(forward_data_container)
            
            weigth_data_container, theta_container = backward_engine(input_data[id_data], objetive_data[id_data],
                            weigth_data_container, theta_container,
                            forward_data_container, derivate=function_set[1],
                            learning_rate=learning_rate,
                                                    verbose=verbose)
            
            epoch_cum += np.sqrt(np.sum(np.power(objetive_data[id_data] - forward_data_container[-1], 2))/ len(forward_data_container[-1]))
            #epoch_cum += objetive_data[id_data] - forward_data_container[-1]
        
        if iters < 1000:
            if epochs % 50 == 0:
                print("Epoch :> {:d} \n\tError :> {:.4f} %".format(iters, (epoch_cum / len(input_data)) * 100))
        
        if epochs%10000 == 0:
            print("Epoch :> {:d} \n\tError :> {:.4f} %".format(iters, (epoch_cum / len(input_data)) * 100))
            
        if epoch_cum / len(input_data) < threshold:
            break
        else:
            pass
        
        iters += 1
        epochs -= 1
    
    print("Total of Epochs {:d} \n\tError :> {:.4f} %".format(iters, (epoch_cum / len(input_data)) * 100))
    return weigth_data_container, theta_container, shape_container

### 2.3 Test

#### 2.3.1 Case

$$ \begin{bmatrix}
        1 \\
        1 \\
        0 \\
        0 
    \end{bmatrix} \land \begin{bmatrix}
        1 \\
        0 \\
        1 \\
        0
    \end{bmatrix} = \begin{bmatrix}
        1 \\
        0 \\
        0 \\
        0
    \end{bmatrix}
$$

#### 2.3.2 Data

$$
a = \begin{bmatrix}
        1 & 1 \\
        1 & 0 \\
        0 & 1 \\
        0 & 0
    \end{bmatrix}
$$

$$ b = \begin{bmatrix}
        1 \\
        0 \\
        0 \\
        0
    \end{bmatrix} $$

#### 2.3.3 Training

In [19]:
a = [[1,1], [1,0], [0,1], [0,0]]
a = list(map(lambda x: np.matrix(x), a))
print(a)
b = [1, 0, 0, 0]
b = list(map(lambda x: np.matrix(x), b))
print(b)

w_d, t_d, s_d = MLP_engine(a, b,
                           layer_set_up=[2, 1], theta=True,
                           epochs=int(1e3), learning_rate=1,
                           verbose=False)

[matrix([[1, 1]]), matrix([[1, 0]]), matrix([[0, 1]]), matrix([[0, 0]])]
[matrix([[1]]), matrix([[0]]), matrix([[0]]), matrix([[0]])]
Epoch :> 0 
	Error :> 68.5096 %
Epoch :> 50 
	Error :> 34.7140 %
Epoch :> 100 
	Error :> 31.9263 %
Epoch :> 150 
	Error :> 30.4950 %
Epoch :> 200 
	Error :> 29.6567 %
Epoch :> 250 
	Error :> 29.1079 %
Epoch :> 300 
	Error :> 28.7188 %
Epoch :> 350 
	Error :> 28.4269 %
Epoch :> 400 
	Error :> 28.1984 %
Epoch :> 450 
	Error :> 28.0138 %
Epoch :> 500 
	Error :> 27.8609 %
Epoch :> 550 
	Error :> 27.7316 %
Epoch :> 600 
	Error :> 27.6206 %
Epoch :> 650 
	Error :> 27.5239 %
Epoch :> 700 
	Error :> 27.4387 %
Epoch :> 750 
	Error :> 27.3629 %
Epoch :> 800 
	Error :> 27.2949 %
Epoch :> 850 
	Error :> 27.2334 %
Epoch :> 900 
	Error :> 27.1775 %
Epoch :> 950 
	Error :> 27.1263 %
Total of Epochs 1000 
	Error :> 27.0802 %


#### 2.3.4 Verification

In [20]:
for i in range(4):
    print("\n\n____________***____________\n\n")
    print("\t\tCASE", i)
    print("\tA\t::>>\n", a[i].T)
    print("\tB\t::>>\t", b[i])
    
    temp = forward_engine(a[i], b[i], w_d, t_d)
    
    print("\n\tRAW Result\n")
    print(temp)
    print("\t\nBinary Result\n \t\t", end="")
    print( round( float(temp[0]) , 0), end="\n")



____________***____________


		CASE 0
	A	::>>
 [[1]
 [1]]
	B	::>>	 [[1]]

	RAW Result

[matrix([[0.00316629]])]
	
Binary Result
 		0.0


____________***____________


		CASE 1
	A	::>>
 [[1]
 [0]]
	B	::>>	 [[0]]

	RAW Result

[matrix([[0.0095951]])]
	
Binary Result
 		0.0


____________***____________


		CASE 2
	A	::>>
 [[0]
 [1]]
	B	::>>	 [[0]]

	RAW Result

[matrix([[0.01945274]])]
	
Binary Result
 		0.0


____________***____________


		CASE 3
	A	::>>
 [[0]
 [0]]
	B	::>>	 [[0]]

	RAW Result

[matrix([[0.05705669]])]
	
Binary Result
 		0.0


## 3. Autoencoder

### 3.1 Training

In [21]:
w_d, t_d, s_d = MLP_engine(input_data, objetive_data,
                           layer_set_up=[10, 4, 10], theta=True,
                           epochs=int(1e4), learning_rate=0.5,
                           verbose=False)

Epoch :> 0 
	Error :> 299.7591 %
Epoch :> 0 
	Error :> 299.7591 %
Epoch :> 50 
	Error :> 255.1469 %
Epoch :> 100 
	Error :> 156.0187 %
Epoch :> 150 
	Error :> 126.7534 %
Epoch :> 200 
	Error :> 95.6159 %
Epoch :> 250 
	Error :> 95.6176 %
Epoch :> 300 
	Error :> 95.6190 %
Epoch :> 350 
	Error :> 95.6200 %
Epoch :> 400 
	Error :> 95.6204 %
Epoch :> 450 
	Error :> 95.6202 %
Epoch :> 500 
	Error :> 95.6193 %
Epoch :> 550 
	Error :> 95.6178 %
Epoch :> 600 
	Error :> 95.6155 %
Epoch :> 650 
	Error :> 95.6128 %
Epoch :> 700 
	Error :> 95.6098 %
Epoch :> 750 
	Error :> 95.6069 %
Epoch :> 800 
	Error :> 95.6043 %
Epoch :> 850 
	Error :> 95.6023 %
Epoch :> 900 
	Error :> 95.6011 %
Epoch :> 950 
	Error :> 95.6007 %


KeyboardInterrupt: 

### 3.2 Verification

In [None]:
print("Input\n")
print(input_data[0])

temp = forward_engine(input_data[0], objetive_data[0], w_d, t_d)

print(temp)
print("Result\n")
print(temp[-1])
print("Objective\n")
print(objetive_data[0])

## 4. Conclusion

