# Universidad del Valle de Guatemala<br/>Introduccion a la Inteligencia Artificial
## Dieter de Wit 15146

# Hoja de Trabajo<br/>Redes Neuronales

In [209]:
# External Imports
import math
import pandas as pd
import numpy as np
import random as rd
import functools as ft
from scipy import optimize

# Internal Imports
import load_mnist as lm

## Preparamos la data para ser analizada 

In [210]:
# Loading the data in Mnist format
X_train, y_train = lm.load_mnist('data/fashion')

# Translate y_train into np.array
y_train = np.array([y_train]).T

# Construct the dataset with X_train and y_train values
dataset = np.append(X_train.astype(float), y_train, 1)

# Select a Training Sample
training_value = int(len(dataset)*0.6)
training = dataset[:training_value, :]

In [211]:
# Loading the data in CSV format
train_set = pd.read_csv('data/fashion-mnist_train.csv')
test_set = pd.read_csv('data/fashion-mnist_test.csv')

In [212]:
# Normalizing the data, only X values
x1 = train_set.iloc[:, 1:]/1000.0
m1, n1 = x1.shape

# Convert data from Train datasets into an array
X = np.asarray(x1)
m, n = X.shape

X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [213]:
# Presenting Y data in array
y = np.asarray(train_set.iloc[:, 0])
y = y.reshape(m, 1)
Y = (y == np.array(range(10))).astype(int)

Y

array([[0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 1, 0, 0]])

## Preparar la estructura de la red neuronal

In [214]:
# Neural Network Structure 
NN_Structure = np.array([n, 99, 10])

# Thetas structure, later used for creating the Weight Matrix
theta_structure = np.hstack((
    NN_Structure[1:].reshape(len(NN_Structure) - 1, 1),
    (NN_Structure[:-1] + 1).reshape(len(NN_Structure) - 1, 1)
))

theta_structure
# Output shows:
# 1 Inner layer with 99 Neurons
# 1 Output Layer with 10 Neurons
# 1 Initial layer with 784 Neurons

array([[ 99, 785],
       [ 10, 100]])

## Preparar la estructura de los thetas a optimizar

In [215]:
# Create the Weight Matrix based on initial values for thetas
weight_matrix = [np.random.rand(*theta) for theta in theta_structure]

weight_matrix

[array([[0.95202301, 0.63406304, 0.8106702 , ..., 0.29735926, 0.20934463,
         0.51093317],
        [0.08205102, 0.3608362 , 0.4404341 , ..., 0.34437722, 0.38252149,
         0.50060532],
        [0.79012085, 0.59427233, 0.65302382, ..., 0.49636641, 0.96632502,
         0.94559169],
        ...,
        [0.32620488, 0.76304164, 0.93087987, ..., 0.58761384, 0.40104465,
         0.05699448],
        [0.34756962, 0.19129413, 0.20267145, ..., 0.94953158, 0.05911504,
         0.31974304],
        [0.43922009, 0.16817778, 0.84851547, ..., 0.4064552 , 0.10123785,
         0.63674218]]),
 array([[7.14838336e-01, 9.12245524e-01, 1.27760222e-01, 6.57377866e-01,
         6.32000964e-01, 7.31646568e-01, 6.12246878e-01, 1.86927993e-01,
         2.98988895e-02, 2.39257548e-01, 9.55286680e-01, 2.10790715e-01,
         7.40069347e-01, 4.07962568e-01, 8.70329899e-01, 4.85046411e-01,
         4.00600111e-01, 8.53843541e-03, 7.18476542e-01, 7.13952425e-01,
         8.65541877e-01, 4.16099439e-01, 8.2

In [216]:
# Separate arrays to flatten one by one
initial_to_inner_thetas = np.hstack(weight_matrix[0]).squeeze()
output_thetas = np.hstack(weight_matrix[1]).squeeze()

In [217]:
# Sum the flattened arrays to get the intial thetas
initial_thetas = np.hstack((initial_to_inner_thetas, output_thetas))

initial_thetas

array([0.95202301, 0.63406304, 0.8106702 , ..., 0.90512395, 0.26390307,
       0.30847356])

## Algoritmos necesarios para el entrenamiento

In [218]:
# Convert Array in Array of Arrays
def inflate_matrixes(initial_thetas, shapes):
    layers = len(shapes) + 1
    sizes = [shape[0] * shape[1] for shape in shapes]
    steps = np.zeros(layers, dtype=int)

    for i in range(layers - 1):
        steps[i + 1] = steps[i] + sizes[i]

    return [
        initial_thetas[steps[i]: steps[i + 1]].reshape(*shapes[i])
        for i in range(layers - 1)
    ]

In [219]:
# Logistics Sigmoid Function (1/(1 + e^-x)) for each value in input Z
def sigmoid(Z):
    a = [(1 / (1 + np.exp(-x))) for x in Z]
    return np.asarray(a).reshape(Z.shape)

In [220]:
# Feed Forward Model with activation 
def feed_forward_model(thetas, X):
    AL = [X]
    
    for i in range(len(thetas)):
        AL.append(
            sigmoid(
                np.matmul(
                    np.hstack((
                        np.ones(len(X)).reshape(len(X), 1),
                        AL[i]
                    )),
                    thetas[i].T
                )
            )
        )
    return AL

In [221]:
# Cost predicted-expected value
def cost_predict(initial_thetas, shapes, X, Y):
    ffm = feed_forward_model(
        inflate_matrixes(initial_thetas, shapes),
        X
    )
    ff_log = Y * np.log(ffm[-1]) + (1 - Y) * np.log(1 - ffm[-1])
    return -ff_log.sum() / len(X)

In [222]:
# Calculate the Gradient based on thetas, neural network shape and inputs(X, Y)
def model_backpropagation(initial_thetas, shapes, X, Y):
    m, layers = len(X), len(shapes) + 1
    thetas = inflate_matrixes(initial_thetas, shapes)
    ffm = feed_forward_model(thetas, X)
    deltas = [*range(layers - 1), ffm[-1] - Y]

    for i in range(layers - 2, 0, -1):
        deltas[i] =  (deltas[i + 1] 
                      @ 
                      np.delete(thetas[i], 0, 1)) * (ffm[i] * (1 - ffm[i]))

    propagate = []
    for n in range(layers - 1):
        propagate.append(
            (deltas[n + 1].T 
            @ 
            np.hstack((
                np.ones(len(ffm[n])).reshape(len(ffm[n]), 1),
                ffm[n]
            ))) / m
        )

    propagate = np.asarray(propagate)
    
    input_to_inner_deltas = np.hstack(propagate[0]).squeeze()
    output_deltas = np.hstack(propagate[1]).squeeze()
    
    return np.hstack((input_to_inner_deltas, output_deltas))

## Entrenamiento de la data, obtener el array de thetas entrenados

### No correr esta parte del codigo, a mi me tardo 18 horas en terminar con 1848 iteraciones lo cual retorno un 85.33% de precision con el Test_Set <br/>Por seguridad le cambie el No. de iteraciones a 10 y cambie el nombre del archivo a guardar por si lo quieren correr, pero va a dar con precision del 7-9%

In [223]:
# Inject initial_thetas and get optimized trained thetas 
trained = optimize.minimize(
    fun = cost_predict,
    x0 = initial_thetas,
    args = (theta_structure, X, Y),
    method = 'L-BFGS-B',
    jac = model_backpropagation,
    options = {'disp': True, 'maxiter': 10}
)

  ff_log = Y * np.log(ffm[-1]) + (1 - Y) * np.log(1 - ffm[-1])
  ff_log = Y * np.log(ffm[-1]) + (1 - Y) * np.log(1 - ffm[-1])


In [224]:
# Insert Optimized thetas into array
np.savetxt('trained_thetas_10.txt', trained.x)

## Utilizar las thetas entrenadas para predecir la data del Test Set

In [255]:
# Load the trained thetas from file into an array
optimized_thetas = np.loadtxt("trained_thetas_1848.txt")
# Transform array into array of arrays using the Neural Network Structure
thetas = inflate_matrixes(optimized_thetas, theta_structure)

# Organize dataset into arrays 
X_test = test_set.iloc[:, 1:] / 1000.0
m, n = X_test.shape
y_test = np.asarray(test_set.iloc[:, 0])
y_test = y_test.reshape(m, 1)
Y_test = (y_test == np.array(range(10))).astype(int)

# Test the data based on trained thetas matrix
ffm_trained = feed_forward_model(thetas, X_test)

In [256]:
predicted = 0
predicted_values = []
true_values = []

# Save Predicted Values in array
for prediction in ffm_trained[-1]:
    predicted_values.append(np.where(prediction == np.amax(prediction))[0])

# Save Real Values in array
for true_value in Y_test:
    true_values.append(np.where(true_value == np.amax(true_value))[0])

# Compare values from both arrays to find correct values
for value in range(len(X_test)):
    if predicted_values[value].item(0) == true_values[value].item(0):
        predicted += 1

print(round(100 * predicted/len(X_test), 2), "% of precision got, running with Testing Set")

85.33 % of precision got, running with Testing Set


### 85.33% de Precision con el Testing Set, se obtuvo mas del 87% utilizando el  dataset de entrenamiento pero no vale tanto la pena mostrar porque obviamente el Training Set elevara los valores predecidos.

### Para un poco de discusion podemos visualizar lo que la falta iteraciones en el entrenamiento afecta al modelo:

In [252]:
# Load the trained thetas from file into an array
optimized_thetas_try = np.loadtxt("trained_thetas_1200.txt")
# Transform array into array of arrays using the Neural Network Structure
thetas_try = inflate_matrixes(optimized_thetas_try, theta_structure)

# Test the data based on trained thetas matrix
ffm_trained_try = feed_forward_model(thetas_try, X_test)

predicted_try = 0
predicted_values_try = []
true_values_try = []

# Save Predicted Values in array
for prediction in ffm_trained_try[-1]:
    predicted_values_try.append(np.where(prediction == np.amax(prediction))[0])

# Save Real Values in array
for true_value in Y_test:
    true_values_try.append(np.where(true_value == np.amax(true_value))[0])

# Compare values from both arrays to find correct values
for value in range(len(X_test)):
    if predicted_values_try[value].item(0) == true_values_try[value].item(0):
        predicted_try += 1

print(round(100 * predicted_try/len(X_test), 2), "% of precision got, running with Testing Set on Poorly Trained NN")

72.19 % of precision got, running with Testing Set on Poorly Trained NN


Podemos analizar luego de esto que nuestro modelo es bastante de fiar para la prediccion de la base de datos minst, los algoritmos de Feed Forward y Back Propagation son los correctos, sin embargo, se podria encontrar aun muchos metodos para optimizarlos. En total, para lograr un 85.33 porciento de fidelidad del algoritmo con el Testing Set se requirieron 1848 iteraciones de optimize.minimize de scipy, lo cual duro un aproximado de 18 horas corriendo en una maquina core i7 de 7ma generacion, 16GB de RAM... Antes se realizaron bastantes intentos mas, intentando optimizar el algoritmo, casi todas las veces sin exito y regresando a versiones muy parecidas del original de Samuel. Una de las cosas que se pueden discutir, es lo que realiza el entrenamiento de la red y su impacto final, se empezo desde 300 iteraciones, unas 2-3 horas obteniendo 40-45% de precision, luego se aumento a 500 subiendo a 60-69% de presicion. Desde ese momento se analizo que se necesitaban muchas mas iteraciones, corri con 1200 pensando que lograria la cantidad correcta, arriba pueden ver el resultado. Finalmente corri con 2000, en donde al llegar a 1851 iteraciones el algoritmo empezo a reportar analizar sobre valores de f = NaN, y tardarse alrededor de 2-5minutos por iteracion. Se opto por repetir con 1848 iteraciones creyendo haber llegado a un error de Bias.