## Libraries

In [1]:
import math # for sqrt, log, exponentials
import numpy as np # for vectorization and array
import random # for random simulation
import pandas as pd # for dataframe visualization
import matplotlib.pyplot as plt # for plotting data in a graph
import copy # for making predictions
from collections import OrderedDict # ordering dictionaries
import warnings # no annoying warnings

warnings.filterwarnings('ignore') # to ignore numpy's warnings

# Problem 1: Auto-Encoders

In [8]:
def generate_data_point(sigma):
    """
    Purpose:
    Generates a data point of at 30 dimensions.
    
    Parameters:
    sigma - a float number that alters our output, and adds more 
    noise (this should hinder the performance of our model)
    
    Returns:
    feature_vector - a list with a length of (dimensions + 1)
    where all elements are features
    """
    
    # intialize a feature vector of zeros
    feature_vector = np.zeros(30)
    
    # modifying x1
    feature_vector[0] = np.random.normal(0,1)
    
    # creating x4, x7, x10, x13, ... , x28
    indices_to_modify = np.array(list(range(4,28+3,3))) - 1
    for index in indices_to_modify:
        feature_vector[index] = feature_vector[index - 3] + np.random.normal(0,sigma**2)
    
    # modifying x2
    feature_vector[1] = feature_vector[0] + np.random.normal(0,sigma**2)
    
    # creating x5, x8, x11, ... , x29
    indices_to_modify = np.array(list(range(5,29+3,3))) - 1
    for index in indices_to_modify:
        feature_vector[index] = feature_vector[index - 3] + np.random.normal(0,sigma**2)
    
    # modifying x3
    feature_vector[2] = feature_vector[0] + np.random.normal(0,sigma**2)
    
    # creating x6, x9, x12, x15, ... , x30
    indices_to_modify = np.array(list(range(6,30+3,3))) - 1
    for index in indices_to_modify:
        feature_vector[index] = feature_vector[index - 3] + np.random.normal(0,sigma**2)
    
    # adding the bias term
    feature_vector = list(feature_vector)
    feature_vector.insert(0,1)
    
    return np.array(feature_vector)

def generate_train_data_set(training_data_size = 5000, sigma = 0.10):
    """
    Purpose:
    To use the generate_data_point function to generate training
    data 
    
    Parameters:
    training_data_size - an integer specifying how many training data points
    you would like to generate
    
    sigma - a float number that alters our output
    
    Returns:
    x_train - ndarray with shape of ((dimensions + 1) x number of data points)
    """
    
    # intialize our test and training data
    training_data = []
    
    # generating the training data
    for _ in range(0,training_data_size):
        training_data.append(generate_data_point(sigma))
        
    # putting our generated data into a numpy ndarray
    x_train = np.array(training_data)

    return x_train

# doing this so we do not have to calculate e
# everytime we run our activation function tanh
e = math.e

def tanh(z):
    """
    Purpose:
    Our activation function in the hidden layer
    
    Parameters:
    z - (30 x 1) vector containing random float values
    
    Returns:
    A value used for learning in the autoencoder
    """
    pos_power = e ** z
    neg_power = e ** -z
    
    return (pos_power - neg_power) / (pos_power + neg_power)

def ReLU(z):
    """
    Purpose:
    Our activation function in our output layer
    
    Parameters:
    z - (HIDDEN_NODES x 1) vector containing random float values
    
    Returns:
    A value without bounds (our predictions)
    """
    
    return max(0,max(z))

def calculate_loss(x_train, x_predicted):
    """
    Purpose:
    calculates the loss between the train data points
    and the predicted data points
    
    Parameters:
    x_train - (5000 x 30) dimensional array
    x_predicted - (5000 x 30) dimensional array
    
    Returns:
    loss - a float value indicating our error
    """
    
    # converting to numpy arrays
    x_train = np.array(x_train)
    x_predicted = np.array(x_predicted)
    
    # number of data points
    N = len(x_train)
    
    # calculating the loss
    loss = (1 / N) * np.sum( (np.linalg.norm((x_train - x_predicted)))**2 )
    
    return loss

def xavier_initialization(INPUT_NODES, HIDDEN_NODES, OUTPUT_NODES):
    """
    Parameters: 
    INPUT_NODES - an integer representing the number of input nodes in the first layer
    HIDDEN_NODES - an integer representing the number if hidden nodes in the second layer
    OUTPUT_NODES - an integer representing the number of output nodes in the third layer
    
    Returns:
    weights - a dictionary that holds two ndarrays (one for each layer except the output layer)
    
    Purpose:
    To intialize weights for our neural network, but in this case
    our autoencoder
    """

    # to hold the weights of the layers
    weights = {"Layer 1":None,"Layer 2":None}

    # building our w_vector - first build a temporary vector with all weights equal to one 
    # plus one for the bias weight
    layer_1_weights = np.ones( ((INPUT_NODES + 1) * HIDDEN_NODES) )
    layer_2_weights = np.ones( ((HIDDEN_NODES + 1) * OUTPUT_NODES) )

    # initializing the weights with Xavier intialization
    for index in range(0, len(layer_1_weights)):
            
        num_in = 0 # number of in nodes
        num_out = HIDDEN_NODES # number of out nodes, FC layers

        a = -1 * math.sqrt(6/(num_in + num_out)) # beginning of interval
        b = -a # end of interval 

        new_weight = np.random.uniform(a,b) # calculating our new weight
        layer_1_weights[index] = new_weight # putting our new weight into our weight vector
        
     # initializing the weights with Xavier intialization
    for index in range(0, len(layer_2_weights)):

        num_in = INPUT_NODES # number of in nodes
        num_out = HIDDEN_NODES # number of out nodes, FC layers

        a = -1 * math.sqrt(6/(num_in + num_out)) # beginning of interval
        b = -a # end of interval 

        new_weight = np.random.uniform(a,b) # calculating our new weight

        layer_2_weights[index] = new_weight # putting our new weight into our weight vector
        
    # assigning the weights to the weight dictionary
    weights["Layer 1"] = np.array(np.split(layer_1_weights, INPUT_NODES + 1))
    weights["Layer 2"] = np.array(np.split(layer_2_weights, HIDDEN_NODES + 1))
    
    # returning our weight dictionary
    return weights 

def forward_propogation(input_layer, weights):
    """
    Parameters:
    input_layer - ndarray of shape ((1 + INPUT_NODES) x 1)
    weights - a dictionary with ndarray of weights
    HIDDEN_NODES - an integer representing the number if hidden nodes in the second layer
    
    Returns:
    hidden_layer - ndarray of shape (1 + HIDDEN_NODES x 1)
    output_layer - ndarray of shape (INPUT_NODES x 1)
    
    Purpose:
    To compute a new hidden layer based off the weights of our model
    To compute a new output layer with the newly computed hidden layer
    """    
    # This function is vectorized using numpy
    # for incredibly fast computation!!

    # applying our weights to the input layer via dot product and the tanh activation function + bias term
    hidden_layer = np.insert(tanh(np.dot(input_layer,weights['Layer 1'])), 0 , 1)

    # applying our last weights
    output_layer = np.dot(hidden_layer,weights['Layer 2'])

    return (hidden_layer, output_layer)

def back_propogation(input_layer, hidden_layer, output_layer, weights, alpha):
    """
    Parameters:
    All the layers of the neural network and
    the learning rate alpha
    
    Returns:
    new_weights = the new computed weights of our model
    that minimize our error
    
    Purpose:
    To update the weights of our model
    using SGD (stochastic gradient descent)
    """
    
    # ~almost vectorized~ but not quite
    
    # updating the second set of weights (before the output)
    derivative_loss_to_node = (2 * (output_layer - input_layer[1:]))
    
    for index in range(0,len(hidden_layer)):
        weights['Layer 2'][index] -= alpha * hidden_layer[index] * derivative_loss_to_node

    # updating the first set of weights
    derivative_first_weight = (1 - tanh(np.dot(input_layer,weights['Layer 1'])) ** 2)
    
    for index in range(0,len(hidden_layer)):
        weights['Layer 1'][index] -= (alpha * derivative_first_weight * input_layer[index] * np.dot(derivative_loss_to_node,weights['Layer 2'][index]))
        
    return weights

def run_experiment(x_train, num_hidden_layers, trials):
    """
    Parameters:
    
    Returns:
    
    Purpose:
    """
    original_data = []
    predictions = []
    
    # random initialization - Xavier Edition
    weights = xavier_initialization(INPUT_NODES, HIDDEN_NODES, OUTPUT_NODES)
    
    # picking random points in the training data
    for _ in range (0,trials):

        input_layer = random.choice(x_train)

        hidden_layer, output_layer = forward_propogation(input_layer, weights)
        weights = back_propogation(input_layer, hidden_layer, output_layer, weights, alpha)

        # to calculate the loss - no counting the bias term
        original_data.append(input_layer[1:])
        predictions.append(output_layer)

    loss = calculate_loss(original_data,predictions)
    return loss

In [28]:
INPUT_NODES = 30 # input layer
HIDDEN_NODES = 1 # hidden layer
OUTPUT_NODES = 30 # output layer 
alpha = 0.0001 # learning rate

In [33]:
# generate our data for training
x_train = generate_train_data_set(sigma = 2)

In [34]:
run_experiment(x_train, num_hidden_layers = HIDDEN_NODES, trials = 10000)

2292.064440524413