# Name: Harrison Gropper
# Date: 12/13/2022
# Section: 01:198:461:02
# Homework 3

## Libraries

In [1]:
import math # for sqrt, log, exponentials
import numpy as np # for vectorization and array
import random # for random simulation
import pandas as pd # for dataframe visualization
import matplotlib.pyplot as plt # for plotting data in a graph
import copy # for making predictions
from collections import OrderedDict # ordering dictionaries
import warnings # no annoying warnings

warnings.filterwarnings('ignore') # to ignore numpy's warnings

# Problem 1: Auto-Encoders

In [2]:
def generate_data_point(sigma):
    """
    Purpose:
    Generates a data point of at 30 dimensions.
    
    Parameters:
    sigma - a float number that alters our output, and adds more 
    noise (this should hinder the performance of our model)
    
    Returns:
    feature_vector - a list with a length of (dimensions + 1)
    where all elements are features
    """
    
    # intialize a feature vector of zeros
    feature_vector = np.zeros(30)
    
    # modifying x1
    feature_vector[0] = np.random.normal(0,1)
    
    # creating x4, x7, x10, x13, ... , x28
    indices_to_modify = np.array(list(range(4,28+3,3))) - 1
    for index in indices_to_modify:
        feature_vector[index] = feature_vector[index - 3] + np.random.normal(0,sigma**2)
    
    # modifying x2
    feature_vector[1] = feature_vector[0] + np.random.normal(0,sigma**2)
    
    # creating x5, x8, x11, ... , x29
    indices_to_modify = np.array(list(range(5,29+3,3))) - 1
    for index in indices_to_modify:
        feature_vector[index] = feature_vector[index - 3] + np.random.normal(0,sigma**2)
    
    # modifying x3
    feature_vector[2] = feature_vector[0] + np.random.normal(0,sigma**2)
    
    # creating x6, x9, x12, x15, ... , x30
    indices_to_modify = np.array(list(range(6,30+3,3))) - 1
    for index in indices_to_modify:
        feature_vector[index] = feature_vector[index - 3] + np.random.normal(0,sigma**2)
    
    return feature_vector

def generate_train_data_set(training_data_size = 5000, sigma = 0.10):
    """
    Purpose:
    To use the generate_data_point function to generate training
    data 
    
    Parameters:
    training_data_size - an integer specifying how many training data points
    you would like to generate
    
    sigma - a float number that alters our output
    
    Returns:
    x_train - ndarray with shape of (dimensions x number of data points)
    """
    
    # intialize our test and training data
    training_data = []
    
    # generating the training data
    for _ in range(0,training_data_size):
        training_data.append(generate_data_point(sigma))
        
    # putting our generated data into a numpy ndarray
    x_train = np.array(training_data)

    return x_train

# doing this so we do not have to calculate e
# everytime we run our activation function tanh
e = math.e

def tanh(z):
    """
    Purpose:
    Our activation function in the hidden layer
    
    Parameters:
    z - (30 x 1) vector containing random float values
    
    Returns:
    A value used for learning in the autoencoder
    """
    pos_power = e ** z
    neg_power = e ** -z
    
    return (pos_power - neg_power) / (pos_power + neg_power)

def ReLU(z):
    """
    Purpose:
    Our activation function in our output layer
    
    Parameters:
    z - (HIDDEN_NODES x 1) vector containing random float values
    
    Returns:
    A value without bounds (our predictions)
    """
    
    return max(0,max(z))

def calculate_loss(x_train, x_predicted):
    """
    Purpose:
    calculates the loss between the train data points
    and the predicted data points
    
    Parameters:
    x_train - (5000 x 30) dimensional array
    x_predicted - (5000 x 30) dimensional array
    
    Returns:
    loss - a float value indicating our error
    """
    
    # number of data points
    N = len(x_train)
    
    # calculating the loss
    loss = (1 / N) * np.sum( (np.linalg.norm((x_train - x_predicted)))**2 )
    
    return loss

def xavier_initialization(INPUT_NODES, HIDDEN_NODES, OUTPUT_NODES):
    """
    Parameters: 
    INPUT_NODES - an integer representing the number of input nodes in the first layer
    HIDDEN_NODES - an integer representing the number if hidden nodes in the second layer
    OUTPUT_NODES - an integer representing the number of output nodes in the third layer
    
    Returns:
    w_vector - a ndarray or vector of shape ( (INPUT_NODES * HIDDEN_NODES) x HIDDEN_NODES)
    
    Purpose:
    To intialize weights for our neural network, but in this case
    our autoencoder
    """

    # calculating total number of weights in our neural network (Fully Connected Layers)
    number_of_weights = (INPUT_NODES * HIDDEN_NODES)

    # building our w_vector - first build a temporary vector with all weights equal to one 
    w_vector = np.ones(number_of_weights)

    # calculating the weights with Xavier intialization
    for index in range(0, number_of_weights):

        num_in = 0 # number of in nodes
        num_out = HIDDEN_NODES # number of out nodes

        a = -1 * math.sqrt(6/(num_in + num_out)) # beginning of interval
        b = -a # end of interval 

        new_weight = np.random.uniform(a,b) # calculating our new weight
        w_vector[index] = new_weight # putting our new weight into our weight vector

        
    # grouping the vector for each node
    w_vector = np.array(np.split(w_vector, HIDDEN_NODES))
    
    # returning our weight vector 
    return w_vector 

def foward_propogation(input_layer, weights, HIDDEN_NODES):
    """
    Parameters:
    input_layer - ndarray of shape (INPUT_NODES x 1)
    weights - a ndarray or vector of shape ( (INPUT_NODES * HIDDEN_NODES) x 1)
    HIDDEN_NODES - an integer representing the number if hidden nodes in the second layer
    
    Returns:
    new_hidden_layer - ndarray of shape (HIDDEN_NODES x 1)
    new_out_layer - ndarray of shape (INPUT_NODES x 1)
    
    Purpose:
    To compute a new hidden layer based off the weights of our model
    To compute a new output layer with the newly computed hidden layer
    """    
    # This function is vectorized using numpy
    # for incredibly fast computation!!
    
    # one foward pass using our activation function tanh 
    new_hidden_layer = tanh(input_layer * weights)

    # getting output layer with our ReLU
    new_output_layer = np.apply_along_axis(ReLU, 1, new_hidden_layer.T)

    return (new_hidden_layer, new_output_layer)

In [17]:
K = 2 # 0th input layer, 1st hidden layer, 2nd output layer
INPUT_NODES = 30 # input layer
LIST_OF_HIDDEN_NODES = list(range(1,30+1)) # hidden layer
OUTPUT_NODES = 30 # output layer 
HIDDEN_NODES = LIST_OF_HIDDEN_NODES[14] # number of hidden nodes in the hidden layer

# generate our data for training
x_train = generate_train_data_set()

# random initialization - Xavier Edition
weights = xavier_initialization(INPUT_NODES, HIDDEN_NODES, OUTPUT_NODES)

In [18]:
for input_layer in x_train:
    hidden_layer, output_layer = foward_propogation(input_layer, weights, HIDDEN_NODES)

    initial_graidents = 2 * (output_layer - input_layer)
    hidden_gradients = np.apply_along_axis(np.sum, 1, (initial_graidents * np.dot(tanh(np.dot(weights,hidden_layer.T)), weights)).T)
    weights = weights - (hidden_gradients * 0.0001) * np.dot(tanh(np.dot(weights, hidden_layer.T)), hidden_layer)
    
loss = 0
for data_point in x_train:
    hidden_layer, output_layer = foward_propogation(data_point, weights, HIDDEN_NODES)
    loss += calculate_loss(data_point,output_layer)
loss

4936.511285796989

In [5]:
def back_propogation(output_layer, input_layer, alpha):
    """
    Parameters:
    
    Returns:
    new_weights = the new computed weights of our model
    that minimize our error
    
    Purpose:
    To update the weights of our model
    using SGD (stochastic gradient descent)
    """
    
    