In [1]:
import numpy as np

In [2]:
## Forward Activations
def sigmoid(Z):
    return 1/(1 + np.exp(-Z))

def relu(Z):
    return np.maximum(0,Z)

## Backward Activations
def sigmoid_backward(dA, Z):
    sigmoid_Z = sigmoid(Z)
    return dA * sigmoid_Z * (1 - sigmoid_Z)

def relu_backward(dA, Z):
    dZ = np.array(dA, copy=True)
    dZ[Z <=0] = 0
    return dZ
    
## Initialize Layers
def initialize_layers(network_architecture):
    number_of_layers = len(network_architecture)
    parameters = {}
    
    for index, layer in enumerate(network_architecture):
        layer_index = index + 1
        layer_input_size = layer["input_dimension"]
        layer_output_size = layer["output_dimension"]
        
        ##### remove *0.1  to parameter initialization #####
        parameters['W' + str(layer_index)] = np.random.randn(layer_output_size, layer_input_size)*0.01
        parameters['b' + str(layer_index)] = np.random.randn(layer_output_size, 1)*0.01
        
    return parameters

## Forward Propagation

def single_layer_forward_propagation(A_previous, W_current, b_current, activation="relu"):
    Z_current = np.dot(W_current, A_previous) + b_current
    
    if activation is "relu":
        activation_function = relu
    elif activation is "sigmoid":
        activation_function = sigmoid
    else:
        raiseException('Unsupported activation')

    return activation_function(Z_current), Z_current

def full_forward_propagation(X, parameters, network_architecture):
    memory = {}
    A_current = X
    
    for index, layer in enumerate(network_architecture):
        layer_index = index + 1
        A_previous = A_current
        
        activation_function_current = layer["activation"]
        W_current = parameters["W" + str(layer_index)]
        b_current = parameters["b" + str(layer_index)]
        A_current, Z_current = single_layer_forward_propagation(A_previous, W_current, b_current, activation_function_current)

        memory["A" + str(index)] = A_previous
        memory["Z" + str(layer_index)] = Z_current
        
    return A_current, memory

## Loss
##### Check for correct implementation #####
def MSE(Y_hat, Y):
    loss = np.power(Y_hat - Y,2).mean()
    return loss

## Performance Metric
def l2_distance(Y_hat, Y):
    distance = np.sqrt(np.power(Y_hat - Y,2))
    return distance.mean()

## Backward Propagation
##### MIGHT BE CHANGED TO FOLLOW DISCUSSION'S IMPLEMENTATION #####
def single_layer_backward_propagation(dA_current, W_current, b_current, Z_current, A_previous, activation="relu"):
    m = A_previous.shape[1]
    
    if activation is "relu":
        backward_activation_function = relu_backward
    elif activation is "sigmoid":
        backward_activation_function = sigmoid_backward
    else:
        raise Exception('Unsupported activation')
        
    dZ_current = backward_activation_function(dA_current, Z_current)
    
    dW_current = np.dot(dZ_current, np.transpose(A_previous)) / m
    db_current = np.sum(dZ_current, axis=1, keepdims=True) / m
    dA_previous = np.dot(np.transpose(W_current), dZ_current)
    
    return dA_previous, dW_current, db_current

def full_backward_propagation(Y_hat, Y, memory, parameters, network_architecture):
    gradients = {}
    
    m = Y.shape[1]
    Y = Y.reshape(Y_hat.shape)
    
    dA_previous = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))
    
    for layer_index_previous, layer in reversed(list(enumerate(network_architecture))):
        layer_index_current = layer_index_previous + 1
        activation_function_current = layer["activation"]
        
        dA_current = dA_previous
        
        A_previous = memory["A" + str(layer_index_previous)]
        Z_current = memory["Z" + str(layer_index_current)]
        
        W_current = parameters["W" + str(layer_index_current)]
        b_current = parameters["b" + str(layer_index_current)]
        
        dA_previous, dW_current, db_current = single_layer_backward_propagation(dA_current, W_current, b_current, Z_current, A_previous, activation_function_current)
        
        gradients["dW" + str(layer_index_current)] = dW_current
        gradients["db" + str(layer_index_current)] = db_current
        
    return gradients

def update(parameters, gradients, network_architecture, learning_rate):
    
    for layer_index, layer in enumerate(network_architecture, 1):
        parameters["W" + str(layer_index)] -= learning_rate * gradients["dW" + str(layer_index)]
        parameters["b" + str(layer_index)] -= learning_rate * gradients["db" + str(layer_index)]
    
    return parameters

def train(X, Y, network_architecture, epochs, learning_rate):
    parameters = initialize_layers(network_architecture)
    loss_history = []
    accuracy_history = []
    
    for i in range(epochs):
        Y_hat, cache = full_forward_propagation(X, parameters, network_architecture)
        
        ##### loss should be MSE ######
        loss = MSE(Y_hat, Y)
        loss_history.append(loss)
        
        accuracy = l2_distance(Y_hat, Y)
        accuracy_history.append(accuracy)
        
        gradients = full_backward_propagation(Y_hat, Y, cache, parameters, network_architecture)
        
        parameters = update(parameters, gradients, network_architecture, learning_rate)
        
        print("Iteration: {:02} - loss: {:.5f} - l2-distance: {:.5f}".format(i+1, loss, accuracy))
        
    return parameters

In [3]:
## Network Architecture
network_architecture = [
    {"input_dimension":1, "output_dimension":64, "activation":"relu"},
    {"input_dimension":64, "output_dimension":64, "activation":"relu"},
    {"input_dimension":64, "output_dimension":1, "activation":"sigmoid"}
]

In [7]:
mean_text = input("Enter Mean:")
mean = float(mean_text)

standard_deviation_text = input("Enter Standard Deviation:")
standard_deviation = float(standard_deviation_text)

number_of_samples = 1000
epochs = 20
learning_rate = 0.1

#### Fix Dataset Generation - should be between [-2*std,2*std] #####
#### Recode 

X = np.random.uniform(mean - 2*standard_deviation, mean + 2*standard_deviation,(1000,1))
Y = 1/(standard_deviation*np.sqrt(2*np.pi)) * np.exp(-(X - mean)/(-2*np.square(standard_deviation)))

X_train = []
Y_train = []
X_test = []
Y_test = []

for i in range(number_of_samples):
    if i % 10 == 0:
        X_test.append(X[i])
        Y_test.append(Y[i])
    else:
        X_train.append(X[i])
        Y_train.append(Y[i])

X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)


Enter Mean:1
Enter Standard Deviation:1
[[ 1.29463673e+00]
 [ 1.97658923e+00]
 [ 2.11688731e+00]
 [ 2.24623125e+00]
 [ 7.42161744e-01]
 [ 1.38341985e+00]
 [-8.72505111e-01]
 [ 1.00706553e+00]
 [ 2.42445483e+00]
 [ 2.04117865e+00]
 [-2.77676933e-01]
 [-7.34998368e-01]
 [ 6.81826495e-01]
 [-6.00889942e-01]
 [ 1.38253981e+00]
 [ 7.46396926e-02]
 [ 4.84692761e-01]
 [ 2.83315590e+00]
 [ 9.24405290e-01]
 [ 8.97026740e-01]
 [-4.13869672e-01]
 [ 1.44597261e+00]
 [ 6.15096444e-01]
 [ 2.06049213e+00]
 [ 6.80937226e-01]
 [ 3.88060209e-01]
 [ 7.18967439e-01]
 [ 2.61578426e+00]
 [ 6.18257088e-01]
 [ 2.68246318e+00]
 [ 1.69584442e+00]
 [-9.78121697e-01]
 [ 2.91221946e-01]
 [ 9.43845682e-01]
 [-9.65845900e-01]
 [ 1.37742663e+00]
 [ 1.61849714e+00]
 [ 2.66497326e+00]
 [ 1.39545532e+00]
 [-7.54139982e-01]
 [ 1.58330780e+00]
 [ 2.04782495e+00]
 [ 1.79790151e+00]
 [ 2.08269650e+00]
 [ 2.04684945e+00]
 [ 1.65489681e+00]
 [ 1.10139909e+00]
 [ 2.05976215e+00]
 [ 8.00559235e-01]
 [-8.75572995e-01]
 [-8.73152

In [5]:
# Training
parameters = train(np.transpose(X_train), np.transpose(Y_train), network_architecture, epochs, learning_rate)

Iteration: 01 - loss: 0.06838 - l2-distance: 0.22718
Iteration: 02 - loss: 0.06828 - l2-distance: 0.22689
Iteration: 03 - loss: 0.06819 - l2-distance: 0.22662
Iteration: 04 - loss: 0.06811 - l2-distance: 0.22635
Iteration: 05 - loss: 0.06802 - l2-distance: 0.22609
Iteration: 06 - loss: 0.06795 - l2-distance: 0.22583
Iteration: 07 - loss: 0.06787 - l2-distance: 0.22559
Iteration: 08 - loss: 0.06780 - l2-distance: 0.22536
Iteration: 09 - loss: 0.06773 - l2-distance: 0.22513
Iteration: 10 - loss: 0.06767 - l2-distance: 0.22492
Iteration: 11 - loss: 0.06761 - l2-distance: 0.22470
Iteration: 12 - loss: 0.06755 - l2-distance: 0.22450
Iteration: 13 - loss: 0.06750 - l2-distance: 0.22430
Iteration: 14 - loss: 0.06744 - l2-distance: 0.22411
Iteration: 15 - loss: 0.06740 - l2-distance: 0.22392
Iteration: 16 - loss: 0.06735 - l2-distance: 0.22374
Iteration: 17 - loss: 0.06730 - l2-distance: 0.22356
Iteration: 18 - loss: 0.06726 - l2-distance: 0.22339
Iteration: 19 - loss: 0.06722 - l2-distance: 0

In [6]:
# Prediction
Y_test_hat, _ = full_forward_propagation(np.transpose(X_test), parameters, network_architecture)

# Accuracy achieved on the test set
acc_test = l2_distance(Y_test_hat, np.transpose(Y_test))
print("Test Set Distance: {:.5f}".format(acc_test))

Test Set Distance: 0.21514
