In [1]:
import numpy as np

In [2]:
## Forward Activations
def sigmoid(Z):
    """Apply the logistic sigmoid element-wise.

    Args:
        Z: Scalar or NumPy array of pre-activation values.

    Returns:
        ``1 / (1 + exp(-Z))`` evaluated element-wise.
    """
    exp_of_negated = np.exp(-Z)
    return 1 / (1 + exp_of_negated)

def relu(Z):
    """Rectified linear unit: the element-wise maximum of zero and ``Z``.

    Args:
        Z: Scalar or NumPy array of pre-activation values.

    Returns:
        ``Z`` with every negative entry replaced by zero.
    """
    rectified = np.maximum(0, Z)
    return rectified

## Backward Activations
def sigmoid_backward(dA, Z):
    """Back-propagate an upstream gradient through the sigmoid activation.

    Args:
        dA: Gradient of the loss with respect to the sigmoid output.
        Z: Pre-activation values that were fed to the sigmoid.

    Returns:
        ``dA * s * (1 - s)`` with ``s = sigmoid(Z)``, i.e. the gradient with
        respect to ``Z``.
    """
    activation = sigmoid(Z)
    # Keep the left-to-right product (dA * s) * (1 - s).
    scaled_upstream = dA * activation
    return scaled_upstream * (1 - activation)

def relu_backward(dA, Z):
    """Back-propagate an upstream gradient through the ReLU activation.

    Args:
        dA: Gradient of the loss with respect to the ReLU output.
        Z: Pre-activation values that were fed to the ReLU.

    Returns:
        A copy of ``dA`` in which every position where ``Z <= 0`` is zeroed;
        ``dA`` itself is not modified.
    """
    inactive = Z <= 0
    gradient = np.array(dA, copy=True)
    gradient[inactive] = 0
    return gradient
    
## Initialize Layers
def initialize_layers(network_architecture, scale=0.1, seed=None):
    """Create randomly initialised weights and biases for every layer.

    Args:
        network_architecture: Sequence of layer descriptions. Each item is a
            mapping with integer ``"input_dimension"`` and
            ``"output_dimension"`` entries.
        scale: Factor applied to the standard-normal draws. Defaults to
            ``0.1``, the value that used to be hard-coded.
        seed: Optional seed for reproducible initialisation. With ``None``
            (the default) the draws come from NumPy's global random state,
            exactly as before. Otherwise a private
            ``np.random.RandomState(seed)`` is used and the global state is
            left untouched.

    Returns:
        dict mapping ``"W<i>"`` to an ``(output, input)`` array and
        ``"b<i>"`` to an ``(output, 1)`` array, with ``i`` starting at 1.
    """
    randn = np.random.randn if seed is None else np.random.RandomState(seed).randn
    parameters = {}

    for layer_index, layer in enumerate(network_architecture, start=1):
        layer_input_size = layer["input_dimension"]
        layer_output_size = layer["output_dimension"]

        # Draw order (W then b, layer by layer) is part of the contract:
        # it determines which values a given seed produces.
        parameters['W' + str(layer_index)] = randn(layer_output_size, layer_input_size) * scale
        parameters['b' + str(layer_index)] = randn(layer_output_size, 1) * scale

    return parameters

## Forward Propagation

def single_layer_forward_propagation(A_previous, W_current, b_current, activation="relu"):
    """Run one dense layer forward: affine transform followed by an activation.

    Args:
        A_previous: Activations coming from the previous layer (or the input).
        W_current: Weight matrix of this layer; multiplied as ``W @ A_previous``.
        b_current: Bias of this layer, added to the product.
        activation: Name of the activation, ``"relu"`` or ``"sigmoid"``.

    Returns:
        Tuple ``(A_current, Z_current)``: the activated output and the
        pre-activation values ``W_current @ A_previous + b_current``.

    Raises:
        Exception: If ``activation`` is not a supported name.
    """
    Z_current = np.dot(W_current, A_previous) + b_current

    # Compare by value. ``is`` tests object identity, which only happens to
    # work for interned literals and fails for strings built at runtime.
    if activation == "relu":
        activation_function = relu
    elif activation == "sigmoid":
        activation_function = sigmoid
    else:
        raise Exception('Unsupported activation')

    return activation_function(Z_current), Z_current

def full_forward_propagation(X, parameters, network_architecture):
    """Push ``X`` through every layer and record what backprop needs.

    Args:
        X: Network input, used as the activations feeding the first layer.
        parameters: Mapping holding ``"W<i>"`` and ``"b<i>"`` for each layer,
            with ``i`` starting at 1.
        network_architecture: Sequence of layer descriptions; only the
            ``"activation"`` entry of each is read here.

    Returns:
        Tuple ``(A_last, memory)``. ``memory["A<i-1>"]`` is the input to layer
        ``i`` and ``memory["Z<i>"]`` is its pre-activation output.
    """
    cache = {}
    activations = X

    for position, layer_spec in enumerate(network_architecture, start=1):
        layer_input = activations
        layer_tag = str(position)

        activation_name = layer_spec["activation"]
        weights = parameters["W" + layer_tag]
        bias = parameters["b" + layer_tag]
        activations, pre_activation = single_layer_forward_propagation(
            layer_input, weights, bias, activation_name
        )

        # The input of layer i is stored under the previous layer's index.
        cache["A" + str(position - 1)] = layer_input
        cache["Z" + layer_tag] = pre_activation

    return activations, cache

## Loss
##### Check for correct implementation #####
def MSE(Y_hat, Y):
    """Squared-error loss averaged over the second axis of ``Y_hat``.

    Args:
        Y_hat: Predictions; ``Y_hat.shape[1]`` is used as the divisor.
        Y: Targets, broadcastable against ``Y_hat``.

    Returns:
        ``(1 / m) * sum((Y_hat - Y) ** 2)`` with ``m = Y_hat.shape[1]``,
        passed through ``np.squeeze``.
    """
    sample_count = Y_hat.shape[1]
    squared_error = np.power(Y_hat - Y, 2)
    mean_factor = 1 / sample_count
    return np.squeeze(mean_factor * np.sum(squared_error))
#     loss = np.power(Y_hat - Y,2).mean()
    
#     return loss
    
# def BCE(Y_hat, Y):
#     m = Y_hat.shape[1]
#     loss = -1/m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1-Y_hat).T))
    
#     return np.squeeze(loss)

## Convert Probabilities to Classes
##### Should be one hot vectors ######
def convert_probabilities_to_classes(probabilities):
    """Threshold probabilities at 0.5 into hard 0/1 labels.

    Args:
        probabilities: NumPy array of predicted probabilities.

    Returns:
        A copy of ``probabilities`` in which entries ``> 0.5`` become 1 and
        entries ``<= 0.5`` become 0. The input array is left unchanged.
    """
    labels = np.copy(probabilities)
    above_threshold = probabilities > 0.5
    at_or_below_threshold = probabilities <= 0.5

    labels[above_threshold] = 1
    labels[at_or_below_threshold] = 0
    return labels
    
## Accuracy
def Accuracy(Y_hat, Y):
    """Fraction of columns whose thresholded prediction matches the target.

    Args:
        Y_hat: Predicted probabilities; thresholded with
            ``convert_probabilities_to_classes``.
        Y: Target labels, compared element-wise with the thresholded values.

    Returns:
        Mean over columns of "every row in this column matches".
    """
    predicted_classes = convert_probabilities_to_classes(Y_hat)
    column_is_correct = (predicted_classes == Y).all(axis=0)
    return column_is_correct.mean()
    
## Backward Propagation
##### MIGHT BE CHANGED TO FOLLOW DISCUSSION'S IMPLEMENTATION #####
def single_layer_backward_propagation(dA_current, W_current, b_current, Z_current, A_previous, activation="relu"):
    """Back-propagate through one dense layer.

    Args:
        dA_current: Gradient of the loss with respect to this layer's output.
        W_current: Weight matrix of this layer.
        b_current: Bias of this layer. Accepted for signature symmetry with
            the forward pass; it is not used in the computation.
        Z_current: Pre-activation values cached from the forward pass.
        A_previous: Input this layer received in the forward pass. Its second
            dimension is used as the averaging divisor ``m``.
        activation: Name of the activation, ``"relu"`` or ``"sigmoid"``.

    Returns:
        Tuple ``(dA_previous, dW_current, db_current)``: the gradient passed
        to the preceding layer, and the ``m``-averaged weight and bias
        gradients.

    Raises:
        Exception: If ``activation`` is not a supported name.
    """
    m = A_previous.shape[1]

    # Compare by value. ``is`` tests object identity, which only happens to
    # work for interned literals and fails for strings built at runtime.
    if activation == "relu":
        backward_activation_function = relu_backward
    elif activation == "sigmoid":
        backward_activation_function = sigmoid_backward
    else:
        raise Exception('Unsupported activation')

    dZ_current = backward_activation_function(dA_current, Z_current)

    dW_current = np.dot(dZ_current, np.transpose(A_previous)) / m
    db_current = np.sum(dZ_current, axis=1, keepdims=True) / m
    dA_previous = np.dot(np.transpose(W_current), dZ_current)

    return dA_previous, dW_current, db_current

def full_backward_propagation(Y_hat, Y, memory, parameters, network_architecture):
    """Back-propagate the mean-squared-error loss through the whole network.

    Args:
        Y_hat: Network output from the forward pass.
        Y: Targets; reshaped to ``Y_hat.shape`` before use.
        memory: Forward-pass cache with ``"A<i-1>"`` (input of layer ``i``)
            and ``"Z<i>"`` (pre-activation of layer ``i``) entries.
        parameters: Mapping holding ``"W<i>"`` and ``"b<i>"`` for each layer.
        network_architecture: Sequence of layer descriptions; only the
            ``"activation"`` entry of each is read here.

    Returns:
        dict with ``"dW<i>"`` and ``"db<i>"`` for every layer.
    """
    gradients = {}

    Y = Y.reshape(Y_hat.shape)

    # Seed gradient for the loss defined by ``MSE``:
    #   L = (1/m) * sum((Y_hat - Y)^2)  =>  dL/dY_hat = (2/m) * (Y_hat - Y).
    # The 1/m factor is applied inside single_layer_backward_propagation, so
    # only 2 * (Y_hat - Y) is injected here. Unlike the cross-entropy
    # derivative used previously, this matches the reported loss and stays
    # finite when Y_hat reaches exactly 0 or 1.
    dA_previous = 2 * (Y_hat - Y)

    for layer_index_previous, layer in reversed(list(enumerate(network_architecture))):
        layer_index_current = layer_index_previous + 1
        activation_function_current = layer["activation"]

        dA_current = dA_previous

        A_previous = memory["A" + str(layer_index_previous)]
        Z_current = memory["Z" + str(layer_index_current)]

        W_current = parameters["W" + str(layer_index_current)]
        b_current = parameters["b" + str(layer_index_current)]

        dA_previous, dW_current, db_current = single_layer_backward_propagation(dA_current, W_current, b_current, Z_current, A_previous, activation_function_current)

        gradients["dW" + str(layer_index_current)] = dW_current
        gradients["db" + str(layer_index_current)] = db_current

    return gradients

def update(parameters, gradients, network_architecture, learning_rate):
    """Apply one gradient-descent step to every layer's weights and biases.

    Args:
        parameters: Mapping with ``"W<i>"`` / ``"b<i>"`` entries; updated in
            place via ``-=``.
        gradients: Mapping with the matching ``"dW<i>"`` / ``"db<i>"`` entries.
        network_architecture: Sequence of layers; only its length matters
            here (one update per layer, numbered from 1).
        learning_rate: Step size multiplying each gradient.

    Returns:
        The same ``parameters`` mapping that was passed in.
    """
    for layer_number, _ in enumerate(network_architecture, start=1):
        suffix = str(layer_number)
        # Weights first, then bias, for each layer.
        for kind in ("W", "b"):
            parameters[kind + suffix] -= learning_rate * gradients["d" + kind + suffix]

    return parameters

def train(X, Y, network_architecture, epochs, learning_rate):
    """Train the network with full-batch gradient descent.

    Each epoch runs a forward pass, records the MSE loss and the accuracy,
    runs the backward pass, updates the parameters and prints one progress
    line.

    Args:
        X: Training inputs passed to ``full_forward_propagation``.
        Y: Training targets.
        network_architecture: Sequence of layer descriptions.
        epochs: Number of full passes to run.
        learning_rate: Step size handed to ``update``.

    Returns:
        The trained parameter mapping. The per-epoch loss and accuracy
        histories are collected locally but not returned.
    """
    parameters = initialize_layers(network_architecture)
    loss_history, accuracy_history = [], []
    progress_line = "Iteration: {:05} - loss: {:.5f} - accuracy: {:.5f}"

    for epoch in range(epochs):
        Y_hat, forward_cache = full_forward_propagation(X, parameters, network_architecture)

        # Metrics are computed on the pre-update predictions.
        loss = MSE(Y_hat, Y)
        accuracy = Accuracy(Y_hat, Y)
        loss_history.append(loss)
        accuracy_history.append(accuracy)

        gradients = full_backward_propagation(Y_hat, Y, forward_cache, parameters, network_architecture)
        parameters = update(parameters, gradients, network_architecture, learning_rate)

        print(progress_line.format(epoch, loss, accuracy))

    return parameters

In [3]:
## Network Architecture
# One entry per dense layer, ordered from input to output. Each layer's
# "input_dimension" equals the previous layer's "output_dimension".
network_architecture = [
    dict(input_dimension=1, output_dimension=64, activation="relu"),
    dict(input_dimension=64, output_dimension=64, activation="relu"),
    dict(input_dimension=64, output_dimension=1, activation="sigmoid"),
]

In [4]:
# Distribution parameters are read interactively from the user.
mean_text = input("Enter Mean:")
mean = float(mean_text)

standard_deviation_text = input("Enter Standard Deviation:")
standard_deviation = float(standard_deviation_text)

number_of_samples = 1000
epochs = 20
learning_rate = 0.1

##### Fix Dataset Generation - should be between [-2*std,2*std] #####
# Column vector of samples, shape (number_of_samples, 1).
D = np.random.normal(mean, standard_deviation, (number_of_samples,1))

# Split 90% / 10% around ONE index so the two parts are always disjoint and
# together cover every sample. Truncating the train and test bounds
# separately makes the test slice the whole dataset whenever
# 0.1 * number_of_samples < 1, and can drop a sample for other sizes.
split_index = int(0.9*number_of_samples)
D_train = D[:split_index]
D_test = D[split_index:]

print(D_train.shape, D_test.shape)

# from sklearn.datasets import make_moons
# from sklearn.model_selection import train_test_split

# X, y = make_moons(n_samples = number_of_samples, noise=0.2, random_state=100)
# X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.1,random_state=42)

# print(X.shape, y.shape)

Enter Mean:5
Enter Standard Deviation:5
(900,) (100,)


In [5]:
# Training
# The network is fitted to reproduce its own input, so the transposed
# training samples are passed as both the features and the targets.
parameters = train(D_train.T, D_train.T, network_architecture, epochs, learning_rate)

ValueError: shapes (64,1) and (900,) not aligned: 1 (dim 1) != 900 (dim 0)

In [None]:
# Prediction
# X_test / y_test are never defined in this notebook (the sklearn split that
# would create them is commented out), so this cell raised NameError on a
# fresh kernel. Evaluate on the held-out D_test instead, using it as both
# input and target in the same way training uses D_train.
Y_test_hat, _ = full_forward_propagation(np.transpose(D_test), parameters, network_architecture)

# Accuracy achieved on the test set
acc_test = Accuracy(Y_test_hat, np.transpose(D_test))
print("Test Set Accuracy: {:.2f}".format(acc_test))