Setup details.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

Generate data.

In [None]:
""" Compile data """

def compile_dataset():
  """Build the 4-bit parity dataset.

  Returns:
    A pair ``(X, Y)`` of float64 arrays. ``X`` has one row per 4-bit
    pattern (values 0..15, most-significant bit first) and ``Y`` holds
    1.0 where the row contains an odd number of ones, 0.0 otherwise.
  """
  # Every 4-bit value rendered as a zero-padded binary string.
  patterns = [format(value, '04b') for value in range(16)]
  # Each character becomes one feature; NumPy parses the '0'/'1'
  # characters into floats when the array is built below.
  features = [list(bits) for bits in patterns]
  # Odd parity -> 1, even parity -> 0.
  labels = [bits.count('1') % 2 for bits in patterns]
  return np.array(features, dtype=np.float64), np.array(labels, dtype=np.float64)


Create the network model.

In [None]:
""" Options """

# Literals that define the network architecture
# INPUTS must equal the number of columns in X: the dataset holds 4-bit
# patterns, and the first layer's weights are multiplied against those 4 columns.
INPUTS = 4 # The number of input features per example in X
LAYERS = [4, 1] # The number of neurons in each layer, in order; the last entry is the output layer

""" Build and Initialize the model """

# Build and return the network of weights and biases
def build_model(inputs=None, layers=None):
  """Allocate one parameter matrix per layer.

  Args:
    inputs: Number of input features fed to the first layer. Defaults to
      the module-level ``INPUTS``.
    layers: Sequence holding the neuron count of each layer, in order.
      Defaults to the module-level ``LAYERS``.

  Returns:
    A list with one float64 array per layer, shaped
    ``(neurons, fan_in + 1)``. Column 0 is each neuron's bias and the
    remaining columns are its input weights. All entries start at zero
    and are expected to be overwritten by ``init_params``.
  """
  if inputs is None:
    inputs = INPUTS
  if layers is None:
    layers = LAYERS
  weights = []
  fan_in = inputs
  for neurons in layers:
    # float64 storage (rather than an object array of None) keeps every
    # later NumPy operation on the parameters vectorised.
    weights.append(np.zeros((neurons, fan_in + 1), dtype=np.float64))
    fan_in = neurons  # the next layer consumes this layer's outputs
  return weights

# Initialize the weights of the model
def init_params(model):
  """Overwrite every entry of every layer matrix with a random value.

  Each entry gets an independent draw from ``np.random.rand`` rescaled to
  the interval [-1, 1). Entries are visited layer by layer, row by row.
  The matrices are modified in place and nothing is returned.
  """
  for layer_matrix in model:
    row_count = len(layer_matrix)
    for row in range(row_count):
      column_count = len(layer_matrix[row])
      for column in range(column_count):
        # rand() is in [0, 1); stretch and shift it to [-1, 1).
        layer_matrix[row, column] = np.random.rand()*2-1

""" Functions relevant to the model usage """

# The activation function to be used for calculating the output of a layer
def activation(z):
  """Apply the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

  ``z`` must provide ``astype`` (i.e. be a NumPy array). It is cast to
  float64 first, so object-dtype arrays are accepted by ``np.exp``.
  """
  as_float = z.astype(np.float64)
  decay = np.exp(-as_float)
  return 1 / (1 + decay)

# Calculate the outputs/prediction (yhat) using a forward pass through the network
def forward_pass(model, X):
  """Propagate ``X`` through every layer and record what each layer saw.

  Args:
    model: List of per-layer matrices whose column 0 is the bias and whose
      remaining columns are the weights (one row per neuron).
    X: Input examples; converted to a float64 array for the first layer.

  Returns:
    A list with one entry per layer, each of the form
    ``((layer_inputs, layer_matrix), layer_outputs)``. The outputs of the
    last entry are the network's predictions.
  """
  cache = []
  for index, layer_matrix in enumerate(model):
    if index == 0:
      layer_inputs = np.array(X, dtype=np.float64)
    else:
      # Later layers are fed the activations of the layer before them.
      layer_inputs = cache[index - 1][1]
    biases = layer_matrix[:, 0]
    weights = layer_matrix[:, 1:]
    # Weighted sum per neuron plus that neuron's bias.
    pre_activation = np.dot(layer_inputs, weights.T) + biases
    layer_outputs = activation(pre_activation)
    cache.append(((layer_inputs, layer_matrix), layer_outputs))
  return cache



Define how the model will learn.

In [None]:
""" Functions used in learning """

# Binary cross entropy loss function
def error(y_true, y_pred, eps=1e-15):
    """Return the element-wise binary cross-entropy between labels and predictions.

    Args:
        y_true: Target values.
        y_pred: Predicted probabilities.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` so neither
            logarithm is evaluated at zero.

    Returns:
        ``-(y_true * log(p) + (1 - y_true) * log(1 - p))`` with ``p`` the
        clipped prediction; no reduction over examples is applied.
    """
    clipped = np.clip(y_pred, eps, 1 - eps)
    positive_term = y_true * np.log(clipped)
    negative_term = (1 - y_true) * np.log(1 - clipped)
    return - (positive_term + negative_term)

# Derivative of binary cross entropy loss
def error_derivitive(y_true, y_pred, eps=1e-15):
    """Return the element-wise derivative of the cross-entropy w.r.t. the prediction.

    Args:
        y_true: Target values.
        y_pred: Predicted probabilities.
        eps: Predictions are clipped to ``[eps, 1 - eps]``; ``eps`` is also
            added to the ``1 - p`` denominator.

    Returns:
        ``-(y_true / p - (1 - y_true) / (1 - p + eps))`` with ``p`` the
        clipped prediction.
    """
    clipped = np.clip(y_pred, eps, 1 - eps)
    from_positive = y_true / clipped
    from_negative = (1 - y_true) / (1 - clipped + eps)
    return - (from_positive - from_negative)

# Perform back propagation in order to learn gradients
def back_prop(labels, caches):
  """Compute the gradients of every layer from a completed forward pass.

  Args:
    labels: Array of target values; reshaped to match the predictions.
    caches: One entry per layer of the form
      ``((layer_inputs, layer_matrix), layer_outputs)``; the outputs of
      the last entry are treated as the network's predictions.

  Returns:
    A list with one element per layer, each being whatever
    ``calc_gradients`` returns as its gradient pair for that layer.
  """
  num_layers = len(caches)
  # Read the predictions from the caches that were passed in, not from
  # any module-level model object.
  predictions = caches[-1][1]
  Y = labels.reshape(predictions.shape)
  gradients = [None for _ in range(num_layers)]
  # Derivative of the loss w.r.t. the predictions seeds the backward sweep.
  err_derivitives = error_derivitive(Y, predictions)
  # Walk from the output layer back to the first layer; each step yields
  # the error signal to hand to the layer before it.
  for layer in reversed(range(num_layers)):
    err_derivitives, gradients[layer] = calc_gradients(err_derivitives, caches[layer])
  return gradients
  

def calc_gradients(e, cache):
  """Back-propagate an error signal through one sigmoid layer.

  Args:
    e: Derivative of the loss with respect to this layer's outputs, with
      the same shape as those outputs, ``(examples, neurons)``.
    cache: ``((layer_inputs, weight_matrix), layer_outputs)`` for this
      layer. ``layer_inputs`` is ``(examples, fan_in)``; ``weight_matrix``
      is ``(neurons, fan_in + 1)`` with the bias in column 0;
      ``layer_outputs`` are the sigmoid activations the layer produced.

  Returns:
    ``(de, (dw, db))`` where ``de`` is the loss derivative with respect to
    the layer's inputs, shaped ``(examples, fan_in)``; ``dw`` is the weight
    gradient, shaped ``(neurons, fan_in)``; and ``db`` is the bias
    gradient, shaped ``(1, neurons)``. ``dw`` and ``db`` are averaged over
    the examples.
  """
  (layer_inputs, weight_matrix), layer_outputs = cache
  # Cast so the matrix products stay numeric even if the model was stored
  # as an object-dtype array.
  layer_weights = np.asarray(weight_matrix[:, 1:], dtype=np.float64)
  # Average over examples (rows), not over features (columns).
  m = layer_inputs.shape[0]
  # layer_outputs already equals sigmoid(z), so the sigmoid derivative is
  # a * (1 - a); the sigmoid must not be applied a second time.
  delta = e * layer_outputs * (1 - layer_outputs)
  # Error handed to the previous layer: one column per input of this layer.
  de = np.dot(delta, layer_weights)
  # Each weight's gradient pairs a neuron's delta with the input it scaled.
  dw = (1 / m) * np.dot(delta.T, layer_inputs)
  db = (1 / m) * np.sum(delta, axis=0, keepdims=True)
  return de, (dw, db)

def update_weights(model, gradients, learning_rate):
  """Take one gradient-descent step on every layer, in place.

  Args:
    model: List of per-layer matrices; column 0 holds the biases and the
      remaining columns hold the weights.
    gradients: Per-layer sequences whose element 0 is the weight gradient
      and element 1 is the bias gradient.
    learning_rate: Factor applied to each gradient before subtracting it.
  """
  for index, layer_matrix in enumerate(model):
    weight_step = learning_rate*gradients[index][0]
    layer_matrix[:, 1:] = layer_matrix[:, 1:] - weight_step
    bias_step = learning_rate*gradients[index][1]
    # Plain assignment (not ``-=``) so a (1, neurons) bias gradient can
    # still be broadcast against the (neurons,) bias column.
    layer_matrix[:, 0] = layer_matrix[:, 0] - bias_step


Train the model.

In [None]:
""" Options """

EPOCHS = 5000 # Number of full passes over the dataset
LEARNING_RATE = .075 # Step size used by update_weights

""" Prepare the data """

# Get the dataset
X, Y = compile_dataset()

# NOTE: the dataset is used in its generated order. Shuffling X on its own
# would break the row-by-row pairing between X and Y.

# Create the model
params = build_model()
init_params(params)

""" Train the model... """

for epoch in range(EPOCHS): # Iterations through the data
  caches = forward_pass(params, X) # Results of forward pass
  grad = back_prop(Y, caches) # Gradients
  update_weights(params, grad, LEARNING_RATE) # Update the weights with the calculated gradients

""" Evaluate final accuracy... """

# Run one more forward pass so the predictions reflect the final weights,
# not the weights from before the last update.
yhat = forward_pass(params, X)[-1][1].flatten() # Predictions
# Threshold the probabilities into 0/1 labels and score them against Y.
predicted_labels = np.where(yhat >= .5, 1, 0)
print("Training set accuracy = " + str(np.mean(predicted_labels == Y)))


  


Training set accuracy = 0.5
