In [3]:
import tensorflow as tf
import numpy as np


In [77]:
#create input data
input_data = np.random.rand(100)

#create lambda function to apply on data
f = lambda x: -x**3+x**2

#create target set
target = f(input_data)

#define ReLU
def reLu(x):
    return x * (x > 0)
def derived_reLu(x):
    return 1 if x > 0 else 0

In [220]:
class Layer:
    def __init__(self, n_units, input_units):
        self.n_units = n_units
        self.input_units = input_units
        
        #bias vector initialized with zeros
        self.bias_vector = np.zeros(n_units)
        
        #Xavier Initialization for Random Weights
        self.weight_matrix = np.random.normal(0, np.sqrt(2/n_units+input_units),(input_units, n_units))
        
        #Initialize empty attributes
        self.layer_input = None
        self.layer_preactivation = None
        self.layer_activation = None
        
    def forward_step(self, input_vector):
        #get vector of (weight x input) + bias
        self.layer_input = input_vector
        """print(np.shape(self.weight_matrix))
        print(self.weight_matrix)
        print(self.layer_input)
        print(self.bias_vector)"""
        self.layer_preactivation = self.weight_matrix.T @ self.layer_input + self.bias_vector
        """print("weight matrix")
        print(self.weight_matrix.T)
        print("input")
        print(self.layer_input)
        print("preactivation")
        print(self.layer_preactivation)"""
        self.layer_activation = reLu(self.layer_preactivation)
        #print("activation")
        #print(self.layer_activation)
        return self.layer_activation
        
    def backward_step(self, activation_derivative, lr):
        
        #calculates gradient for weights and biases
        bias_grad = derived_reLu(np.any(self.layer_preactivation)) * activation_derivative
        #print(self.layer_input.T)
        """print(bias_grad)
        print(np.array([bias_grad]))
        print(np.shape(bias_grad))
        print(self.layer_input)
        print(np.shape(self.layer_input))
        print(np.array([self.layer_input]).T)
        print(np.shape(np.array([self.layer_input]).T))"""
        weight_grad = np.array([self.layer_input]).T @ np.asarray([bias_grad])
        
        #calculates the derivative of the loss function with regards to the activation
        print(self.weight_matrix)
        new_derivative = np.asarray([bias_grad]) @ self.weight_matrix.T
        
        #update weights and bias
        self.bias_vector = self.bias_vector - lr*bias_grad
        self.weight_matrix = self.weight_matrix - lr*weight_grad
        
        return new_derivative

In [221]:
class MLP:
  # initialized with learning rate, 
  # list of number of layers and their respective unit size
    def __init__(self, lr, n_layers, units_list):
        self.lr = lr
        #initialize layers as list of layers
        self.layers = []
        self.target = target
        self.last_output = None
        units_list = units_list
        n_input = 1
        for n_units in units_list:
            self.layers.append(Layer(n_units, n_input))
            n_input = n_units
    def forward_step(self, data_point):
        #propagates input signal forward through the list by 
        #updating the input for the next layer
        layer_input = data_point
        for layer in self.layers:
            layer_input = layer.forward_step(layer_input)    
        self.last_output = layer_input
        #print("last output")
        #print(self.last_output)
        return self.last_output
    def backpropagation(self, target):
        #compute derivative for loss function
        delta = self.last_output - target
        #perform backpropagation by executing backward_step on each layer and updating the derivative
        for layer in reversed(self.layers):
            delta = layer.backward_step(delta, self.lr)


In [222]:
mlp = MLP(0.01, 2, [10,1])
loss = []
for i in range(1000):
    for d, t in zip(input_data, target):
        #perform forward step
        mlp.forward_step(np.asarray([d]))
        #perform backward step
        mlp.backpropagation(t)
        #record loss
        loss.append((1/2)*(mlp.last_output - t)**2)


[[ 5.44143637]
 [ 2.27588221]
 [ 5.14694155]
 [ 1.70872538]
 [-0.19047581]
 [ 1.9462242 ]
 [ 4.76330009]
 [-0.32646899]
 [ 0.45104944]
 [-8.80868893]]
[[ 0.17894276  0.22189702 -0.49134614 -0.75717973 -0.18037082 -0.97266899
  -1.6631664   1.44273391  0.54446453 -1.62924713]]
[[ 5.44138585]
 [ 2.27581956]
 [ 5.14694155]
 [ 1.70872538]
 [-0.19047581]
 [ 1.9462242 ]
 [ 4.76330009]
 [-0.32687634]
 [ 0.45089571]
 [-8.80868893]]


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 10)

In [189]:
x = np.array([[1.,2.,3.,4.]])
x.T

array([[1.],
       [2.],
       [3.],
       [4.]])