In [29]:
import copy

import numpy as np

### initialize weights
def xavier_initialization(input_size, output_size) ->np.ndarray:
    """Draw a weight matrix from a symmetric uniform distribution.

    The sampling interval is ``[-limit, limit)`` with
    ``limit = sqrt(6) / sqrt(input_size + output_size)``, so layers with more
    connections start with proportionally smaller weights.

    Args:
        input_size: Number of rows of the returned matrix.
        output_size: Number of columns of the returned matrix.

    Returns:
        Array of shape ``(input_size, output_size)`` sampled with NumPy's
        global random generator.
    """
    fan_total = input_size + output_size
    limit = np.sqrt(6) / np.sqrt(fan_total)
    return np.random.uniform(-limit, limit, size=(input_size, output_size))

In [53]:
### Feed-Forward Layer
class FeedForwardLayer():
    """Fully connected layer computing ``x @ weights + biases``.

    The layer caches the most recent input during ``forward`` so that
    ``backward`` can compute the weight gradient, and it applies a plain
    gradient-descent update to its own parameters inside ``backward``.
    """

    def __init__(self, input_size, output_size):
        """Create a layer mapping ``input_size`` features to ``output_size``.

        Weights come from ``xavier_initialization``; biases start at zero and
        are stored as a ``(1, output_size)`` row so they broadcast over a batch.
        """
        self.weights = xavier_initialization(input_size, output_size)
        self.biases = np.zeros((1,output_size))

    def forward(self, x):
        """Apply the affine transform and remember the input.

        Args:
            x: A batch of shape ``(batch, input_size)`` or a single 1-D sample
                of length ``input_size``.

        Returns:
            Array of shape ``(batch, output_size)``; a 1-D sample yields
            ``(1, output_size)``.
        """
        # Cache the input as 2-D: `.T` is a no-op on a 1-D array, which would
        # make the weight-gradient product in backward() fail on a lone sample.
        self.x = np.atleast_2d(x)
        output = np.dot(self.x, self.weights) + self.biases

        return output

    def backward(self, d_values, learning_rate):
        """Back-propagate through the layer and update its parameters.

        Args:
            d_values: Gradient of the loss w.r.t. this layer's output, shaped
                like the output of the preceding ``forward`` call.
            learning_rate: Step size of the gradient-descent update.

        Returns:
            Gradient of the loss w.r.t. the layer input, computed with the
            weights as they were before this update.
        """
        d_values = np.atleast_2d(d_values)
        d_weights = np.dot(self.x.T, d_values)
        d_biases = np.sum(d_values, axis=0, keepdims=True)

        # Must be evaluated before the weights are modified below.
        d_inputs = np.dot(d_values, self.weights.T)

        self.weights -= learning_rate * d_weights
        self.biases -= learning_rate * d_biases

        return d_inputs

In [47]:
### Adding Nonlinearity
class ReLuActivationFuction():
    """Rectified linear unit: keeps positive values and zeroes the rest."""

    def forward(self, x):
        """Return ``max(0, x)`` element-wise and cache ``x`` for ``backward``."""
        self.x = x
        return np.maximum(0, x)

    def backward(self, d_values):
        """Pass the incoming gradient only where the cached input was positive.

        Args:
            d_values: Gradient of the loss w.r.t. this activation's output.

        Returns:
            ``d_values`` with entries zeroed wherever the input seen by the
            last ``forward`` call was not strictly positive.
        """
        passthrough = np.where(self.x > 0, 1, 0)
        return d_values * passthrough

In [48]:
### Loss Function Binary cross Entropy
class BinaryCrossEntropy():
    """Element-wise binary cross-entropy between probabilities and 0/1 targets.

    Predictions are clipped into ``[EPSILON, 1 - EPSILON]`` before use so that
    a saturated prediction (exactly 0 or 1) cannot produce ``log(0)`` or a
    division by zero, both of which would turn the loss or gradient into
    NaN/inf.
    """

    # Smallest distance kept between a prediction and the exact values 0 and 1.
    EPSILON = 1e-12

    def _clip(self, output):
        """Return ``output`` as floats confined to the open interval (0, 1)."""
        return np.clip(np.asarray(output, dtype=float), self.EPSILON, 1 - self.EPSILON)

    def forward(self, output, target):
        """Compute the per-element loss.

        Args:
            output: Predicted probabilities.
            target: Ground-truth labels (0 or 1), broadcastable to ``output``.

        Returns:
            ``-(target * log(output) + (1 - target) * log(1 - output))``
            evaluated on the clipped predictions.
        """
        output = self._clip(output)
        loss = -(target * np.log(output) + (1 - target) * np.log(1 - output))

        return loss

    def backward(self, output, target):
        """Compute the derivative of the loss with respect to ``output``.

        Uses the same clipped predictions as ``forward`` so the gradient stays
        finite for saturated predictions.
        """
        output = self._clip(output)
        return -target/output + (1 - target)/(1 - output)

In [49]:
### For a binary classification problem, a sigmoid activation on the output layer maps the result to a probability in (0, 1)
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    Only the exponential of a non-positive number is ever evaluated, so large
    negative inputs no longer overflow ``np.exp`` (which raised a
    RuntimeWarning in the naive formulation).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid, with the same shape as ``x`` (a NumPy scalar
        for scalar input).
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer/bool input: promote so the result is floating point.
        x = x.astype(float)

    decay = np.exp(-np.abs(x))  # always in (0, 1], never overflows
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically equal.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with () turns a 0-d result back into a scalar, leaves arrays as is.
    return result[()]

class SigmoidActivationFunction():
    """Activation layer that applies the logistic sigmoid element-wise."""

    def forward(self, x):
        """Return ``sigmoid(x)`` and cache ``x`` for the backward pass."""
        self.x = x
        return sigmoid(x)

    def backward(self, d_values):
        """Scale the incoming gradient by the sigmoid derivative ``s * (1 - s)``.

        Args:
            d_values: Gradient of the loss w.r.t. this activation's output.

        Returns:
            Gradient of the loss w.r.t. the input cached by ``forward``.
        """
        activation = sigmoid(self.x)
        return d_values * activation * (1 - activation)

In [60]:
### Implementation

class MLP_implementation():
    """Multi-layer perceptron assembled from ``FeedForwardLayer`` blocks.

    Each hidden layer is followed by its own copy of the hidden activation and
    the output layer is followed by the output activation. Training is plain
    per-sample stochastic gradient descent.

    Args:
        input_size: Number of input features.
        output_size: Number of network outputs.
        hidden_layers: Number of hidden layers (0 gives a single affine layer
            followed by the output activation).
        hidden_layers_size: Width shared by all hidden layers.
        hidden_activation_func: Object exposing ``forward(x)`` and
            ``backward(d_values)``; deep-copied once per hidden layer.
        output_activation_function: Object with the same interface, applied
            after the output layer.
        loss_function: Object exposing ``forward(output, target)`` and
            ``backward(output, target)``.
    """
    def __init__(self,
                 input_size,
                 output_size,
                 hidden_layers,
                 hidden_layers_size,
                 hidden_activation_func,
                 output_activation_function,
                 loss_function,
                ):
        self.hidden_layers = hidden_layers
        self.hidden_layers_size = hidden_layers_size
        self.hidden_activation_func = hidden_activation_func
        self.loss_function = loss_function
        self.output_activation_function = output_activation_function
        self.layers = []
        # One activation object per hidden layer. Activations cache their
        # input in forward(); a single shared instance would be overwritten by
        # every later layer, so all layers would back-propagate through the
        # last layer's cached input.
        self.hidden_activations = []
        # Mean loss of each completed training epoch, appended by train().
        self.loss_history = []

        ### Initialize hidden layers
        previous_size = input_size
        for _ in range(hidden_layers):
            self.layers.append(FeedForwardLayer(previous_size, hidden_layers_size))
            self.hidden_activations.append(copy.deepcopy(hidden_activation_func))
            previous_size = hidden_layers_size
        ### Initialize output layer (fed by the raw input when there are no hidden layers)
        self.output_layer = FeedForwardLayer(previous_size, output_size)

    def forward_pass(self, x):
        """Run ``x`` through every hidden layer/activation pair and the output layer."""
        output = x
        for layer, activation in zip(self.layers, self.hidden_activations):
            output = layer.forward(output)
            output = activation.forward(output)

        output = self.output_layer.forward(output)
        output = self.output_activation_function.forward(output)

        return output

    def backward_pass(self, d_values, learning_rate):
        """Back-propagate ``d_values`` (loss gradient w.r.t. the network output).

        Each layer updates its own parameters with ``learning_rate`` as the
        gradient flows through it.

        Returns:
            Gradient of the loss with respect to the network input.
        """
        #output layer
        d_values = self.output_activation_function.backward(d_values)
        d_values = self.output_layer.backward(d_values, learning_rate)

        # Walk the hidden stack in reverse, pairing each layer with the
        # activation instance that cached that layer's forward output.
        for layer, activation in zip(reversed(self.layers),
                                     reversed(self.hidden_activations)):
            d_values = activation.backward(d_values)
            d_values = layer.backward(d_values, learning_rate)

        return d_values

    def train(self, input_data, targets, learning_rate=1, epochs=1):
        """Train with per-sample SGD, visiting samples in a fresh random order each epoch.

        The mean loss of every epoch is appended to ``self.loss_history`` so
        convergence can be inspected after training.

        Args:
            input_data: Indexable collection of samples.
            targets: Indexable collection of targets aligned with ``input_data``.
            learning_rate: Step size handed to every layer update.
            epochs: Number of full passes over ``input_data``.
        """
        for epoch in range(epochs):
            random_order = np.random.permutation(len(input_data))
            epoch_losses = []
            for i in random_order:
                sample = np.reshape(input_data[i], (1, -1))  # single-row batch
                output = self.forward_pass(sample) #forwardpass
                loss = self.loss_function.forward(output, targets[i]) #loss
                epoch_losses.append(np.mean(loss))
                d_values = self.loss_function.backward(output, targets[i]) #backwardpass
                self.backward_pass(d_values, learning_rate)
            if epoch_losses:
                self.loss_history.append(float(np.mean(epoch_losses)))

    def inference(self, input_data):
        """Return the stacked network outputs for every sample in ``input_data``."""
        return np.array([self.forward_pass(sample) for sample in input_data])



In [61]:
### Initialize MLP
# Seed NumPy's global generator so the random weight initialisation (and the
# sample shuffling performed during training) is repeatable after a kernel
# restart. Change the value to try a different initialisation.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# 2 inputs -> 3 hidden ReLU layers of width 3 -> 1 sigmoid output, trained
# with binary cross-entropy.
xor_mlp = MLP_implementation(input_size=2,
                            output_size=1,
                            hidden_layers=3,
                            hidden_layers_size=3,
                            hidden_activation_func=ReLuActivationFuction(),
                            output_activation_function=SigmoidActivationFunction(),
                            loss_function=BinaryCrossEntropy(),
                            )

In [68]:
### Training
# The four corners of the unit square: the complete XOR input space.
input_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
# Label every sample with the XOR of its two features, kept as a column vector.
targets = input_data[:, [0]] ^ input_data[:, [1]]

xor_mlp.train(input_data, targets, learning_rate=0.05, epochs=3000)

In [69]:
### Evaluation
def accuracy(predictions: np.ndarray, targets: np.ndarray, threshold=0.5) -> float:
    """Fraction of thresholded predictions that equal their target.

    Args:
        predictions: Predicted scores/probabilities, one per target.
        targets: Ground-truth labels (0 or 1).
        threshold: Scores greater than or equal to this value count as class 1.

    Returns:
        Share of matching elements, between 0.0 and 1.0.

    Raises:
        ValueError: If ``predictions`` and ``targets`` hold a different number
            of elements.
    """
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)
    if predictions.shape != targets.shape:
        if predictions.size != targets.size:
            raise ValueError(
                f"predictions has {predictions.size} elements but targets has {targets.size}"
            )
        # Align layouts such as (N,) vs (N, 1); comparing them directly would
        # broadcast to an (N, N) matrix and silently report a wrong accuracy.
        predictions = predictions.reshape(targets.shape)

    binary_predictions = (predictions >= threshold).astype(int)
    print(binary_predictions)
    accuracy_value = float(np.mean(binary_predictions == targets))
    return accuracy_value


# Score every XOR sample with the trained network, then lay the outputs out
# like `targets` so the two arrays can be compared element by element.
raw_outputs = xor_mlp.inference(input_data)
predictions = raw_outputs.reshape(targets.shape)

accuracy_value = accuracy(predictions, targets, threshold=0.5)
print(accuracy_value)

[[0]
 [0]
 [0]
 [0]]
0.5
