In [1]:
import numpy as np 
import matplotlib.pyplot as plt
from scipy import signal # for calculating convolutions 

from skimage.measure import block_reduce # for calculating maxpooling
import time 
import scipy 

## 1)  Neural network implementation

### 1.1) Layers 

In [2]:
class Dense():
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)
        
    def forward_propagation(self, input_data):
        self.input_data = input_data
        return (np.dot(self.weights, self.input_data) + self.bias)
    
    def backward_propagation(self, output_gradient, learning_rate):
        weight_gradient = np.dot(output_gradient, self.input_data.T)
        self.weights -= learning_rate * weight_gradient
        self.bias -= learning_rate * output_gradient
        return np.dot(self.weights.T, output_gradient)      

In [3]:
class Convolution():
    def __init__(self, input_dimensions, filter_size, output_depth):
        self.output_depth = output_depth # output depth = number of filters 
        self.input_depth, self.input_height, self.input_width = input_dimensions
        self.input_dimensions = input_dimensions
        self.output_dimensions = (output_depth, self.input_height - filter_size + 1, self.input_width - filter_size + 1)
        self.filter_dimensions = (output_depth, self.input_depth, filter_size, filter_size)
        self.filters = np.random.randn(*self.filter_dimensions)
        self.biases = np.random.randn(*self.output_dimensions)

    def forward_propagation(self, input_data):
        self.input_data = input_data
        self.output = np.copy(self.biases)
        for i in range(self.output_depth):
            for j in range(self.input_depth):
                self.output[i] += signal.correlate2d(self.input_data[j], self.filters[i, j], "valid")
        return self.output

    def backward_propagation(self, output_gradient, learning_rate):
        grad_filters = np.zeros(self.filter_dimensions)
        grad_input = np.zeros(self.input_dimensions)
        for i in range(self.output_depth):
            for j in range(self.input_depth):
                grad_filters[i, j] = signal.correlate2d(self.input_data[j], output_gradient[i], "valid")
                grad_input[j] += signal.correlate2d(output_gradient[i], self.filters[i, j], "full")
        grad_biases = output_gradient
        # Update
        self.filters -= learning_rate * grad_filters
        self.biases -= learning_rate * grad_biases
        return grad_input

#### Initialization:
 * `__init__` method : initializes the convolutional layer.
* `input_dimensions` : Represents the dimensions of the input data (depth, height, width).
* `filter_size` : Specifies the size of the filters used in the convolution.
* `output_depth`: Indicates the number of filters to be applied.

It sets up the dimensions for the filters, biases, and initializes them randomly.

#### Forward Propagation:
* `forward_propagation` method computes the output of the convolutional layer given an input.
* `input_data` : Represents the input data passed to the layer.

It performs a correlation operation between the input data and filters to generate the output feature maps.
the forward propagation loops through each filter and correlates it with corresponding input channels to produce the output.

#### Backward Propagation:
* `backward_propagation` method calculates gradients and updates parameters during backpropagation.
* `output_gradien`: Represents the gradient coming from the subsequent layers.
* `learning_rate`: Specifies the rate at which the model learns.

Calculates gradients of filters and input data with respect to the loss.
The Backward Propagation loops through each filter and computes its gradient by correlating the input and output gradients.It updates the filters and biases based on these gradients and the learning rate and returns the gradient with respect to the input for further backpropagation.

In [4]:
class Maxpooling():
    def __init__(self, input_dimensions, filter_size, stride): 
        # Initializing the Maxpooling layer with input dimensions, filter size, and stride
        self.input_depth, self.input_height, self.input_width = input_dimensions
        self.input_dimensions = input_dimensions 
        self.filter_size = filter_size 
        self.filter_h, self.filter_w = filter_size 
        self.stride = stride
        
        # Calculating output dimensions based on input dimensions, filter size, and stride
        self.output_height = int(1 + (self.input_height - self.filter_h) / stride)
        self.output_width = int(1 + (self.input_width - self.filter_w) / stride)
        self.output_depth = self.input_depth 
        
    def forward_propagation(self, input_data):
        # Performing forward propagation, computing the output of the Maxpooling layer
        self.input_data = input_data
        output = np.zeros((self.output_depth, self.output_height, self.output_width))
        stride = self.stride
        
        # Looping through each depth, height, and width of the output
        for c in range(self.output_depth): 
            for i in range(self.output_height):
                for j in range(self.output_width):
                    # Applying max pooling operation to get the maximum value within the filter window
                    output[c, i, j] = np.max(input_data[c, i * stride: i * stride + self.filter_h, j * stride: j * stride + self.filter_w]) 
        return output

    def backward_propagation(self, output_gradient, learning_rate):
        # Performing backward propagation to calculate gradients during backpropagation
        grad_input = np.zeros((self.input_depth, self.input_height, self.input_width))
        input_data = self.input_data
        stride = self.stride
        
        # Looping through each depth, height, and width of the output to compute gradients
        for c in range(self.output_depth):
            for i in range(self.output_height):
                for j in range(self.output_width):
                    # Finding the indices of the max value within the filter window
                    interm = input_data[c, i * stride: i * stride + self.filter_h, j * stride: j * stride + self.filter_w]
                    i_max, j_max = np.where(np.max(interm) == interm)
                    i_max, j_max = i_max[0], j_max[0]
                    # Assigning the output gradients to the corresponding indices in the input gradients
                    grad_input[c, i * stride: i * stride + self.filter_h, j * stride: j * stride + self.filter_w][i_max, j_max] = output_gradient[c, i, j]

        return grad_input

In [5]:
class Dropout():
    def __init__(self, q_bernoulli):
        # Initializing the Dropout layer with a probability parameter
        self.p_bernoulli = 1 - q_bernoulli  # Computing the retention probability

    def forward_propagation(self, input_data):
        # Performing forward propagation, applying dropout to the input data
        self.input_data = input_data
        # Creating a binary mask using Bernoulli distribution to drop neurons
        self.binary_mask = np.random.binomial(1, self.p_bernoulli, size=input_data.shape) / self.p_bernoulli
        # Applying dropout by element-wise multiplication with the binary mask
        self.output = input_data * self.binary_mask
        return self.output

    def backward_propagation(self, output_gradient, learning_rate):
        # Performing backward propagation, applying dropout to the gradient
        # Gradient passed through dropout is just the scaled gradient with the binary mask
        return output_gradient * self.binary_mask

In [6]:
class Dimension():
    def __init__(self, input_dimension, output_dimension):
        # Initializing the Dimension transformation with input and output dimensions
        self.input_dimension = input_dimension  # Input dimension
        self.output_dimension = output_dimension  # Output dimension
    
    def forward_propagation(self, input_data):
        # Performing forward propagation by reshaping the input to the output dimension
        return np.reshape(input_data, self.output_dimension)
    
    def backward_propagation(self, output_gradient, learning_rate):
        # Performing backward propagation by reshaping the output gradient to the input dimension
        return np.reshape(output_gradient, self.input_dimension)

#### 1.2) Activation layers

##### * Sigmoide 

In [7]:
class Tanh():
    def __init__(self):
        # Defining the Tanh activation function and its derivative
        tanh = lambda x: np.tanh(x)  # Tanh activation function
        tanh_p = lambda x: 1 - np.tanh(x) ** 2  # Derivative of the Tanh function
        self.activation = tanh  # Assigning the Tanh activation function
        self.derivative_activation = tanh_p  # Assigning the derivative of the Tanh function

    def forward_propagation(self, input_data):
        # Forward propagation: computing the output by applying the Tanh activation function
        self.input = input_data  # Storing the input
        return self.activation(self.input)  # Applying Tanh function to the input

    def backward_propagation(self, output_gradient, learning_rate):
        # Backward propagation: computing gradients using the derivative of the Tanh function
        return np.multiply(output_gradient, self.derivative_activation(self.input))
        # Element-wise multiplication of the output gradient by the derivative of Tanh to obtain the input gradient

##### * Softmax 

In [8]:
class Softmax():
    def forward_propagation(self, input_data):
        # Normalizing the input data using softmax function
        maxi = np.max(input_data)  # Finding the maximum value in the input data
        input_data = input_data - maxi  # Subtracting the maximum value for numerical stability
        expo = np.exp(input_data)  # Calculating the exponentials of the normalized input
        self.output = expo / np.sum(expo)  # Calculating the softmax probabilities
        return self.output  # Returning the softmax output probabilities
    
    def backward_propagation(self, output_gradient, learning_rate):
        # Backpropagation of softmax layer
        # This layer often uses Cross-Entropy Loss (cce) as the error function
        return output_gradient  # Transmitting the output gradient directly without further computation

##### * Relu 

In [9]:
class Relu():
    def forward_propagation(self, input_data):
        # Forward propagation: computes the output of the ReLU activation function
        self.input = input_data  # Storing the input for later use
        self.output = np.maximum(0, input_data)  # Applying ReLU activation
        return self.output  # Returning the ReLU output
    
    def backward_propagation(self, output_gradient, learning_rate):
        # Backward propagation: computes the gradient of the ReLU activation function
        # Applying the derivative of ReLU to the output gradient
        inter = output_gradient.copy()  # Creating a copy of the output gradient
        inter[inter <= 0] = 0  # Replacing negative values in the gradient with 0
        return inter  # Returning the computed gradient


##### 1.3) Error 

In [10]:
# Mean Squared Error (MSE)
def mse(desired_output, output):
    return np.mean(np.power(desired_output - output, 2))

def mse_derivative(desired_output, output):
    return 2 * (output - desired_output) / np.size(output)

# Binary Cross Entropy (BCE)
def bce(desired_output, output):
    desired_output = np.clip(desired_output, 1e-7, 1 - 1e-7)  # Avoiding division by zero / log(0)
    output = np.clip(output, 1e-7, 1 - 1e-7)  # Avoiding division by zero / log(0)
    return -np.mean(desired_output * np.log(output) + (1 - desired_output) * np.log(1 - output))

def bce_derivative(desired_output, output):
    output = np.clip(output, 1e-7, 1 - 1e-7)
    desired_output = np.clip(desired_output, 1e-7, 1 - 1e-7)
    return ((1 - desired_output) / (1 - output) - desired_output / output) / np.size(output)

# Categorical Cross Entropy (CCE)
def cce(desired_output, output):
    desired_output = np.clip(desired_output, 1e-7, 1 - 1e-7)
    output = np.clip(output, 1e-7, 1 - 1e-7)
    return -np.sum(np.log(output) * desired_output)

def cce_derivative(desired_output, output):
    desired_output = np.clip(desired_output, 1e-7, 1 - 1e-7)
    output = np.clip(output, 1e-7, 1 - 1e-7)
    return output - desired_output

#### 1.4) Neural Networks 

In [11]:
def precision_error(res, input, output):
    successes = 0  # Counter for successful predictions
    total = 0  # Counter for total predictions
    error_total = 0  # Total error

    # Iterating through each entry in the dataset
    for i in range(len(input)):
        prediction = res.prediction(input[i])  # Obtaining the prediction using the model
        error_total += res.error(output[i], prediction)  # Calculating the error for this prediction

        # Checking if the index of the maximum value in the prediction matches the expected output
        maxi = np.argmax(prediction)
        if maxi == np.argmax(output[i]):
            successes += 1  # Incrementing the success counter if the prediction is correct

        total += 1  # Incrementing the total counter for predictions

    # Calculating the accuracy (successes / total) and average error (error_total / total)
    accuracy = successes / total
    average_error = error_total / total

    return (accuracy, average_error)  # Returning a tuple containing accuracy and average error


In [12]:
class Network():
    def __init__(self, layers, error_function, error_derivative):
        self.layers = layers  # Neural network layers
        self.error = error_function  # Error function
        self.error_derivative = error_derivative  # Derivative of the error function

    def prediction(self, input_data):
        output = input_data
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def training(self, input_train, output_train, input_test, output_test, iterations, learning_rate):
        # Initialization of lists to store errors and precisions
        error_list = []
        error_list_test = []
        precision_train = []
        precision_test = []
        
        # Training loop
        for iteration in range(iterations):
            print("Iteration number:", iteration + 1)
            error = 0
            start_time = time.time()
            success_train = 0
            total_train = 0
            
            # Training phase
            for i in range(len(input_train)):
                # Forward propagation
                output = input_train[i]
                for layer in self.layers:
                    output = layer.forward_propagation(output)
                
                # Calculating accuracy
                if np.argmax(output) == np.argmax(output_train[i]):
                    success_train += 1
                total_train += 1
                
                # Adding error
                error += self.error(output_train[i], output)
                
                # Backpropagation
                output_backward = self.error_derivative(output_train[i], output)
                for layer in reversed(self.layers):
                    output_backward = layer.backward_propagation(output_backward, learning_rate)
            
            # Error handling and calculation
            error /= len(input_train)
            error_list.append(error)
            print("Error:", error)
            
            # Computing error and accuracy on test set
            test_accuracy, test_error = precision_error(self, input_test, output_test)
            error_list_test.append(test_error)
            print("Error on test set:", error_list_test[-1])
            
            # Computing and storing training accuracy
            precision_train.append(success_train / total_train)
            precision_test.append(test_accuracy)
            print("Accuracy on training set:", precision_train[-1])
            print("Accuracy on test set:", precision_test[-1])
            print("Iteration duration:", round(time.time() - start_time, 2), "s")
            print()

        print("End of training")
        print("Training duration:", round(time.time() - start_time, 2), "s")
        
        return (error_list, error_list_test, precision_train, precision_test)

    def test(self, input_test, output_test):
        error = 0
        for i in range(len(input_test)):
            # Forward propagation
            output = input_test[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            
            # Adding error
            error += self.error(output_test[i], output)
        
        error /= len(input_test)
        return error


#### 3) MNIST Classification 

In [13]:
from keras.datasets import mnist # MNIST Data

2023-11-21 17:21:31.550184: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [14]:
# Loading MNIST dataset
(input_train, output_train), (input_test, output_test) = mnist.load_data()
# input_test and output_test: Test input and output data

# Data Processing
def clean_mnist(input_data, output_data, limit):
    # limit: Maximum number of examples per digit
    indices = []
    for i in range(10):
        # Selecting indices up to the limit for each digit
        indices.extend(np.where(output_data == i)[0][:limit])
    indices = np.random.permutation(indices)
    x = input_data[indices]
    y = output_data[indices]
    # Reshaping input data for network compatibility
    x = x.reshape(len(x), 1, 28, 28)  # Adjusting image format to (depth, height, width)
    x = x.astype("float32") / 255  # Normalizing input data
    identity_matrix = np.eye(10)  # Identity matrix of size 10
    l = [identity_matrix[i] for i in y]
    y = np.reshape(np.array(l), (len(y), 10, 1))
    return x, y

# Cleaning and preparing training and test data
(input_train, output_train) = clean_mnist(input_train, output_train, 1000)
(input_test, output_test) = clean_mnist(input_test, output_test, -1)

# Neural Network Architecture
# Layers: Dense (fully connected), Convolutional, Maxpooling, Dimension, Relu, Softmax
layers = [
    Convolution((1, 28, 28), 3, 32),  # Convolution layer with specified parameters
    Relu(),  # Rectified Linear Unit activation
    Maxpooling((32, 26, 26), (2, 2), 2),  # Maxpooling layer with specified parameters
    Dimension((32, 13, 13), (32 * 13 * 13, 1)),  # Dimension layer mapping input to output size
    Dense(32 * 13 * 13, 100),  # Fully connected layer with 100 neurons
    Relu(),  # Activation function (ReLU)
    Dense(100, 10),  # Output layer with 10 neurons for classification
    Softmax()  # Softmax activation for probabilities
]

# Creating the neural network
network = Network(layers, cce, cce_derivative)  # Initializing the neural network with layers and error functions

# Training the neural network
train_error, test_error, train_accuracy, test_accuracy = network.training(
    input_train, output_train, input_test, output_test, 20, 0.1
)

# Plotting results
plt.figure()
plt.plot(range(1, 51), train_error)
plt.plot(range(1, 51), test_error)
plt.show()

plt.figure()
plt.plot(range(1, 51), train_accuracy)
plt.plot(range(1, 51), test_accuracy)
plt.show()

Iteration number: 1
Error: 2.3815929457956413
Error on test set: 2.3223088988553133
Accuracy on training set: 0.1026
Accuracy on test set: 0.101001001001001
Iteration duration: 1814.09 s

Iteration number: 2
Error: 2.3240918088586633
Error on test set: 2.3223088988553133
Accuracy on training set: 0.103
Accuracy on test set: 0.101001001001001
Iteration duration: 2020.53 s

Iteration number: 3
Error: 2.3240918088586633
Error on test set: 2.3223088988553133
Accuracy on training set: 0.103
Accuracy on test set: 0.101001001001001
Iteration duration: 1714.78 s

Iteration number: 4
Error: 2.3240918088586633
Error on test set: 2.3223088988553133
Accuracy on training set: 0.103
Accuracy on test set: 0.101001001001001
Iteration duration: 1589.64 s

Iteration number: 5


KeyboardInterrupt: 