# General lib imports

In [22]:
import pandas as pd
import numpy as np
from progressbar import progressbar

# Code

## Functions

In [108]:
def compute_activation (X, activation_type, derivative=False):
    '''Defining activation functions
     Takes a nparray or a single value
    # Returns in the same format
    
    For softmax : assuming that X.shape[0]== n_neurons,
        the axis0 of array X is used for computing the mean 
    '''
    X=np.array(X)
    if activation_type == 'relu':
        if derivative == False:
            return np.maximum(X,0)
        else:
            return (X > 0).astype(int)
        
    elif activation_type == 'sigmoid':
        if derivative == False:
            return 1 / (1 + np.exp(-X))
        else:
            return np.exp(-X) / np.square(1 + np.exp(-X))
    
    elif activation_type == 'tanh':
        if derivative == False:
            return np.tanh(X)
        else:
            return np.square(1 / np.cosh(X))
            
    elif activation_type == 'linear':
        if derivative == False:
            return X
        else:
            return np.ones(X.shape)
            
    elif activation_type == 'softmax':
        exp_x = np.exp(X)
        sigma_exp_x = exp_x.sum(axis=0)
        if derivative == False: 
            return exp_x / sigma_exp_x
        else:
            return (exp_x * sigma_exp_x - np.square(exp_x)) / np.square(sigma_exp_x)
        
    #raise error if unknown type
    else:
        raise ValueError(f'Unknown activation type {activation_type}.\
                           Supported types : linear, relu, sigmoid, tanh, softmax')

In [40]:
def compute_metric (y, y_pred, metric, loss_derivative=False):
    '''Defining loss and metric functions
     Takes nparrays, lists or a single values
     
     ## IF loss_derivative==False:
         output: always scalar
         
     ## IF loss_derivative==True: (True will be ignored for non-loss metrics)
         Computes the partial derivative of the loss function
           with respect to each component of each sample
         output: 2Darray
            n_samples * 1 for binary_crossentropy or single output regression
            n_samples * n_class for categorical_crossentropy
            n_samples * n_features for multifeatures regression)
    '''
        
    #converting DataFrames, lists or lists of lists to nparray
    y = np.array(y)
    y_pred = np.array(y_pred)
        
    #deal with 1D inputs to forge a n_samples * 1 2D-array
    if len(y.shape) == 1:
        y = np.expand_dims(y, axis = 1)
    if len(y_pred.shape) == 1:
        y_pred = np.expand_dims(y_pred, axis = 1)
            
    #raise errors for unconsistant inputs
    if len(y.shape) > 2:
        raise ValueError('y vector dimension too high. Must be 2 max')
    if len(y_pred.shape) > 2:
        raise ValueError('y_pred vector dimension too high. Must be 2 max')
    if y.shape != y_pred.shape:
        raise ValueError(f'unconsistent vectors dimensions during scoring :\
                           y.shape= {y.shape} and y_pred.shape= {y_pred.shape}')
        
    #compute loss funtions (or derivatives if loss_derivative==True)
    if metric == 'mse':
        if loss_derivative == False:
            return np.square(y-y_pred).mean()
        else:
            return 1 / y.size * 2 * (y_pred - y)
        
    elif metric == 'mae':
        if loss_derivative == False:
            return np.abs(y-y_pred).mean()
        else:
            return 1 / y.size * (y_pred - y) / np.abs(y - y_pred)
        
    elif metric == 'categorical_crossentropy':
        if loss_derivative == False:
            return -1/y.shape[0] * ((y * np.log(y_pred)).sum())
        else:
            return -1 / y.shape[0] * (y / y_pred)
        
    elif metric == 'binary_crossentropy':
        if y.shape[1]>1:
            raise ValueError('y vector dimension too high.\
                              Must be 1 max for binary_crossentropy')
        if loss_derivative == False:
            return -(y*np.log(y_pred)+(1-y)*np.log(1-y_pred)).mean()
        else:
            return -1 / y.size * (y / y_pred - (1-y) / (1-y_pred))
        
    # compute other metrics functions
    ## TODO ## accuracy, f1-score, recall, etc..
    else:
        raise ValueError(f'Unknown metric {metric}. Supported types :\
                           mse, mae, categorical_crossentropy, binary_crossentropy')

## Model class

In [57]:
class handmade_nn ():
    '''
    hand-made version of neural network
    so far, the possibilities are :
    
        - layers activation functions :
            'linear', 'relu', 'sigmoid', 'tanh', 'softmax'
    
        - loss functions :
            'mse', 'mae', 'binary_crossentropy', 'categorical_crossentropy'
    
        - solver :
            SGD without momentum
    '''
    def __init__ (self, input_dim=0):
        self.weights=[]
        self.bias=[]
        self.activation_types=[]
        self.input_dim=input_dim
        self.n_layers=0

    def set_input_dim (self,input_dim):
        self.input_dim = input_dim
        
    def add_dense_layer (self, n_neurons, activation_type):
        #check if the input_dim is set
        if self.input_dim == 0:
            raise ValueError('input_dim = 0 .\
                              Use set_input_dim before creating first layer')
            
        #get the size of the input os this layer
        if len(self.bias) == 0:
            previous_dim=self.input_dim
        else:
            previous_dim=(self.bias[-1].shape[0])
            
        #initialize the layer parameters 
        self.weights.append(np.zeros((n_neurons, previous_dim)))
        self.bias.append(np.expand_dims(np.zeros(n_neurons), axis=1))
        self.activation_types.append(activation_type)
        self.n_layers += 1
        
        #test the activation type
        compute_activation(0, activation_type)
        
    def predict (self, X, keep_hidden_layers=False):
        '''input X : list, list of lists, np array, pd DataFrame
               axis 0 = samples
               axis 1 = features
               
           ## IF keep_hidden_layers==False:
           output y_pred: 2D np-array
               axis 0 = samples
               axis 1 = output features, depending of the size of last layer
               
           ## IF keep_hidden_layers==True:
           output = list of 2D np-arrays of outputs of each layer
               len(list)=n_layers+1: 1st element = X itself
                                     last element = y_pred
               axis 0 = samples
               axis 2 = number of neurons of the layer'''
        #converting DataFrames, lists or lists of lists to nparray
        X = np.array(X)
        
        #deal with 1D inputs to forge a 1 * n_features 2D-array
        if len(X.shape) == 1:
            X = np.expand_dims(X, axis = 0)
            
        #raise errors for unconsistant inputs
        if len(X.shape) > 2:
            raise ValueError('X vector dimension too high. Must be 2 max')
        if X.shape[1] != self.input_dim:
            raise ValueError(f'Unconsistent number of features.\
                               The network input_dim is {self.input_dim}')
            
        #compute the prediction
        if keep_hidden_layers == True:
            layers_outputs_list = [X]
        for layer_index, activation_type in enumerate(self.activation_types):
            activation_input = np.dot(self.weights[layer_index], X.T)\
                               + self.bias[layer_index]
            X = compute_activation(activation_input, activation_type).T
            if keep_hidden_layers == True:
                layers_outputs_list.append(X)
        if keep_hidden_layers == True:
            return layers_outputs_list
        else:
            return X
    
    def score (self, X, y, metric):
        '''use predict method, then compute_metric function'''
        y_pred=self.predict(X)
        return compute_metric(y, y_pred, metric)
    
    def compute_backpropagation (self, X, y):
        '''This method :
            - executes self.predict(X) WITH keep_hidden_layers
                to keep all intermediate outputs
            - executes compute_metric (y, y_pred, loss) WITH loss_derivative
            - for each layer from last to first : computes loss
              derivatives with respect to bias and weights
              
            output 1 : gradient with respect to weights 
               (list of 2D arrays)
               len(list)=n_layers
               axis 0 = number of neurons of the layer
               axis 1 = number of neurons of the previous layer (or features in the input)
            output 2 : gradient with respect to bias
               (list of 1D arrays)
               len(list)=n_layers
               axis 0 = number of neurons of the layer
                    
            '''
        layers_outputs = self.predict(X, 
                                      keep_hidden_layers = True)
        loss_derivative = compute_metric (y, 
                                          layers_outputs[-1], 
                                          self.loss, 
                                          loss_derivative = True)
        for layer_index in range(n_layers, 0, -1):
            
            
        
        
    
    def fit (self, X, y, loss=None, learning_rate=0.01, batch_size=1, n_epochs=10):
        '''input X : 2D array or pd DataFrame
                axis 0 = samples
                axis 1 = features
        '''
        self.loss=loss
        self.learning_rate=learning_rate
        self.batch_size=batch_size
        self.epochs=epochs
        
        X = np.array(X)
        n_samples = X.shape[0]
        n_minibatches_per_epoch = int(n_samples / batch_size)
        
        pbar = progressbar()
        for epoch_index in range (n_epochs):
            print(f'beginning epoch n°{epoch_index + 1}')
            #shuffle rows of X
            X_epoch = np.random.shuffle(X)
            
            for mini_batch_index in pbar(range(n_minibatches_per_epoch)):
                ##TODO compute gradient, backprop
                ##TODO update weights and bias
                pass
    

# Tests

## compute_activation function tests

### test of each available function

In [107]:
assert (compute_activation(np.array([[-1,0], [0, 1], [1, 3]]),'relu') ==\
        np.array([[0,0], [0, 1], [1, 3]]))\
        .all(), "uncorrect relu function behaviour"

assert (compute_activation(np.array([[-1,0], [0, 1], [1, 3]]),'linear') ==\
        np.array([[-1,0], [0, 1], [1, 3]]))\
        .all(), "uncorrect linear function behaviour"

assert (np.round(compute_activation(np.array([[-1,0], [0, 1], [1, 3]]),'sigmoid'), decimals= 8) ==\
        np.array([[0.26894142, 0.5       ],
                  [0.5       , 0.73105858],
                  [0.73105858, 0.95257413]]))\
        .all(), "uncorrect sigmoid function behaviour"

assert (np.round(compute_activation(np.array([[-1,0], [0, 1], [1, 3]]),'tanh'), decimals= 8) ==\
        np.array([[-0.76159416,  0.        ],
                  [ 0.        ,  0.76159416],
                  [ 0.76159416,  0.99505475]]))\
        .all(), "uncorrect tanh function behaviour"

assert (np.round(compute_activation(np.array([[-1,0], [0, 1], [1, 3]]),'softmax'), decimals= 8) ==\
        np.array([[0.09003057, 0.04201007],
                  [0.24472847, 0.1141952 ],
                  [0.66524096, 0.84379473]]))\
        .all(), "uncorrect softmax function behaviour"

### raise ValueError for unknown activation type while calling compute_activation

In [6]:
from unittest import TestCase

test=TestCase()
with test.assertRaises(ValueError) as context:
    compute_activation(0,'typo_error')
assert 'Unknown activation type' in str(context.exception),\
    "no or wrong Exception raised when inputing an unknown activation_type\
     while calling compute_activation" 

## add_dense_layer method tests

### raise ValueError if no input_dim

In [7]:
from unittest import TestCase
my_first_nn=handmade_nn()

test=TestCase()
with test.assertRaises(ValueError) as context:
    my_first_nn.add_dense_layer(5,'relu')
assert 'Use set_input_dim before creating first layer'\
       in str(context.exception),\
    "no or wrong Exception raised when adding first layer\
     to a network without setting input_dim"

### raise ValueError for unknown activation type while adding layer

In [8]:
from unittest import TestCase
my_first_nn=handmade_nn(5)

test=TestCase()
with test.assertRaises(ValueError) as context:
    my_first_nn.add_dense_layer(10,'typo_error')
assert 'Unknown activation type' in str(context.exception),\
    "no or wrong Exception raised when inputing\
     an unknown activation_type while adding layer"    

## predict method tests

### handling with a list as an input

In [9]:
my_first_nn=handmade_nn(5)
# Empty neural network : just a pass-through for 5-values inputs
assert my_first_nn.predict([2,3,2,3,4]).shape == (1,5),\
    "list not supported as an input for predict"

### handling with a list of lists as input

In [10]:
my_first_nn=handmade_nn(5)
# Empty neural network : just a pass-through for 5-values inputs
assert my_first_nn.predict([[2,3,2,3,4],[-2,-1,1,3,4]]).shape == (2,5),\
    "list of list not supported as an input for predict"

### handling with a 1D-array as input

In [11]:
my_first_nn=handmade_nn(5)
my_first_nn.add_dense_layer(10, 'linear')
assert my_first_nn.predict(np.array([-2,-1,2,3,4])).shape == (1,10),\
    "1-D array not supported as an input for predict"

### handling with a 2D-array (most common case) as input

In [12]:
my_first_nn=handmade_nn(5)
my_first_nn.add_dense_layer(10, 'linear')
assert my_first_nn.predict(np.array([[-2,-1,2,3,4],
                                     [-12,-11,12,13,14]])).shape == (2,10),\
    f"the shape of the prediction for a 2*5 X input\
    by a network having 10neurons on last layer should be 2*10"

### raise error for 3D-array or more as input

In [13]:
from unittest import TestCase
my_first_nn=handmade_nn(5)
# Empty neural network : just a pass-through for 5-values inputs

test=TestCase()
with test.assertRaises(ValueError) as context:
    my_first_nn.predict(np.array([[[1,1],[1,2],[1,3],[1,4],[1,5]],
                                 [[2,1],[2,2],[2,3],[3,4],[3,5]]]))
assert 'X vector dimension too high' in str(context.exception),\
    "no or wrong Exception raised when inputing a 3D-array in predict method"    

### raise error for unconsitant X vs. input_dim

In [14]:
from unittest import TestCase
my_first_nn=handmade_nn(5)
# Empty neural network : just a pass-through for 5-values inputs

test=TestCase()
with test.assertRaises(ValueError) as context:
    my_first_nn.predict(np.array([[1,1],[1,2],[1,3],[1,4],[1,5]]))
assert 'Unconsistent number of features' in str(context.exception),\
    "no or wrong Exception raised when inputing a X\
     with unconsistant size vs. network input_dim in method predict"    

## general tests of predict method with all activation types

In [15]:
my_first_nn=handmade_nn(5)

my_first_nn.add_dense_layer(10, 'relu')
my_first_nn.weights[-1] = np.concatenate([np.identity(5), np.zeros((5,5))], axis=0)
my_first_nn.bias[-1] = np.expand_dims([0,0,0,0,1,1,1,0,0,0], axis=1)

my_first_nn.add_dense_layer(10, 'linear')
my_first_nn.weights[-1] = np.flip(np.identity(10), 1)
my_first_nn.bias[-1] = np.expand_dims([1,1,1,1,1,1,0,0,0,0], axis=1)

my_first_nn.add_dense_layer(10, 'tanh')
my_first_nn.weights[-1] = np.identity(10)
my_first_nn.bias[-1] = np.expand_dims([0,0,0,0,1,1,1,1,0,0], axis=1)

my_first_nn.add_dense_layer(10, 'softmax')
my_first_nn.weights[-1] = np.flip(np.identity(10), 1)
my_first_nn.bias[-1] = np.expand_dims([0,0,0,0,0,0,1,1,1,1], axis=1)

my_first_nn.add_dense_layer(1, 'sigmoid')
my_first_nn.weights[-1] = np.expand_dims(np.arange(1,11,1), axis=0)
my_first_nn.bias[-1] = np.expand_dims([0.5], axis=1)

assert np.round(my_first_nn.predict([-2,-1,2,3,4])[0,0], decimals=8) == 0.99939824,\
    "the general test of predict method on a network involving\
     all activation types and manually set bias and weights\
     did not return the correct value"    

## compute_metric function tests

### raise error for 3D-array or more as input

In [34]:
from unittest import TestCase

test=TestCase()
with test.assertRaises(ValueError) as context:
    compute_metric(np.array([[[1,1],[1,2]],
                             [[2,1],[2,2]]]),
                   np.array([[1,2],
                             [3,4]]),
                   'mse')
assert 'y vector dimension too high' in str(context.exception),\
    "no or wrong Exception raised when inputing a 3D-array as y\
     in compute_metric function"

test=TestCase()
with test.assertRaises(ValueError) as context:
    compute_metric(np.array([[1,2],
                             [3,4]]),
                   np.array([[[1,1],[1,2]],
                             [[2,1],[2,2]]]),
                   'mse')
assert 'y_pred vector dimension too high' in str(context.exception),\
    "no or wrong Exception raised when inputing a 3D-array as y_pred\
     in compute_metric function"  

### raise error for unconsistency between y & y_pred shapes

In [35]:
from unittest import TestCase

test=TestCase()
with test.assertRaises(ValueError) as context:
    compute_metric(np.array([[1,2,3],
                             [4,5,6]]),
                   np.array([[1,2],
                             [3,4]]),
                   'mse')
assert 'unconsistent vectors dimensions' in str(context.exception),\
    "no or wrong Exception raised when inputing unconsistent\
     y vs y_pred vectors shapes in compute_metric function"

### test of each available metric

In [36]:
assert compute_metric([1,0],[0.5,1],'mse') == 0.625,\
    "uncorrect mse metric behaviour"

assert compute_metric([[1,0],[0,0]],[[0.5,1],[1,1]],'mse') == 0.8125,\
    "uncorrect mse metric behaviour for multi-features regressions\
     (2D y and y_pred vectors)"

assert compute_metric([1,0],[0.5,1],'mae') == 0.75,\
    "uncorrect mae metric behaviour"

assert np.round(compute_metric([[1,0,1],[0,0,0]],[[0.5,0.9,0.1],[0.9,0.9,0.1]],
                               'categorical_crossentropy'),
                decimals=8) == 1.49786614,\
    "uncorrect categorical_crossentropy metric behaviour"

assert np.round(compute_metric([1,0],[0.9,0.1],'binary_crossentropy'),
                decimals=8) == 0.10536052,\
    "uncorrect binary_crossentropy metric behaviour"

### raise error if a y vector of dim 2 is inputed and binary_crossentropy is selected

In [37]:
from unittest import TestCase

test=TestCase()
with test.assertRaises(ValueError) as context:
    compute_metric([[1,0,1],[0,0,0]],
                   [[0.5,0.9,0.1],
                    [0.9,0.9,0.1]],
                   'binary_crossentropy')
assert '1 max for binary_crossentropy' in str(context.exception),\
    "no or wrong Exception raised when inputing 2D y/y_pred vectors\
     with binary_crossentropy selected in compute_metric function"

### raise error for unknown metric

In [38]:
from unittest import TestCase

test=TestCase()
with test.assertRaises(ValueError) as context:
    compute_metric([0],[0],'typo_error')
assert 'Unknown metric' in str(context.exception),\
    "no or wrong Exception raised when inputing\
     unknown metric in compute_metric function"

## fit method tests

In [32]:
my_first_nn=handmade_nn(input_dim = 2)
my_first_nn.add_dense_layer(4, 'relu')
my_first_nn.add_dense_layer(1, 'sigmoid')
my_first_nn.fit()

TypeError: fit() missing 2 required positional arguments: 'X' and 'y'

# Trash section : small tests, to be deleted

In [43]:
bia=np.array([1,2,3])
bia.shape

(3,)

In [44]:
npp=np.zeros((3,2))
npp

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [56]:
npp+np.expand_dims(bia,axis=1)

array([[1., 1.],
       [2., 2.],
       [3., 3.]])

In [47]:
biab=np.expand_dims(bia,axis=1)

In [55]:
npp+biab

array([[1., 1.],
       [2., 2.],
       [3., 3.]])

In [59]:
for layer_index in range(10-1, -1, -1):
    print(layer_index)

9
8
7
6
5
4
3
2
1
0


In [60]:
bia=np.array([[1,2,3],[-1,3,-1]])
bia

array([[ 1,  2,  3],
       [-1,  3, -1]])

In [62]:
np.int(bia>0)

TypeError: only size-1 arrays can be converted to Python scalars

In [63]:
np.cast(bia,int)

TypeError: '_typedict' object is not callable

In [64]:
bia

array([[ 1,  2,  3],
       [-1,  3, -1]])

In [70]:
(bia>0).astype(int)

array([[1, 1, 1],
       [0, 1, 0]])

In [68]:
mask.astype(int)

array([[1, 1, 1],
       [0, 1, 0]])

In [71]:
bia

array([[ 1,  2,  3],
       [-1,  3, -1]])

In [72]:
np.square(bia)

array([[1, 4, 9],
       [1, 9, 1]])

In [73]:
bia/np.square(bia)

array([[ 1.        ,  0.5       ,  0.33333333],
       [-1.        ,  0.33333333, -1.        ]])

In [76]:
bia.sum(axis=0)

array([0, 5, 2])

In [77]:
bia

array([[ 1,  2,  3],
       [-1,  3, -1]])

In [78]:
bibi=bia.T
bibi

array([[ 1, -1],
       [ 2,  3],
       [ 3, -1]])

In [79]:
bibi.sum(axis=0)

array([6, 1])

In [80]:
exp_x = np.exp(bibi)
sigma_exp_x = exp_x.sum(axis=0)

In [81]:
exp_x

array([[ 2.71828183,  0.36787944],
       [ 7.3890561 , 20.08553692],
       [20.08553692,  0.36787944]])

In [82]:
sigma_exp_x

array([30.19287485, 20.82129581])

In [83]:
exp_x/sigma_exp_x

array([[0.09003057, 0.01766842],
       [0.24472847, 0.96466316],
       [0.66524096, 0.01766842]])

In [87]:
(exp_x * sigma_exp_x - np.square(exp_x)) / np.square(sigma_exp_x)

array([[0.08192507, 0.01735625],
       [0.18483645, 0.03408815],
       [0.22269543, 0.01735625]])

In [88]:
exp_x * sigma_exp_x - np.square(exp_x)

array([[ 74.68368696,   7.52439138],
       [168.49869603,  14.7781122 ],
       [203.01130914,   7.52439138]])

In [94]:
np.array([[-1,0], [0, 1], [1, 2]])

array([[-1,  0],
       [ 0,  1],
       [ 1,  2]])