# My own BackPropagation Implementation

## Basic definitions

In [1]:
from math import sqrt
import numpy as np
import matplotlib.pyplot as plt
import random

#let's define some basic functions. Even though I am not using anything besides sigmoid function
#, I have defined the softmax and the derivative 
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def identity(x):
    return x

def relu(x):
    return np.maximum(np.zeros(x.shape[0]),x)

def identity_derivative(x):
    return 1


def sigmoid_derivative(sigmoid_x): # we calculate the derivative based on the sigmoid function value
    return sigmoid_x*(1-sigmoid_x)

def tanh_derivative(x):
    return 1-x**2

def derivative(f):
    if f == sigmoid:
        return sigmoid_derivative
    elif f == np.tanh:
        return tanh_derivative
    elif f == identity:
        return identity_derivative
    else:
        return None
    
    
def local_max_indices(x,pool_d): #if is not devidable wihch can happen as a result of conv, we need to do something about it
    """Return maximum in groups of pool_dxpool_d for a N,h,w image"""
    N,h,w = x.shape
    x = x.reshape(N,h/pool_d,pool_d,w/pool_d,pool_d).swapaxes(2,3).reshape(N,h/pool_d,w/pool_d,pool_d*pool_d)
    return np.argmax(x,axis=3)

def global_max_indices(x,pool_d):
    N = x.shape[0]
    image_d= x.shape[1]
    ip_ratio = image_d / pool_d
    lmi = local_max_indices(x,pool_d)
    max_local_x,max_local_y = np.unravel_index(lmi.flat,dims=(pool_d,pool_d))
    max_y =  max_local_y + np.tile(np.tile(range(ip_ratio),ip_ratio)*pool_d,N)
    max_x =  max_local_x + np.tile(np.repeat(np.arange(ip_ratio), ip_ratio)*pool_d,N)
    Ns = np.repeat(np.arange(N),ip_ratio**2)
    return np.vstack([Ns,max_x,max_y]).T

def maxpool(x,pool_d):
    N= x.shape[0]
    image_d = x.shape[1]
    crop_length = image_d%pool_d
    x = x[:,:image_d-crop_length,:image_d-crop_length]
    gmi = global_max_indices(x,pool_d)
    maxes =  x[gmi[:,0],gmi[:,1],gmi[:,2]].reshape(N,image_d/pool_d,image_d/pool_d)
    return crop_length,gmi,maxes

def up_sample(gmi,values,image_d,pool_d,crop_length):
    N =values.shape[0] 
    pool_d = values.shape[1]
    ip_ratio = image_d / pool_d
    out = np.zeros([N,image_d,image_d])
    out[gmi[:,0],gmi[:,1],gmi[:,2]] = 1
    val_repeated = np.repeat(np.repeat(values,ip_ratio,axis=1),ip_ratio,axis=2)
    return out * val_repeated

# My backpropagation for CNN training

In [52]:
from math import sqrt
import numpy as np
import random
from scipy.signal import convolve2d

class bornaCNN:
    def __init__(self, mlp_layers_sizes, mlp_activations, CNN_activations,input_dimension ,num_of_kernels=5,kernels_dimensions=[5], 
                 max_pool_dimensions = [2] , batch_size=10, eta=0.05, epocs=1000, update='online', 
                 minibatch_percentage=0.1, verbose=True):
        
        self.minibatch_percentage = minibatch_percentage
        self.update=update# other values batch, mini-batch
        self.errors=[]
        self.eta=eta
        self.maxp_crop_lenghts=[]

        self.epocs=epocs
        self.num_of_kernels = num_of_kernels        
        self.kernels_dimensions = kernels_dimensions
        self.mlp_activations,self.CNN_activations=mlp_activations,CNN_activations
        self.max_pool_dimensions = max_pool_dimensions
        ######## mlp_layers_sizes doesn't contain the the number of nodes between the last maxp
        # wecalculate this
        
        lcmld = input_dimension # lcmd = Last Conv-Max Layer Dimension
        for i in range(len(kernels_dimensions)):
            lcmld = lcmld - kernels_dimensions[i]+1
            lcmld = lcmld / max_pool_dimensions[i]

        self.mlp_layers_sizes = np.insert(mlp_layers_sizes,0,num_of_kernels*lcmld*lcmld) # does not contain the 
        ########
        self.num_mlp_layers = len(self.mlp_layers_sizes )
        self.verbose= verbose
        self.kernels_W , self.kernels_B = self.generate_random_kernels()
        self.mlp_W , self.mlp_B= self.generate_random_mlp_weights()
        self.maxp_gmi = []# global max index of the maxpool layer. we use it to back propagate the deltas backward
        self.num_of_kernels = num_of_kernels
    
    
    def generate_random_kernels(self):#for now, we just use the same initialization technic that we use for MLP weights
        kernels_W={}
        kernels_B={}
        for i,kd in enumerate(self.kernels_dimensions):# number of Convolutions layers
            i = 2*i
            #upper = 4*sqrt(6)/sqrt(self.layers_sizes[i]+self.layers_sizes[i+1])
            upper = 4*sqrt(6)/sqrt(kd**2+1)
            key = str(i)+'-'+str(i+1)
            kernels_B[key] = np.zeros(self.num_of_kernels)
            kernels_W[key] = np.random.uniform(-upper,upper,[self.num_of_kernels,kd,kd])
        return kernels_W,kernels_B
        
    def generate_random_mlp_weights(self):
        mlp_W={}
        mlp_B={}
        for i in range(len(self.mlp_layers_sizes)-1):
            upper = 4*sqrt(6)/sqrt(self.mlp_layers_sizes[i]+self.mlp_layers_sizes[i+1])
            #weight also includes biases
            mlp_B[str(i)+"-"+str(i+1)]=np.zeros(self.mlp_layers_sizes[i+1])
            mlp_W[str(i)+"-"+str(i+1)] = np.random.uniform(-upper,upper, self.mlp_layers_sizes[i:i+2])#np.ones( self.layers_sizes[i:i+2])
        return mlp_W,mlp_B
    
    def _calc_error(self,X,Y):
        prediction = self.feed_forward(X)
        return .5*np.sum((Y-prediction[-1])**2)/(X.shape[0])
        
    def fit(self,X,Y):
        self.W , self.B= self.generate_random_weights()
        X=np.array(X)
        Y=np.array(Y)
        
        self.errors.append(self._calc_error(X,Y))# Error before we start training        
        for i in range(self.epocs):
            if self.verbose:
                if i<10:
                    print ("epoc->",i+1)
                elif i<100 and i%10==0:
                    print ("epoc->",i)
                elif i<1000 and i%100==0:
                    print ("epoc->",i)
                elif i<10000 and i%1000==0:
                    print ("epoc->",i)
                elif i<100000 and i%10000==0:
                    print ("epoc->",i)
            
            for j in range(X.shape[0]):
                delta_W, delta_B = self.calc_weight_updates(X[j,:],Y[j])
                self.update_weights(delta_W, delta_B)
            self.errors.append(self._calc_error(X,Y))
                
    def update_weights(self,delta_W,delta_B):
        for i in range(self.num_layers-1): #going through layers
            index=str(i)+"-"+str(i+1)
            self.W[index] -= delta_W[index]
            self.B[index] -= delta_B[index]

    def predict(self,X):
        nn_output = self.feed_forward(X)
        labels = np.argmax(nn_output[-1],axis=1)
        return labels
        
    def calc_weight_updates(self,inputx,target):
        ffr = self.feed_forward(inputx)
        # https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/        
        #step1: output layer
        delta_W={}
        delta_B={}
        
        this_output = ffr[-1]
        previous_output = ffr[-2]

        #this_delta = ((this_output - target)) * (this_output * (1-this_output))
        act_deriv = derivative(self.activations[self.num_layers-2])
        this_delta = ((this_output - target)) * act_deriv(this_output) 

        weight_chagnge = np.outer(previous_output , this_delta)
        
        delta_W[str(self.num_layers-2)+"-"+str(self.num_layers-1)] = self.eta * weight_chagnge
        delta_B[str(self.num_layers-2)+"-"+str(self.num_layers-1)] = self.eta *this_delta
        
        #step2: Hidden Layers
        for i in reversed(range(1,self.num_layers-1)): # Going through all the layers backwards
            next_layer_delta = this_delta
            
            hl_input = ffr[i-1]
            hl_output = ffr[i]
            hl_out_weights = self.W[str(i)+"-"+str(i+1)]
            #this_delta =np.dot(hl_out_weights, next_layer_delta) * hl_output *(1-hl_output)
            act_deriv = derivative(self.activations[i-1])
            this_delta =np.dot(hl_out_weights, next_layer_delta) * act_deriv(hl_output)
            
            weight_chagnge = np.outer(hl_input,this_delta)
            delta_W[str(i-1)+"-"+str(i)] = self.eta* weight_chagnge     
            delta_B[str(i-1)+"-"+str(i)] = self.eta* this_delta
            
        # step3 iterating betwen Maxpool and conv steps and calculating the weight updates
            
        return delta_W, delta_B
    
    def feed_forward(self,x): #feed forward, x is a 2d matrix
        x = np.array(x)        
        ffr_K = []
        layer_input = np.repeat(x[np.newaxis],self.num_of_kernels,axis=0)# make 2d inpu to k, 2d inpus
        
        for i,kd in enumerate(self.kernels_dimensions): #going through each Con layer and the max pool layer
            # We do apply the kernels first
            conv_layer_output = np.empty([self.num_of_kernels,layer_input.shape[1]-kd+1,layer_input.shape[1]-kd+1],dtype=float) # conv_output empty
            key = str(2*i)+'-'+str(2*i+1)
            Ks,Bs = self.kernels_W[key] , self.kernels_B[key]
            for j in range(Ks.shape[0]):#iterating through the each kernel
                conv_layer_output[j] = convolve2d(layer_input[j],Ks[j],'valid') #+ Bs[j]
                ##CNN_activations!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                
            ffr_K.append(conv_layer_output) #append the convolution
            ## Max pool dimension
            maxp_d = self.max_pool_dimensions[i]
            crop_length, gmi, maxp_output = maxpool(conv_layer_output,maxp_d)
            self.maxp_gmi.append(gmi)
            self.maxp_crop_lenghts.append(crop_length)
            
            ffr_K.append(maxp_output)
            layer_input = maxp_output # for the next iteration

            
            
        # before passing on the last layer to the fully connect network, we need to flatten it: layer_input.reshape(-1)
        #layer_input is actually the output of last layer before MLP
        
        x = layer_input.reshape(-1)
        print "x.shape",x.shape
        ffr_mlp=[x]#Feed Forward Result
        for i in range(self.num_mlp_layers-1):
            W = self.mlp_W[str(i)+"-"+str(i+1)]
            B = self.mlp_B[str(i)+"-"+str(i+1)]
            y = self.mlp_activations[i](np.dot(x,W) + B) #clculate the output of the layer
            x=y
            ffr_mlp.append(y)
        return ffr_K,gmi,ffr_mlp

In [53]:
bornacnn = bornaCNN( mlp_layers_sizes=[5,2], mlp_activations=[sigmoid,sigmoid], CNN_activations = [sigmoid,sigmoid],input_dimension = 500 ,
                    num_of_kernels=2,kernels_dimensions=[2,2], max_pool_dimensions = [2,2] )

In [54]:
x = np.random.rand(500,500)
ffr_K,gmi,ffr_mlp = bornacnn.feed_forward(x)



x.shape (30752L,)
