# Training ```FFANN```

Let's augment the ```FFANN``` class in order to be able to train it.

This is still a work in progress, but at least it can "fit" the *XOR*-gate and a linear function.

In [1]:
import FeedForwardANN as FFANN
import numpy as np

In [2]:
class FFv2(FFANN.FFANN):
    '''
    FFANN extended with a loss function and an optimizer ("strategy"),
    so that the network can be trained.
    '''

    def __init__(self, _inputs,_outputs,_hidden_nodes, activations, loss, strategy):
        '''
        _inputs, _outputs, _hidden_nodes, activations: forwarded unchanged to the base class.
        loss: callable invoked as loss(self); its result is stored as self.loss.
        strategy: callable invoked as strategy(self); its result is stored as self.strategy.
        '''
        super().__init__( _inputs,_outputs,_hidden_nodes, activations)

        #TODO: find a more elegant way to wire these up. Both helpers are
        #constructed with the network itself as their only argument.
        self.loss=loss(self)
        self.strategy=strategy(self)

    def SGD(self, data_in, data_out,alpha=1e-2, abs_tol=1e-5, rel_tol=1e-3, step_break=100,max_step=5000):
        '''
        Train the network by stochastic gradient descent, one random sample per step.

        data_in:  indexable collection of input points.
        data_out: indexable collection of targets; data_out[k] belongs to data_in[k].
        alpha, abs_tol, rel_tol: forwarded unchanged to self.strategy.update.
        step_break: training stops once self.strategy.update has returned a value
                    below 1 on more than step_break consecutive steps.
        max_step: upper bound on the number of update steps.

        Raises ValueError if data_in is empty or if data_in and data_out differ in
        length. Checking this up front avoids an IndexError surfacing at a random
        step, after some of the parameters have already been modified.
        '''
        #size of data
        data_size=len(data_in)
        if data_size==0:
            raise ValueError('SGD needs at least one training point, but data_in is empty.')
        if len(data_out)!=data_size:
            raise ValueError(
                'data_in and data_out must have the same length '
                '(got {} and {}).'.format(data_size, len(data_out))
            )

        #number of consecutive steps for which the strategy reported a value below 1
        _s=0
        count_steps=1
        #a while loop (rather than range) so that a float max_step, e.g. 5e5, keeps working
        while count_steps<=max_step:

            #get a random data point (its target is passed to the strategy.update function)
            index=np.random.randint(data_size)
            x=data_in[index]
            t=data_out[index]
            #run for x as the input, in order to get the output signal and be prepared
            #to get its derivatives over the parameters.
            self.inputSignal(x)
            self.feedForward()
            self.backPropagation()

            #strategy should have an update member. During this, w's and b's are updated.
            #It can return something that becomes less than one when the gradient becomes
            #small enough (we can change it later, to make the convergence conditions more
            #involved).
            #It also should have access to "self" because it will need to know the
            #structure of the network, the loss, and take their derivatives.
            _check=self.strategy.update(t, alpha, abs_tol, rel_tol)
            count_steps+=1

            #any step that does not look converged resets the streak
            if _check<1:
                _s+=1
            else:
                _s=0

            if _s>step_break:
                break

    

In [3]:
class loss:
    '''
    Mean squared error between the network output and the target, together with
    its derivatives with respect to a single weight and a single bias.
    '''

    def __init__(self,FFANN):
        '''
        FFANN: the network this loss belongs to. It must expose nodes and
        total_layers; grad additionally uses derivative_bw, dsdw and dsdb.
        '''
        self.FFANN=FFANN
        self.nodes=FFANN.nodes
        self.total_layers=FFANN.total_layers
        #number of nodes in the last layer, i.e. the size of the output signal
        self.N=self.nodes[self.total_layers-1]

        #results of the latest call to grad (derivatives of Q wrt one w and one b)
        self.dQdw=0
        self.dQdb=0

    def __call__(self,signal_out,data_out):
        '''Return the squared difference of the first N components, averaged over N.'''
        squared_error=0
        for k in range(self.N):
            residual=signal_out[k]-data_out[k]
            squared_error+=residual**2.
        return squared_error/(float(self.N))

    def dQds(self, signal_out, data_out):
        '''One component of the derivative of Q wrt the output signal.'''
        residual=signal_out - data_out
        return 2*residual/(float(self.N))

    def grad(self,l,j,i,signal_out,data_out):
        '''
        Store in self.dQdw and self.dQdb the derivatives of Q wrt w^{(l)}_{ji} and b^{(l)}_{j}.

        Chain rule over the output nodes r:
        \dfrac{\partial Q}{\partial P} = \dfrac{\partial Q}{\partial signal^{N-1}_{r}}\dfrac{\partial signal^{N-1}_{r}}{\partial P}
        '''
        net=self.FFANN
        #asks the network for the derivatives of its output signal; they are
        #read back below from net.dsdw and net.dsdb
        net.derivative_bw(l,j,i)

        self.dQdw=self.dQdb=0
        for r in range(self.N):
            slope=self.dQds(signal_out[r],data_out[r])
            self.dQdw+=slope*net.dsdw[r]
            self.dQdb+=slope*net.dsdb[r]
            
        
        

In [4]:
class VanillaSGD:
    '''
    Plain gradient-descent step: every parameter moves by -alpha times the
    derivative of the loss. Far from the best strategy, but the simplest one;
    it is here to check that the implementation works.
    '''

    def __init__(self,FFANN):
        '''FFANN: the network whose weights and biases this strategy updates.'''
        self.FFANN=FFANN

    def update(self, data_out,alpha=1e-2,abs_tol=1e-5, rel_tol=1e-3):
        '''
        Compute the gradient of Q for the current output signal and update w and b.

        data_out: target for the sample that was last fed through the network.
        alpha: step size.
        abs_tol, rel_tol: accepted for interface compatibility; not used here.

        Always returns 0.
        '''
        net=self.FFANN
        output=net.signals[net.total_layers-1]

        for layer in range(net.total_layers-1):
            for node in range(net.nodes[layer+1]):
                for source in range(net.nodes[layer]):
                    #leaves the derivatives in net.loss.dQdw and net.loss.dQdb
                    net.loss.grad(layer,node,source,output,data_out)
                    net.addToWeight(layer,node,source, -alpha*net.loss.dQdw)

                #the bias step uses the dQdb left behind by the last grad call above
                net.addToBias(layer,node, -alpha*net.loss.dQdb)

        return 0

In [5]:
# Activation objects handed to the networks built below.
lin, sig = FFANN.linearActivation(), FFANN.sigmoidActivation()



In [6]:
# Alternative experiment (disabled): fit the line y = 2x using linear activations only.
# data_in=[[_] for _ in np.linspace(0,1,25)]
# data_out=[[2*_] for _ in np.linspace(0,1,25)]
# brain=FFv2(1,1,[3,2,1],[lin,lin,lin,lin,lin],loss,VanillaSGD)
# brain.init_params(-1,1)

# XOR gate: the target of every input pair is the exclusive-or of its two bits.
data_in = [[1, 1], [0, 0], [1, 0], [0, 1]]
data_out = [[a ^ b] for a, b in data_in]

# 2 inputs, 1 output, hidden layout [3]; activations are presumably listed
# hidden-layer first, output last -- confirm against FFANN.
brain = FFv2(
    _inputs=2,
    _outputs=1,
    _hidden_nodes=[3],
    activations=[sig, lin],
    loss=loss,
    strategy=VanillaSGD,
)
# presumably draws the initial parameters from the range [-1, 1] -- see FFANN.init_params
brain.init_params(-1, 1)

In [7]:
# Train on the XOR data. With VanillaSGD (whose update always returns 0) the loop
# stops after step_break + 1 = 5001 updates, well before max_step is reached.
brain.SGD(
    data_in,
    data_out,
    alpha=1e-1,
    abs_tol=1e-2,
    rel_tol=1e-3,
    step_break=5000,
    max_step=500000,
)

In [8]:
# Evaluate the trained network on every training point and report the mean loss.
# Named loop variables are used instead of `_`, which IPython reserves for the
# last cell output; the forward pass and the loss are computed once per point.
Q = 0
for point, target in zip(data_in, data_out):
    prediction = brain(point)
    point_loss = brain.loss(prediction, target)
    print('point:', point, '\n'
          'target:', target,
          'FFANN gives:', prediction,
          'loss:', point_loss)
    print('')
    Q += point_loss

print('mean loss:', Q/float(len(data_out)))

point: [1, 1] 
target: [0] FFANN gives: [0.00015414293633497778] loss: 2.376004482196901e-08

point: [0, 0] 
target: [0] FFANN gives: [8.05169216215873e-05] loss: 6.482974667416832e-09

point: [1, 0] 
target: [1] FFANN gives: [0.9999937554773366] loss: 3.899406329373049e-11

point: [0, 1] 
target: [1] FFANN gives: [1.0000084038923998] loss: 7.062540746666597e-11

mean loss: 7.58815974003656e-09
