# Training ```FFANN```

Let's augment the ```FFANN``` class in order to be able to train it.

This is still a work in progress, but it can already fit the *XOR* gate and a linear function.

In [1]:
import FeedForwardANN as FFANN
import numpy as np

In [2]:
class RMSpropSGD:
    '''
    RMSpropSGD strategy. Better than vanilla, but still not that good.

    Every weight and bias gets its own step size: the learning rate alpha
    divided by the square root of an exponentially decaying mean of the
    squared gradient of that parameter.

    The network object must provide: nodes, total_layers, signals, weights,
    biases, addToWeight(l,j,i,value) and addToBias(l,j,value).
    The loss object must provide: grad(l,j,i,signal_out,data_out), which
    stores its results in dQdw and dQdb, and the attribute N.
    '''
    def __init__(self,FFANN,loss,gamma=0.95,epsilon=1e-6,alpha=1e-2):
        '''
        FFANN: the feed-forward neural network
        loss: the loss function
        gamma: the decaying parameter
        epsilon: safety parameter (to avoid division by 0)
        alpha: learning rate

        '''
        self.FFANN=FFANN
        self.loss=loss
        self.gamma=gamma
        self.epsilon=epsilon
        self.alpha=alpha

        #counters for the decaying means (same layout as the weights and biases:
        #meanWeights[l][j][i] and meanBiases[l][j]), all starting from 0
        n_links=self.FFANN.total_layers-1
        self.meanWeights=[ [[0 for i in range(self.FFANN.nodes[l])] for j in range(self.FFANN.nodes[l+1])]  for l in range(n_links)]
        self.meanBiases=[ [0 for j in range(self.FFANN.nodes[l+1])]  for l in range(n_links)]


    def update(self, data_out,abs_tol=1e-5, rel_tol=1e-3):
        '''
        during the update step, you calculate the gradient of Q
        and update w and b.

        data_out: the target passed to loss.grad
        abs_tol: absolute tolerance of the convergence measure
        rel_tol: relative tolerance of the convergence measure

        Returns sqrt( 1/loss.N * sum( (g/(abs_tol + |p|*rel_tol))**2 ) ), where the sum
        runs over all weights and biases p (taken after their update) and g is the
        corresponding gradient component. The caller decides how to use this
        number as a stopping condition.
        '''
        net=self.FFANN
        loss=self.loss

        #These are the output signals of FFANN. The update should run after
        #FFANN.feedForward() and FFANN.backPropagation().
        signal_out=net.signals[net.total_layers-1]

        #accumulates the scaled squared gradients for the stopping condition
        _check=0

        for l in range(net.total_layers-1):
            for j in range(net.nodes[l+1]):
                for i in range(net.nodes[l]):
                    #get the grad of the loss. The results should be stored in loss.dQdw and loss.dQdb
                    #This way it should be easy to update the weights and biases of FFANN
                    loss.grad(l,j,i,signal_out,data_out)

                    #decaying mean of the squared gradient for this weight
                    self.meanWeights[l][j][i]=self.gamma*self.meanWeights[l][j][i] + (1-self.gamma)*loss.dQdw**2
                    dw=self.alpha/np.sqrt( (self.meanWeights[l][j][i]+self.epsilon)  )*loss.dQdw

                    #update the weight using loss.dQdw
                    net.addToWeight(l,j,i, -dw)

                    #the scale uses the magnitude of the weight; with the signed value a
                    #negative weight could drive the scale to zero (division by zero)
                    _w2=abs_tol + abs(net.weights[l][j][i]) * rel_tol
                    _check+=(loss.dQdw/_w2)*(loss.dQdw/_w2)

                #update the bias using loss.dQdb (it is the same for all i, so don't run loss.grad again).
                self.meanBiases[l][j]=self.gamma*self.meanBiases[l][j] + (1-self.gamma)*loss.dQdb**2
                db=self.alpha/np.sqrt( (self.meanBiases[l][j]+self.epsilon)  )*loss.dQdb

                net.addToBias(l,j, -db)

                #same magnitude-based scale for the bias
                _w2=abs_tol + abs(net.biases[l][j]) * rel_tol
                _check+=(loss.dQdb/_w2)*(loss.dQdb/_w2)


        _check=np.sqrt(1./loss.N *_check)
        return _check

In [3]:
# Activation objects from the FeedForwardANN module, reused when building the networks below.
lin = FFANN.linearActivation()
sig = FFANN.sigmoidActivation()



In [4]:
# Alternative data set (disabled): the linear function y = 2x on 250 points in [0, 1].
# data_in=[[_] for _ in np.linspace(0,1,250)]
# data_out=[[2*_] for _ in np.linspace(0,1,250)]
# brain=FFANN.FFANN(1,1,[3,2,1],[lin,lin,lin,lin,lin])
# brain.init_params(-1,1)


# XOR gate: each input pair is matched, position by position, with its target output.
data_in = [
    [1, 1],
    [0, 0],
    [1, 0],
    [0, 1],
]
data_out = [
    [0],
    [0],
    [1],
    [1],
]

# NOTE(review): presumably 2 inputs, 1 output and one hidden layer of 3 nodes,
# with one activation per non-input layer — confirm against FeedForwardANN.
brain = FFANN.FFANN(2, 1, [3], [sig, lin])
# NOTE(review): presumably initialises the parameters within [-1, 1] — confirm.
brain.init_params(-1, 1)

In [5]:
def Q_i(signal,target):
    '''
    Squared-error loss of a single output: (signal - target)^2.

    signal: the value produced by the network
    target: the value it should have produced
    '''
    residual = signal - target
    return residual ** 2.0

def dQds_i(signal,target):
    '''
    Derivative of the squared error (signal - target)^2 with respect
    to signal: 2*(signal - target).

    signal: the value produced by the network
    target: the value it should have produced
    '''
    residual = signal - target
    return 2.0 * residual

In [6]:
# Loss object built from the per-output loss, its derivative and the network.
Q = FFANN.loss(Q_i, dQds_i, brain)

# Alternative strategy (disabled):
# strategy=FFANN.VanillaSGD(brain,Q,alpha=1e-1)

# NOTE(review): this is the RMSpropSGD from the FeedForwardANN module, not the
# RMSpropSGD class defined earlier in this notebook — confirm which one is intended.
strategy = FFANN.RMSpropSGD(
    brain,
    Q,
    gamma=0.995,
    epsilon=1e-5,
    alpha=1e-2,
)

In [7]:
# Train the network on the data set with the chosen strategy.
brain.SGD(
    strategy,
    data_in,
    data_out,
    abs_tol=1e-2,
    rel_tol=1e-3,
    step_break=150,
    max_step=15000,
)

In [8]:
# Compare the trained network with every target and report the mean and maximum loss.
# Each prediction and loss is computed once per point, and the loop variables are named
# (using `_` here would overwrite IPython's "last output" variable).
assert len(data_in) == len(data_out), 'data_in and data_out must have the same length'

meanQ=0
maxQ=0
for point,target in zip(data_in,data_out):
    prediction=brain(point)
    Qi=Q(prediction,target)
    print('point:',point,'\n'
       'target:',target,
         'FFANN gives:',prediction,
         'loss:',Qi)
    print('')
    if Qi > maxQ:
        maxQ=Qi
    meanQ+=Qi

print('mean loss:', meanQ/float(len(data_out)))
print('max loss:', maxQ)


point: [1, 1] 
target: [0] FFANN gives: [-0.000158628125000837] loss: 2.516288204128117e-08

point: [0, 0] 
target: [0] FFANN gives: [0.0004516103365411195] loss: 2.039518960707832e-07

point: [1, 0] 
target: [1] FFANN gives: [0.9997477511742452] loss: 6.362947009467965e-08

point: [0, 1] 
target: [1] FFANN gives: [0.9997928215967571] loss: 4.292289077028331e-08

mean loss: 8.391678474425684e-08
max loss: 2.039518960707832e-07
