In [5]:
import numpy as np
import matplotlib.pyplot as plt

In [233]:
class Network():
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape
        self.w = self.Compile(input_shape, output_shape)
        self.alpha=0.05
    
    
    def fit_heb(self, X, Y):
        for elem in zip(X,Y):
            self.update_w(elem[0], elem[1], self.w)
            
    
    def remove(self,x,y):
        self.update_w(-x, y, self.w)
    
    
    def add(self, x,y):
        self.update_w(x, y, self.w)
        
    def predict(self,X):
        result = []
        for x in X:
            result.append(self.activate(self.WxX(self.w, x)))
        
        result = np.array(result)
        
        return result
    
    def predict_delta_rule_trained(self,X):
        result = []
        for x in X:
            result.append(self.predict_sigmoid(x)[1])
            
        resutl = np.array(result)
        return result
    
    
    def predict_sigmoid(self,x):
        raw = self.WxX(self.w, x)
        activated = self.sigmoid_bipolar(raw)
        return raw, activated
    
    def fit_delta_rule(self, X,Y, epochs=10):
        
        for _ in range(epochs):
            for x,y in zip(X,Y):
                raw_output, activated_output = self.predict_sigmoid(x)
                vec1 = self.alpha*(activated_output - y)* 2*self.dev_sigmoid(raw_output)
                vec2 = x
                self.w += vec1.reshape((-1,1)) @ vec2.reshape((1,-1))

    #### Low Level Functions ####
    
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    
    def sigmoid_bipolar(self,x):
        return 2*self.sigmoid(x)- 1
    
    def dev_sigmoid(self,x):
        return self.sigmoid(x)*(1-self.sigmoid(x))
    
    def Compile(self, input_shape, output_shape):
        '''
        this function will initialize the w_list matrix
        the initial values will be zero
        '''
        w = np.zeros((output_shape, input_shape))
        return w
    
    def update_w(self,x,y,w):
        w += y.reshape((-1,1))@x.reshape((1,-1))
    
    
    def activate(self,x):
        x[x<0] = -1
        x[x>0] = 1
        x[x==0] = 0
        return x
        
    def WxX(self,w,x):
        result =  w @ x.reshape((-1,1))
        return result.reshape(-1)  # returns shape less

In [102]:
input_shape = 4
output_shape = 1


In [239]:
x = [[1,1,1,1],[-1,1,-1,-1],[1,1,1,-1],[1,-1,-1,1]]
y = [[1],[1],[-1],[-1]]
x = np.array(x)
y = np.array(y)

## Training with Hebbian rule

1. to add a s:t pair to the network, update the weights matrix with the following relations:

$W = W + Y.X^T$

2. to remove a s:t pair from the network, update the weights matirx with following realtion:

$W = W + Y^c.X^T$

in which $Y^c$ can be obtained by turning all of the 1's in the $Y$ to -1 and viceversa



In [104]:
model = Network(input_shape, output_shape)

In [105]:
model.fit_heb(x,y)

In [106]:
model.w

array([[-2.,  2.,  0.,  0.]])

In [107]:
model.predict(x)

array([[ 0.],
       [ 1.],
       [ 0.],
       [-1.]])

## Learning with Delta Rule

### 1. Activation function

Since using the delta rule to trian a network requires an activation function that has derivative, so we set the activation function to be sigmoid. But note that the sigmoid function should be changed to a biplora format (with outputs that varies form -1 to 1 rather than 0 to 1 in the regular one)

$\sigma_{binary}(x) = \dfrac{1}{1 + e^{-x}} \Rightarrow \sigma_{bipolar}(x) = 2 \sigma_{binary}(x)-1 $

and also for the derivative we can write:

$\sigma'_{binary}(x) = \sigma(x)(1-\sigma(x)) \Rightarrow \sigma'_{bipolar}(x) = 2\sigma'_{binary}(x) = 2\sigma(x)(1-\sigma(x)) $


### 2. updating rule

$\Delta w_{ji} = \alpha (t_j - y_j) \sigma_{bipolar}'(h_j) x_i = 2\alpha (t_j - y_j) \sigma'(h_j) x_i $

in which $w_{ji}$ is the connection from neuron $i$ in the input layer to the neuron $j$ in the output layer. 

And also the $h_j$ is the input of neuron $j$ in which is in fact the weighted average of the output of the neurons in the previous layer 



In [240]:
model = Network(input_shape, output_shape)

In [245]:
model.fit_delta_rule(x,y)

In [246]:
model.predict_delta_rule_trained(x)

[array([-0.15807176]),
 array([-0.84384648]),
 array([-0.03171385]),
 array([0.85788382])]

### answer to comparison between methods:
As you can see the network which is trained using the hebb learning rule is more persistent against noise and also has very lower prediction error 