# Erasmus Neural Networks
http://michalbereta.pl/nn
## Perceptron learning


## Before you start

Execute the examples.

Then, do the tasks and send back the notebook.

Change the name of this notebook according to the schema: {YourSurname}\_{YourFirstName}\_{OriginalFileName}.

Be sure to fill in every place marked "YOUR ANSWER HERE" or "YOUR CODE HERE".

When ready, send the notebook, with all the necessary files zipped, to the teacher.

### Simple perceptron example

Start with initial weights, check the answer for some input values. If there is a misclassification, update the weights.

Observe that, even after one update, the perceptron is closer to the correct answer.

Experiment with different starting values.

In [None]:
#!/usr/bin/env python
import numpy as np

# starting weights of the perceptron
w = np.array([0.3, -0.4])
w0 = 0.1  # bias weight

print('initial weights, w=',w,' w0=',w0)

# a single training example: input vector and its class label (1 or -1)
x = np.array([-0.5, 0.9 ])
d = 1

print('example x=',x,' is from class ',d)

# learning rate
eta = 0.1

# response before learning: weighted sum followed by a threshold activation
y = np.dot(w,x) + w0
print('weighted sum, y=',y)
u = 1 if y > 0 else -1
print('percetron says: x is from class ',u)

# perceptron rule, applied once and only when the example is misclassified:
# every weight moves by eta*input*label (the bias weight sees the constant input 1)
if u != d:
    w[:] = w + eta*x*d
    w0 += eta*1*d

print('weights after update, w=',w,' w0=',w0)

# response after the (at most one) update
y = np.dot(w,x) + w0
print('weighted sum, y=',y)
u = 1 if y > 0 else -1
print('percetron says: x is from class ',u)


### Simple perceptron example 2

Start with initial weights, check the answer for some input values. 

If there is a misclassification, update the weights until the answer is correct.

Experiment with different starting values.

In [None]:
#!/usr/bin/env python
import numpy as np

# starting weights of the perceptron
w = np.array([0.3, -0.4])
w0 = 0.1  # bias weight

# a single training example: input vector and its class label
x = np.array([-0.5, 0.9 ])
d = 1

print('example x=',x,' is from class ',d)

# learning rate
eta = 0.1

# response before any learning
y = np.dot(w,x) + w0
print('weighted sum, y=',y)
u = 1 if y > 0 else -1
print('percetron says: x is from class ',u)

# repeat the perceptron update until the example is classified correctly
while u != d:
    # every weight moves by eta*input*label (the bias weight sees the constant input 1)
    w[:] = w + eta*x*d
    w0 += eta*1*d
    print('\nweights after update, w=',w,' w0=',w0)

    # re-evaluate the perceptron with the new weights
    y = np.dot(w,x) + w0
    print('weighted sum, y=',y)
    u = 1 if y > 0 else -1
    print('percetron says: x is from class ',u)

print('learning done')

### 2D data from normal distributions

In [None]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

# Example data: two classes, each drawn from a 2D normal distribution
# with identity covariance (x2 around (-2,-2), x1 around (2,2)).
num = 100
x2 = np.random.multivariate_normal([-2,-2], np.array([[1,0],[0,1]]),num)
x1 = np.random.multivariate_normal([2,2], np.array([[1,0],[0,1]]),num)

# visible part of the plane
xmin, xmax = -6, 6
ymin, ymax = -6, 6

plt.xlim(xmin,xmax)
plt.ylim(ymin,ymax)

# one marker series per class, class 1 first
for cloud in (x1, x2):
    plt.plot(cloud[:,0],cloud[:,1],'o')

plt.show()

### Perceptron online learning animation

#### DO NOT USE THIS CODE AS A TEMPLATE FOR YOUR PROGRAMS!

In [None]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from time import time


# Two Gaussian classes with identity covariance. A column of ones is put in
# front of every sample so the bias is handled as an ordinary weight.
num = 100

# class 0: centred at (2, 2)
x0 = np.random.multivariate_normal([2,2], np.array([[1,0],[0,1]]),num)
x0 = np.hstack((np.ones((num, 1)), x0))
d0 = np.zeros(num, dtype=int)

# class 1: centred at (-2, -2)
x1 = np.random.multivariate_normal([-2,-2], np.array([[1,0],[0,1]]),num)
x1 = np.hstack((np.ones((num, 1)), x1))
d1 = np.ones(num, dtype=int)

# full training set: samples stacked row-wise, labels in the same order
X = np.concatenate((x0, x1), axis=0)
d = np.concatenate((d0, d1))


def perceptron(x, w):
    """Return the binary response of a threshold unit.

    x -- input vector (the caller supplies the bias input explicitly)
    w -- weight vector of the same length

    Returns 1 when the weighted sum np.dot(x, w) is strictly positive,
    otherwise 0.
    """
    if np.dot(x, w) > 0:
        return 1
    return 0

# training length and learning rate
epoch = 100
eta = 0.01

# one animation frame per presented sample
iteration = epoch * len(X)

# random starting weights; weights[0] multiplies the bias input
weights = np.random.random(3) #perceptron weights

# decision line weights[0] + weights[1]*x + weights[2]*y = 0, rewritten as y = a*x + b
b, a = -weights[:2]/weights[2]

xx = np.linspace(-6, 6, 100)
yy = a*xx+b

# initial picture: both classes (bias column skipped) and the starting line
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_xlim(-6, 6)
ax.set_ylim(-6, 6)
for cloud, colour in ((x0, 'r'), (x1, 'g')):
    ax.scatter(cloud[:,1], cloud[:,2], color=colour)
ax.plot(xx, yy, linewidth=2, color='k')

def animate(i):
    """Present one training sample to the perceptron and redraw on a weight change.

    i -- frame number; divmod(i, number of samples) yields the epoch and the
         index of the sample to present.

    The global `weights` is corrected in place by eta times the sample when
    the perceptron answers 1 for a class-0 sample (subtract) or 0 for a
    class-1 sample (add). Only then are the axes cleared and redrawn with
    both classes, the current sample highlighted, and the new decision line.
    Returns a one-element tuple holding the axes.
    """
    global weights
    e, p = divmod(i, X.shape[0])
    sample = X[p,:]
    outcome = (perceptron(sample, weights), d[p])

    if outcome == (1, 0):
        # answered 1 for a class-0 sample: move the weights away from it
        weights -= eta*sample
    elif outcome == (0, 1):
        # answered 0 for a class-1 sample: move the weights towards it
        weights += eta*sample
    else:
        # nothing changed, so nothing to redraw
        return ax,

    # decision line of the updated weights, written as y = slope*x + intercept
    slope = -weights[1]/weights[2]
    intercept = -weights[0]/weights[2]
    line_y = slope*xx+intercept

    ax.clear()
    for cloud, colour in ((x0, 'r'), (x1, 'g')):
        ax.scatter(cloud[:,1], cloud[:,2], color=colour)
    # highlight the sample that triggered this update
    ax.scatter(sample[1], sample[2], color='k', s=80)
    ax.plot(xx, line_y, lw=2, c='k')
    ax.text(0.0, -5.0, 'epoch: {0}, sample: {1}'.format(e,p))
    ax.set_xlim(-6, 6)
    ax.set_ylim(-6, 6)
    return ax,

# Time one call of animate() and subtract that duration from 100*dt to obtain
# the frame interval handed to FuncAnimation.
# Note: this call presents sample 0 and may already change `weights`.
# NOTE(review): time() differences are in seconds, while FuncAnimation's
# `interval` is documented in milliseconds -- confirm the intended units; a
# slow first call could also make `interval` negative.
dt = 1./30
t0 = time()
animate(0)
t1 = time()
interval = 100 * dt - (t1 - t0)

# one frame per presented sample, `iteration` frames in total
anim = animation.FuncAnimation(fig, animate, frames=iteration, interval=interval)

plt.show()

### Perceptron batch learning animation

#### DO NOT USE THIS CODE AS A TEMPLATE FOR YOUR PROGRAMS!

In [None]:
%matplotlib notebook
import numpy as np
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import matplotlib.animation as animation

# Example data: two classes, each drawn from a 2D normal distribution
# with identity covariance (x2 around (-2,-2), x1 around (2,2)).
num = 100
x2 = np.random.multivariate_normal([-2,-2], np.array([[1,0],[0,1]]),num)
x1 = np.random.multivariate_normal([2,2], np.array([[1,0],[0,1]]),num)

# visible part of the plane
xmin, xmax = -6, 6
ymin, ymax = -6, 6

# Weight initialization: small random values from [-1, 1), both signs.
# w[0] is the bias weight.
# Try w = np.zeros(3) instead: in general, initializing the weights to zero
# is not a good idea.
w = 2*np.random.rand(3) - 1
print(w)

# grid over the visible plane, used for visualization only
delta = 0.01 # grid resolution
x = np.arange(xmin, xmax, delta)
y = np.arange(ymin, ymax, delta)
X, Y = np.meshgrid(x, y)
# perceptron weighted sum at every grid node for the initial weights
Z = w[1]*X + w[2]*Y + w[0]

fig = plt.figure()

plt.xlim(xmin,xmax)
plt.ylim(ymin,ymax)
# one marker series per class, class 1 first
for cloud in (x1, x2):
    plt.plot(cloud[:,0],cloud[:,1],'o')

#this function is one iteration of batch perceptron learning
def update(i):
    """Run one batch-learning step of the perceptron and redraw the decision regions.

    i -- frame number passed in by the animation; it is only printed.

    Reads the global samples `x1` (class 1, expected response > 0) and `x2`
    (class 2, expected response <= 0), updates the global weight vector `w`
    (w[0] is the bias weight), prints diagnostics and redraws the figure.

    The weights are modified as long as at least one sample is misclassified.
    The earlier test `abs(M.sum()) > 0` summed the components of M, which can
    cancel out (e.g. M = [1, -1]) while misclassified samples remain, so
    learning could stall and falsely report 'learning done'.
    """
    global w
    print()
    print('iteration=',i)

    #perceptron responses to examples from class 1,
    #we assume they should be > 0
    ans1 = np.dot(x1, w[1:]) + w[0]
    wrong1 = ans1<=0 #mask of misclassified examples from class 1
    errors1 = wrong1.sum() #number of misclassifications from class 1
    print('errors1=',errors1)
    M1 = x1[wrong1] #selected examples that are misclassified from class 1
    criterion1 = ans1[wrong1].sum() #perceptron criterion - part 1, from class 1
    print('criterion1=',criterion1)

    #perceptron responses to examples from class 2,
    #we assume they should be <= 0
    ans2 = np.dot(x2, w[1:]) + w[0]
    wrong2 = ans2>0 #mask of misclassified examples from class 2
    errors2 = wrong2.sum() #number of misclassifications from class 2
    print('errors2=',errors2)
    M2 = x2[wrong2] #selected examples that are misclassified from class 2
    criterion2 = ans2[wrong2].sum() #perceptron criterion - part 2, from class 2
    print('criterion2=',criterion2)

    #full perceptron criterion - we should minimize it
    criterion = np.abs(criterion1) + np.abs(criterion2)
    print('criterion=',criterion)

    M1 = M1.sum(axis=0) #summed misclassified examples from class 1
    M2 = M2.sum(axis=0) #summed misclassified examples from class 2
    M = M1 - M2 #vector giving the direction of the change of the weights vector

    print('M=',M)
    eta = 0.005 #learning rate

    #weights modification - as long as any example is misclassified
    if errors1 + errors2 > 0:
        w[1] += eta*M[0]
        w[2] += eta*M[1]
        w[0] += eta*(errors1-errors2) #bias weight modification
        #a trick: normalization of the weight vector
        #connected with the learning rate - both influence the convergence of the training process
        norm = np.linalg.norm(w)
        if norm > 0: #a zero vector cannot be normalized (would give NaN weights)
            w = w/norm
    else:
        print('learning done')
    print('w=',w)

    #visualization: samples on top of the two decision regions
    plt.clf()
    plt.plot(x1[:,0],x1[:,1],'o')
    plt.plot(x2[:,0],x2[:,1],'o')
    Z = X*w[1] + Y*w[2] + w[0]
    Z[Z>0] = 1
    Z[Z<=0] = -1
    im = plt.imshow(Z, interpolation='bilinear', cmap=cm.hot,
                origin='lower', extent=[xmin, xmax, ymin, ymax],
                vmax=abs(Z).max(), vmin=-abs(Z).max())

    
# Start learning: the animation calls `update` once per 1000 ms interval,
# so every frame is one batch-learning iteration.
ani = animation.FuncAnimation(fig=fig, func=update, interval=1000, blit=False)
plt.show()

print('end')

### Line separating two points (no bias version)

Does it work for any two points?

Check for example:

```
x1 = np.array([-2, -2]) #from class 1
x2 = np.array([-4, -4]) # from class -1
```

In [None]:
#!/usr/bin/env python
import numpy as np
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import matplotlib.animation as animation


# visible part of the plane
xmin, xmax = -6, 6
ymin, ymax = -6, 6

# one point per class
x1 = np.array([4, 4]) #from class 1
x2 = np.array([-4, -4]) # from class -1

# Without a bias the separating line always passes through the origin;
# its normal vector is taken as half of the difference between the two points.
w = (x1 - x2)/2.0
print(w)

# grid over the visible plane, used for visualization only
delta = 0.01 # grid resolution
x = np.arange(xmin, xmax, delta)
y = np.arange(ymin, ymax, delta)
X, Y = np.meshgrid(x, y)

fig = plt.figure()
plt.xlim(xmin,xmax)
plt.ylim(ymin,ymax)
# the two points and the origin
for point in (x1, x2, (0, 0)):
    plt.plot(point[0],point[1],'o')

# visualization: +1 where the weighted sum is positive, -1 elsewhere
Z = np.where(X*w[0] + Y*w[1] > 0, 1.0, -1.0)
im = plt.imshow(Z, interpolation='bilinear', cmap=cm.hot,
            origin='lower', extent=[xmin, xmax, ymin, ymax],
            vmin=-np.abs(Z).max(), vmax=np.abs(Z).max())

plt.show()
print('end')

### Line separating two points (version with bias)

Does it work for any two points?

Check for example:

```
x1 = np.array([-2, -2]) #from class 1
x2 = np.array([-4, -4]) # from class -1
```

In [None]:
#!/usr/bin/env python
import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt


# visible part of the plane
xmin, xmax = -6, 6
ymin, ymax = -6, 6

# one point per class
x1 = np.array([-2, -1]) #from class 1
x2 = np.array([-4, -4]) # from class -1

# direction from x2 to x1 and the midpoint between the two points
v = x1 - x2
mid = (x1 + x2)/2.0

# The normal vector of the line is v; the bias is chosen so that the
# weighted sum is zero at the midpoint: w . mid + w0 = 0.
w = v
w0 = -(v[0]*mid[0] + v[1]*mid[1])

print(w, w0)

# responses of the resulting unit for both points
print('for x1: ', np.dot(w,x1) + w0)
print('for x2: ', np.dot(w,x2) + w0)

# grid over the visible plane, used for visualization only
delta = 0.01 # grid resolution
x = np.arange(xmin, xmax, delta)
y = np.arange(ymin, ymax, delta)
X, Y = np.meshgrid(x, y)

fig = plt.figure()
plt.xlim(xmin,xmax)
plt.ylim(ymin,ymax)
# the two points and the origin
for point in (x1, x2, (0, 0)):
    plt.plot(point[0],point[1],'o')

# visualization: +1 where the weighted sum is positive, -1 elsewhere
Z = np.where(X*w[0] + Y*w[1] + w0 > 0, 1.0, -1.0)
im = plt.imshow(Z, interpolation='bilinear', cmap=cm.hot,
            origin='lower', extent=[xmin, xmax, ymin, ymax],
            vmin=-np.abs(Z).max(), vmax=np.abs(Z).max())


plt.show()

print('end')

### Task 1
What happens when the data generated and used for perceptron training is not linearly separable? Can the perceptron algorithm still be used? How important is the value of the learning rate (eta) in such a case?



In [None]:
#YOUR ANSWER HERE

### Task 2

Prepare your own implementation of perceptron learning

– Use the online or batch mode

– Be ready for any number of attributes (inputs to perceptron)


In [None]:
#YOUR CODE HERE

### Task 3

Train your perceptron on the 5D examples from data5D_train.csv. The last column is the class label.

Generate answers (as 1 or -1) for data in data5D_test.csv and save them to a file.

In [None]:
#YOUR CODE HERE