In [6]:
from sklearn import datasets
import numpy as np

IRIS
-------
Import the data:

In [8]:
# Feature matrix of the Iris data set (the 'data' entry returned by sklearn).
irisData = datasets.load_iris()['data']

# Cut the rows [0, 50), [50, 100) and [100, 150) into one array per class.
class1, class2, class3 = (irisData[lo:lo + 50] for lo in (0, 50, 100))
classes = [class1, class2, class3]

# Column indices 0..3 of the feature matrix.
features = np.arange(4)

Extract the training and testing data for the different classes: 

In [9]:
# Split every class into its first 30 rows (training) and remaining 20 rows
# (testing).  Each entry is a plain 2-D array restricted to the columns listed
# in `features`, so indexing an entry with a sample number -- or iterating over
# it -- yields exactly one feature vector.  Bundling the array together with
# `features` in a list would instead hand the whole 30x4 block to `np.append`,
# which flattens it and makes the later reshape to (F+1, 1) fail.
training1 = class1[0:30][:, features]
training2 = class2[0:30][:, features]
training3 = class3[0:30][:, features]
training = [training1, training2, training3]

testing1 = class1[30:50][:, features]
testing2 = class2[30:50][:, features]
testing3 = class3[30:50][:, features]
testing = [testing1, testing2, testing3]

In order to classify the data we need to define the sigmoid function:

In [10]:
def sigmoid(X):
    """Element-wise logistic function 1 / (1 + exp(-X)).

    Parameters
    ----------
    X : scalar or array-like
        Input value(s); lists are accepted and converted with ``np.asarray``.

    Returns
    -------
    NumPy scalar for scalar input, otherwise an array with the shape of ``X``.

    Notes
    -----
    Only ``exp(-|X|)`` is evaluated, which lies in (0, 1] and therefore never
    overflows.  The naive ``exp(-X)`` overflows (with a RuntimeWarning) for
    large negative inputs.
    """
    X = np.asarray(X)
    decay = np.exp(-np.abs(X))
    # For X >= 0 this is the textbook form; for X < 0 the algebraically equal
    # exp(X) / (1 + exp(X)) is used so the exponent stays non-positive.
    result = np.where(X >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result into a scalar and leaves arrays as is.
    return result[()]

The training is done by minimizing the mean square error of the classifier.  The MSE is given by:

$
MSE = \frac{1}{2}\sum_{k=1}^{N} (g_k - t_k)^T(g_k-t_k)
$

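$t_k$ is the true (one-hot) classification of each $x_k$ and $g_k = \sigma(Wx_k)$ is the output of the classifier, where $\sigma$ is the sigmoid function defined above and $x_k$ is extended with a constant 1 so that the last column of $W$ acts as an offset. $W$ is a weighting matrix for the different values in the vector $x$. This is the only tweakable parameter and thus, in order to minimize the MSE, we need to find the gradient of the MSE with respect to $W$, $\nabla_W MSE = \sum_{k=1}^{N} \left[(g_k - t_k) \circ g_k \circ (1 - g_k)\right] x_k^T$ (with $\circ$ the element-wise product), and update $W$ by subtracting a step size $\alpha$ times the gradient. This is done iteratively many times.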


Defining some parameters for the training of the classifier:

In [104]:
N_iter = 5000  # how many gradient steps to take
alpha = 1      # step length

features = list(range(4))
F, C = len(features), len(classes)  # number of features, number of classes

Defining the weighting matrix and the target data, $t_k$:

In [150]:
# Step length for this run.
# NOTE(review): the activations recorded for the test samples are all strongly
# negative, which may indicate that this step length saturates the sigmoid
# early in training -- confirm.
alpha = 0.1

# Weight matrix [W | w_0]: one row per class, F feature weights followed by an
# offset column, all initialised to zero.
W = np.zeros((C, F))
w_0 = np.zeros((C, 1))
W = np.hstack((W, w_0))

# Column-vector targets, one per class.
target1 = np.array([[1], [0], [0]])
target2 = np.array([[0], [1], [0]])
target3 = np.array([[0], [0], [1]])
target = [target1, target2, target3]

W_last = [[1] * 5 for _ in range(3)]

n_samples = len(training[0])
for _ in range(N_iter):
    W_prev = W
    grad_MSE = np.zeros((C, F + 1))
    for k in range(n_samples):
        # Visit the k-th sample of every class together with that class' target.
        for class_samples, t in zip(training, target):
            # Augment the sample with a trailing 1 for the offset column.
            xk = np.append(class_samples[k], 1).reshape(F + 1, 1)
            gk = sigmoid(W @ xk)
            # Contribution of this sample: ((g - t) * g * (1 - g)) x^T
            grad_MSE += ((gk - t) * gk * (1 - gk)) @ xk.T
    W = W_prev - alpha * grad_MSE

print(np.linalg.norm(W))
print(W)

5.665105419993294
[[-2.85996390e+00 -8.52474885e-01 -3.17374011e+00 -1.16749871e+00
  -3.74992033e-01]
 [-2.07730572e+00 -1.34741292e+00 -1.02861539e+00 -3.37459567e-01
  -3.74962388e-01]
 [-1.60307851e+00 -1.19823472e+00  1.84082703e-03  1.81929364e-01
  -3.59220179e-01]]


In [152]:
# Push every test sample of every class through the trained weights and print
# the raw activation vector W @ [x; 1].
for c, _ in enumerate(classes):
    for sample in testing[c]:
        # Append the constant 1 that multiplies the offset column of W.
        xk = np.append(sample, 1).reshape(F + 1, 1)
        zk = W @ xk
        print(zk)
        # Sigmoid output; evaluated here but not used further in this cell.
        gk = sigmoid(zk)

[[-22.05697479]
 [-16.23628641]
 [-11.72919347]]
[[-23.94482133]
 [-17.85152409]
 [-13.01430921]]
[[-23.61931136]
 [-18.27801412]
 [-13.58703662]]
[[-24.36192387]
 [-18.96683154]
 [-14.16977479]]
[[-22.02559717]
 [-16.34115544]
 [-11.88968541]]
[[-21.44471902]
 [-16.37504269]
 [-12.17036898]]
[[-23.44781744]
 [-17.92078096]
 [-13.33119457]]
[[-22.01771073]
 [-16.87825441]
 [-12.50717979]]
[[-19.87561971]
 [-14.96203822]
 [-10.96869085]]
[[-22.85333242]
 [-17.16084046]
 [-12.56977153]]
[[-22.13458536]
 [-16.91587406]
 [-12.51146238]]
[[-19.68163355]
 [-14.2603257 ]
 [-10.27204145]]
[[-20.04611469]
 [-15.2315208 ]
 [-11.20833779]]
[[-23.43695701]
 [-17.32569655]
 [-12.45633132]]
[[-24.69731816]
 [-18.1787437 ]
 [-13.01194321]]
[[-21.45372915]
 [-15.929568  ]
 [-11.59154523]]
[[-23.51169638]
 [-17.80266717]
 [-13.04888133]]
[[-20.93548148]
 [-15.74984348]
 [-11.52876941]]
[[-23.68106766]
 [-17.98052548]
 [-13.24985765]]
[[-22.16471453]
 [-16.71550706]
 [-12.28982429]]
[[-31.49519418]
 [-1