# Erasmus Neural Networks
http://michalbereta.pl/nn
## Softmax layer


## Before you start

Execute the examples.

Then, do the tasks and send back the notebook.

Change the name of this notebook according to the schema: {YourSurname}\_{YourFirstName}\_{OriginalFileName}.

Be sure to fill all places with "YOUR ANSWER HERE".

When ready, send the notebook, with all the necessary files zipped, to the teacher.

### Softmax transform



In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook

# Raw class scores, i.e. unnormalized log-probabilities.
f = np.array([10.1, 3.4, 7.5])
print(f)

# Exponentiating makes every score positive (still unnormalized).
unnorm_probs = np.exp(f)
print(unnorm_probs)

# Dividing by the total turns the positive scores into a distribution.
total = unnorm_probs.sum()
probs = unnorm_probs / total
print(probs)
print(probs.sum())

### Softmax transform - trick for numerical stability

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Raw class scores, i.e. unnormalized log-probabilities.
f = np.array([10.1, 3.4, 7.5])
print(f)

# Naive route: exponentiate the raw scores directly
# (this could be numerically problematic).
unnorm_probs = np.exp(f)
print(unnorm_probs)

probs1 = unnorm_probs / unnorm_probs.sum()
print(probs1)
print(probs1.sum())

# Stable route: shift the scores so that the largest one becomes 0 before
# exponentiating; the common factor cancels out in the normalization.
f = f - f.max()
print('\nafter the trick...')
unnorm_probs = np.exp(f)
print(unnorm_probs)
probs2 = unnorm_probs / unnorm_probs.sum()
print(probs2)
print(probs2.sum())

# Element-wise exact comparison, followed by the largest absolute difference.
print(probs1 == probs2)
print(np.abs(probs1 - probs2).max())

### Experiments with cross-entropy

In [None]:
import numpy as np
def _cross_entropy(target, estimate):
    """Return -sum(target * log(estimate)) for two vectors of equal length."""
    return -(target*np.log(estimate)).sum()


def _peaked(length, peak_index):
    """Return a normalized vector with nearly all of its mass at peak_index."""
    # 1e-15 instead of 0 keeps every entry positive, as log(0) cannot be calculated
    dist = np.full(length, 1e-15, dtype=np.float64)
    dist[peak_index] = 1
    return dist/dist.sum()


dlen = 10

# two random distributions: uniform samples rescaled so that they sum to 1
p = np.random.rand(dlen)
p /= p.sum()
print(p)
print(p.sum())

q = np.random.rand(dlen)
q /= q.sum()
print(q)
print(q.sum())

# the measure is not symmetric: the (p, q) value differs from the (q, p) value
cross_entrophy_pq = _cross_entropy(p, q)
print('cross_entrophy_pq=',cross_entrophy_pq)

cross_entrophy_qp = _cross_entropy(q, p)
print('cross_entrophy_qp=',cross_entrophy_qp)

cross_entrophy_pp = _cross_entropy(p, p)
print('cross_entrophy_pp=',cross_entrophy_pp)

cross_entrophy_qq = _cross_entropy(q, q)
print('cross_entrophy_qq=',cross_entrophy_qq)


# two "peaky" distributions with their peaks at different positions
print()
d1 = _peaked(dlen, 1)
print(d1)

d2 = _peaked(dlen, -2)
print(d2)


cross_entrophy_d1d2 = _cross_entropy(d1, d2)
print('cross_entrophy_d1d2=',cross_entrophy_d1d2)

cross_entrophy_d2d1 = _cross_entropy(d2, d1)
print('cross_entrophy_d2d1=',cross_entrophy_d2d1)

cross_entrophy_d1d1 = _cross_entropy(d1, d1)
print('cross_entrophy_d1d1=',cross_entrophy_d1d1)

cross_entrophy_d2d2 = _cross_entropy(d2, d2)
print('cross_entrophy_d2d2=',cross_entrophy_d2d2)


### Cross entropy values (mean values over all examples) - recommendation:

Cross-Entropy = 0.00: Perfect probabilities

Cross-Entropy < 0.02: Great

Cross-Entropy < 0.05: Very good

Cross-Entropy < 0.20: Fine

Cross-Entropy > 0.30: Not great

Cross-Entropy > 1.00: Terrible

Cross-Entropy > 2.00: Very bad.


### Simple data from three classes

In [None]:
import numpy as np
import matplotlib.pyplot as plt

X = np.loadtxt('data.txt')
print(X)

# Rows 0-2, 3-5 and 6-8 are drawn as red, green and blue circles respectively,
# using the first two columns as plot coordinates.
for first_row, marker in ((0, 'or'), (3, 'og'), (6, 'ob')):
    group = X[first_row:first_row + 3]
    plt.plot(group[:, 0], group[:, 1], marker)
plt.show()

### Softmax layer for multiclass classification problems

Note: this model is strictly for classification problems

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Skeleton of a linear layer with softmax outputs; every method is a stub."""

    def __init__(self, inputs_num, outputs_num):
        """Accept the layer sizes; nothing is stored in this skeleton."""

    def Forward(self, X): #examples as rows in X
        """Stub for the forward pass; does nothing and returns None."""

    def Test(self, X, ClsIndx):
        """Stub for the evaluation step; does nothing and returns None."""

    def GetProbs(self):
        """Stub for the probabilities accessor; returns None."""

    def GetPredictions(self):
        """Stub for the predictions accessor; returns None."""

    def Learn(self, X, ClsIndx, lrate):
        """Stub for the training step; does nothing and returns None."""
        


### Softmax layer for multiclass classification problems - constructor

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Linear layer with softmax outputs; only the constructor is implemented."""

    def __init__(self, inputs_num, outputs_num):
        """Store the layer sizes and create the initial parameters.

        W has shape (inputs_num, outputs_num) with one neuron per column and
        entries drawn uniformly from [-0.01, 0.01); b is a (1, outputs_num)
        row vector of zeros.
        """
        self.inum = inputs_num
        self.onum = outputs_num
        uniform01 = np.random.rand(inputs_num, outputs_num)
        self.W = (2*uniform01 - 1)/100.0 #neurons as columns
        self.b = np.zeros((1, outputs_num)) #horizontal vector
        self.probs = None  # filled in by the forward pass once it exists

    def Forward(self, X): #examples as rows in X
        """Stub for the forward pass; does nothing and returns None."""

    def Test(self, X, ClsIndx):
        """Stub for the evaluation step; does nothing and returns None."""

    def GetProbs(self):
        """Stub for the probabilities accessor; returns None."""

    def GetPredictions(self):
        """Stub for the predictions accessor; returns None."""

    def Learn(self, X, ClsIndx, lrate):
        """Stub for the training step; does nothing and returns None."""
        


### Softmax layer for multiclass classification problems - forward calculation

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Linear layer with softmax outputs; constructor and forward pass only."""

    def __init__(self, inputs_num, outputs_num):
        """Store the layer sizes and create the initial parameters.

        W has shape (inputs_num, outputs_num) with one neuron per column and
        entries drawn uniformly from [-0.01, 0.01); b is a (1, outputs_num)
        row vector of zeros.
        """
        self.inum = inputs_num
        self.onum = outputs_num
        uniform01 = np.random.rand(inputs_num, outputs_num)
        self.W = (2*uniform01 - 1)/100.0 #neurons as columns
        self.b = np.zeros((1, outputs_num)) #horizontal vector
        self.probs = None

    def Forward(self, X): #examples as rows in X
        """Compute row-wise softmax probabilities for X and store them in self.probs.

        Every intermediate result is printed so the steps can be followed.
        """
        scores = np.dot(X, self.W) + self.b
        print('\nf=',scores)
        # subtracting each row's maximum keeps exp() from overflowing
        shifted = scores - scores.max(axis=1, keepdims=True)
        print('\nf trick=',shifted)
        exp_scores = np.exp(shifted)
        print('\nexp=',exp_scores)
        normalized = exp_scores / exp_scores.sum(axis=1, keepdims=True)
        print('\nprobs normalized',normalized)
        self.probs = normalized

    def Test(self, X, ClsIndx):
        """Stub for the evaluation step; does nothing and returns None."""

    def GetProbs(self):
        """Stub for the probabilities accessor; returns None."""

    def GetPredictions(self):
        """Stub for the predictions accessor; returns None."""

    def Learn(self, X, ClsIndx, lrate):
        """Stub for the training step; does nothing and returns None."""
        

# Load the samples: the last column is used as the integer class label,
# the remaining columns as the input features.
data = np.loadtxt('data.txt')
labels = data[:, -1].astype('int32')
X = data[:, :-1]
print('X=',X) 
print()
print('labels=',labels)
print()

# Two inputs, three outputs; the forward pass prints its intermediate results.
net = SoftMaxLinear(2, 3)
net.Forward(X)

### Softmax layer for multiclass classification problems - testing the answers

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Linear layer with softmax outputs; supports forward pass and evaluation."""

    def __init__(self, inputs_num, outputs_num):
        """Store the layer sizes and create the initial parameters.

        W has shape (inputs_num, outputs_num) with one neuron per column and
        entries drawn uniformly from [-0.01, 0.01); b is a (1, outputs_num)
        row vector of zeros.
        """
        self.inum = inputs_num
        self.onum = outputs_num
        uniform01 = np.random.rand(inputs_num, outputs_num)
        self.W = (2*uniform01 - 1)/100.0 #neurons as columns
        self.b = np.zeros((1, outputs_num)) #horizontal vector
        self.probs = None

    def Forward(self, X): #examples as rows in X
        """Compute row-wise softmax probabilities for X and store them in self.probs."""
        scores = np.dot(X, self.W) + self.b
        # subtracting each row's maximum keeps exp() from overflowing
        shifted = scores - scores.max(axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        self.probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)

    def Test(self, X, ClsIndx):
        """Evaluate the layer on X against the class indices ClsIndx.

        Returns a tuple (data_loss, error_rate, errors_num): the mean
        cross-entropy loss, the fraction of misclassified examples and
        their count.
        """
        self.Forward(X)
        ex_num = X.shape[0]
        # probability assigned to the correct class of every example
        true_class_probs = self.probs[np.arange(ex_num), ClsIndx]
        log_total = np.log(true_class_probs).sum()
        data_loss = -log_total/ex_num
        # classification error
        mismatches = self.GetPredictions() != ClsIndx
        errors_num = np.sum(mismatches)
        error_rate = errors_num / ex_num
        return (data_loss, error_rate, errors_num)

    def GetProbs(self):
        """Return the probabilities stored by the last forward pass."""
        return self.probs

    def GetPredictions(self):
        """Return, for every example, the index of the most probable class."""
        return self.probs.argmax(axis=1)

    def Learn(self, X, ClsIndx, lrate):
        """Stub for the training step; does nothing and returns None."""


# Load the samples: the last column is used as the integer class label,
# the remaining columns as the input features.
data = np.loadtxt('data.txt')
labels = data[:, -1].astype('int32')
X = data[:, :-1]
print(X)
print(labels)

# Evaluate a freshly initialized (untrained) layer.
net = SoftMaxLinear(2, 3)
dloss, erate, errors = net.Test(X, labels)
print('dloss:',dloss)
print('erate:',erate)
print('errors:',errors)


### Softmax layer for multiclass classification problems - training

The chain rule is used:

![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Single linear layer followed by softmax, for multiclass classification.

    Examples are the rows of X; each column of W holds the weights of one
    output neuron (one class).
    """
    def __init__(self, inputs_num, outputs_num):
        """Create the layer with small random weights and zero biases.

        W has shape (inputs_num, outputs_num) with entries drawn uniformly
        from [-0.01, 0.01); b is a (1, outputs_num) row vector of zeros.
        """
        self.inum = inputs_num
        self.onum = outputs_num
        self.W = (-1 + 2*np.random.rand(inputs_num, outputs_num))/100.0 #neurons as columns
        self.b = np.zeros((1, outputs_num)) #horizontal vector
        self.probs = None
    def Forward(self, X): #examples as rows in X
        """Compute row-wise softmax probabilities for X and store them in self.probs."""
        f = np.dot(X, self.W) + self.b
        f -= np.max(f, axis=1, keepdims=True) #trick for numerical stability
        probs = np.exp(f)
        probs /= np.sum(probs, axis=1, keepdims=True)
        self.probs = probs
    def Test(self, X, ClsIndx):
        """Evaluate the layer on X against the class indices ClsIndx.

        Returns a tuple (data_loss, error_rate, errors_num): the mean
        cross-entropy loss, the fraction of misclassified examples and
        their count.
        """
        self.Forward(X)
        #data loss: mean cross-entropy loss
        ex_num = X.shape[0]
        data_loss = -np.log(self.probs[range(ex_num),ClsIndx]).sum()/ex_num
        #classification error
        predictions = np.argmax(self.probs, axis=1)
        errors_num = np.sum(predictions != ClsIndx)
        error_rate = errors_num / ex_num
        return (data_loss, error_rate, errors_num)
    def GetProbs(self):
        """Return the probabilities stored by the last forward pass."""
        return self.probs
    def GetPredictions(self):
        """Return, for every example, the index of the most probable class."""
        return np.argmax(self.probs, axis=1)
    def Learn(self, X, ClsIndx, lrate): #just one epoch here
        """Run one gradient-descent step on the mean cross-entropy loss.

        X holds the examples as rows, ClsIndx the correct class index of
        every example and lrate the step size. The gradients and the updated
        parameters are printed; nothing is returned.
        """
        self.Forward(X)
        #gradient of the loss with respect to the scores:
        #probabilities minus one-hot targets
        ex_num = X.shape[0]
        dprobs = self.probs.copy()
        dprobs[range(ex_num), ClsIndx] -= 1.0
        dprobs /= ex_num #average over all examples
        print('dprobs')
        print(dprobs)
        #gradient of weights and biases
        dW = np.dot(X.T, dprobs) # chain rule to calculate gradients
        db = np.sum(dprobs, axis=0,keepdims=True)
        print('dW')
        print(dW)
        print('db')
        print(db)
        #update neurons
        self.W = self.W - lrate*dW
        self.b = self.b - lrate*db
        #report this instance's own parameters (not a module-level variable)
        print('W')
        print(self.W)
        print('b')
        print(self.b)
        

# Load the samples: the last column is used as the integer class label,
# the remaining columns as the input features.
X = np.loadtxt('data.txt')
labels = X[:,-1].astype('int32')
X = X[:,:-1]
print(X)
print(labels)

print()

net = SoftMaxLinear(2, 3)

# Metrics of the untrained layer, for comparison with the trained one.
(dloss, erate, errors) = net.Test(X, labels)
print('dloss:',dloss)
print('erate:',erate)
print('errors:',errors)
print(net.GetPredictions())

print('starting learning...')
target_loss = 0.1
# Safety cap: without it the loop never ends when the loss cannot drop
# below target_loss (e.g. for data that is not linearly separable).
max_iterations = 100000
counter = 0
while counter < max_iterations:
    net.Learn(X, labels, 1)
    (dloss, erate, errors) = net.Test(X, labels)
    print('\n\niteration', counter+1)
    print('probs:\n',net.probs)
    print('dloss:',dloss)
    print('erate:',erate)
    print('errors:',errors)
    print(net.GetPredictions())
    counter += 1
    if dloss < target_loss:
        break
else:
    # reached only when the loop ends without hitting the break above
    print('warning: target loss not reached after', max_iterations, 'iterations')
print('learning finished')


### Task 1

THIS TASK IS NOT OBLIGATORY. DO IT ONLY IF YOU WANT.

- Implement a Python class for RBF network with softmax output layer.

- As the starting point, use the RBF class with Widrow-Hoff models as output layer (see the previous notebook)

- Demonstrate how your code works on the data_3classes_nonlinear_2D.txt and diabetes datasets

In [None]:
#YOUR CODE HERE