# Erasmus Neural Networks
http://michalbereta.pl/nn
## Softmax layer


## Before you start

Execute the examples.

Then, do the tasks and send back the notebook.

Change the name of this notebook according to the schema: {YourSurname}\_{YourFirstName}\_{OriginalFileName}.

Be sure to fill all places with "YOUR ANSWER HERE".

When ready, send the notebook, with all the necessary files zipped, to the teacher.

### Softmax transform



In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook

# Raw class scores, i.e. unnormalized log-probabilities.
f = np.array([10.1, 3.4, 7.5])
print(f)

# Exponentiating the scores gives unnormalized probabilities.
unnorm_probs = np.exp(f)
print(unnorm_probs)

# Dividing by their total yields a distribution that sums to 1.
probs = unnorm_probs / unnorm_probs.sum()
print(probs)
print(probs.sum())

### Softmax transform - trick for numerical stability

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Score function values (unnormalized log-probabilities).
f = np.array([10.1, 3.4, 7.5])
print(f)

# Naive softmax: exponentiate the raw scores directly.
unnorm_probs = np.exp(f)  # this could be numerically problematic (overflow for large scores)
print(unnorm_probs)

# Normalized probabilities from the naive computation.
probs1 = unnorm_probs / np.sum(unnorm_probs)
print(probs1)
print(np.sum(probs1))

# The trick for numerical stability: subtract the largest score first.
# The shift cancels out in the ratio, so the probabilities are mathematically
# unchanged, but the largest exponent becomes exp(0) = 1 and cannot overflow.
f = f - np.max(f)
print('\nafter the trick...')
unnorm_probs = np.exp(f)
print(unnorm_probs)
probs2 = unnorm_probs / np.sum(unnorm_probs)
print(probs2)
print(np.sum(probs2))

# Compare both results: element-wise equality and the largest absolute difference.
print(probs1 == probs2)
print(np.abs(probs1-probs2).max())

### Simple data from three classes

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Read the whole data file and echo it.
X = np.loadtxt('data.txt')
print(X)

# Rows 0-2, 3-5 and 6-8 are drawn as red, green and blue circles respectively;
# column 0 is used as the x coordinate and column 1 as the y coordinate.
for first_row, marker in ((0, 'or'), (3, 'og'), (6, 'ob')):
    group = X[first_row:first_row + 3]
    plt.plot(group[:, 0], group[:, 1], marker)
plt.show()

### Softmax layer for multiclass classification problems

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Skeleton of a softmax layer; every method is still an empty placeholder."""

    def __init__(self, inputs_num, outputs_num):
        """Placeholder constructor (does nothing yet)."""

    def Forward(self, X):
        """Placeholder forward pass; examples are expected as rows in X."""

    def Test(self, X, ClsIndx):
        """Placeholder evaluation (does nothing yet)."""

    def GetProbs(self):
        """Placeholder accessor (returns None for now)."""

    def GetPredictions(self):
        """Placeholder accessor (returns None for now)."""

    def Learn(self, X, ClsIndx, lrate):
        """Placeholder training step (does nothing yet)."""
        


### Softmax layer for multiclass classification problems - constructor

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Softmax layer; at this stage only the constructor is implemented."""

    def __init__(self, inputs_num, outputs_num):
        """Store the layer sizes and create the initial parameters."""
        self.inum, self.onum = inputs_num, outputs_num
        # Each neuron is one column of W; entries are uniform in [-0.01, 0.01).
        uniform01 = np.random.rand(inputs_num, outputs_num)
        self.W = (2*uniform01 - 1)/100.0
        # Biases start at zero and are kept as a horizontal (1, outputs_num) vector.
        self.b = np.zeros((1, outputs_num))
        # Class probabilities; nothing has been computed yet.
        self.probs = None

    def Forward(self, X):
        """Placeholder forward pass; examples are expected as rows in X."""

    def Test(self, X, ClsIndx):
        """Placeholder evaluation (does nothing yet)."""

    def GetProbs(self):
        """Placeholder accessor (returns None for now)."""

    def GetPredictions(self):
        """Placeholder accessor (returns None for now)."""

    def Learn(self, X, ClsIndx, lrate):
        """Placeholder training step (does nothing yet)."""
        


### Softmax layer for multiclass classification problems - forward calculation

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Softmax layer with a verbose forward pass; the other methods are placeholders."""

    def __init__(self, inputs_num, outputs_num):
        """Store the layer sizes and create the initial parameters."""
        self.inum, self.onum = inputs_num, outputs_num
        # Each neuron is one column of W; entries are uniform in [-0.01, 0.01).
        uniform01 = np.random.rand(inputs_num, outputs_num)
        self.W = (2*uniform01 - 1)/100.0
        # Biases start at zero and are kept as a horizontal (1, outputs_num) vector.
        self.b = np.zeros((1, outputs_num))
        self.probs = None

    def Forward(self, X):
        """Compute class probabilities for the examples stored as rows of X.

        Every intermediate result is printed; the final probabilities are
        stored in self.probs.
        """
        scores = np.dot(X, self.W) + self.b
        print('\nf=',scores)
        # Subtract each row's maximum so the exponentials cannot overflow.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        print('\nf trick=',shifted)
        exps = np.exp(shifted)
        print('\nexp=',exps)
        # Normalize every row so that it sums to one.
        normalized = exps / np.sum(exps, axis=1, keepdims=True)
        print('\nprobs normalized',normalized)
        self.probs = normalized

    def Test(self, X, ClsIndx):
        """Placeholder evaluation (does nothing yet)."""

    def GetProbs(self):
        """Placeholder accessor (returns None for now)."""

    def GetPredictions(self):
        """Placeholder accessor (returns None for now)."""

    def Learn(self, X, ClsIndx, lrate):
        """Placeholder training step (does nothing yet)."""
        

# The last column of the file holds the class index, the rest are the inputs.
data = np.loadtxt('data.txt')
labels = data[:, -1].astype('int32')
X = data[:, :-1]
print('X=',X)
print()
print('labels=',labels)
print()

# Two inputs, three classes; run a single verbose forward pass.
net = SoftMaxLinear(2, 3)
net.Forward(X)

### Softmax layer for multiclass classification problems - testing the answers

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Softmax layer with forward pass and evaluation; training is still a placeholder."""

    def __init__(self, inputs_num, outputs_num):
        """Store the layer sizes and create the initial parameters."""
        self.inum, self.onum = inputs_num, outputs_num
        # Each neuron is one column of W; entries are uniform in [-0.01, 0.01).
        uniform01 = np.random.rand(inputs_num, outputs_num)
        self.W = (2*uniform01 - 1)/100.0
        # Biases start at zero and are kept as a horizontal (1, outputs_num) vector.
        self.b = np.zeros((1, outputs_num))
        self.probs = None

    def Forward(self, X):
        """Compute class probabilities for the examples in the rows of X."""
        scores = np.dot(X, self.W) + self.b
        # Subtract each row's maximum so the exponentials cannot overflow.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        exps = np.exp(shifted)
        self.probs = exps / np.sum(exps, axis=1, keepdims=True)

    def Test(self, X, ClsIndx):
        """Evaluate the layer on X against the true class indices ClsIndx.

        Returns a tuple (mean cross-entropy loss, error rate, number of errors).
        """
        self.Forward(X)
        n = X.shape[0]
        # Probability assigned to the correct class of every example.
        true_class_probs = self.probs[np.arange(n), ClsIndx]
        data_loss = -np.log(true_class_probs).sum()/n
        # Count the examples whose most probable class is not the true one.
        errors_num = np.sum(self.GetPredictions() != ClsIndx)
        return (data_loss, errors_num / n, errors_num)

    def GetProbs(self):
        """Return the probabilities stored by the last forward pass."""
        return self.probs

    def GetPredictions(self):
        """Return, per example, the index of the most probable class."""
        return np.argmax(self.probs, axis=1)

    def Learn(self, X, ClsIndx, lrate):
        """Placeholder training step (does nothing yet)."""


# The last column of the file holds the class index, the rest are the inputs.
data = np.loadtxt('data.txt')
labels = data[:, -1].astype('int32')
X = data[:, :-1]
print(X)
print(labels)

# Evaluate a freshly initialized (untrained) layer on the data.
net = SoftMaxLinear(2, 3)
dloss, erate, errors = net.Test(X, labels)
print('dloss:',dloss)
print('erate:',erate)
print('errors:',errors)


### Softmax layer for multiclass classification problems - training

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class SoftMaxLinear:
    """Linear layer with softmax outputs, trained by batch gradient descent."""

    def __init__(self, inputs_num, outputs_num):
        """Store the layer sizes and create the initial parameters."""
        self.inum = inputs_num
        self.onum = outputs_num
        # Neurons as columns; entries are uniform in [-0.01, 0.01).
        self.W = (-1 + 2*np.random.rand(inputs_num, outputs_num))/100.0
        self.b = np.zeros((1, outputs_num)) #horizontal vector
        self.probs = None

    def Forward(self, X): #examples as rows in X
        """Compute class probabilities for X, store them in self.probs and print their shape."""
        f = np.dot(X, self.W) + self.b
        f -= np.max(f, axis=1, keepdims=True) #trick for numerical stability
        probs = np.exp(f)
        probs /= np.sum(probs, axis=1, keepdims=True)
        self.probs = probs
        print(probs.shape)

    def Test(self, X, ClsIndx):
        """Evaluate the layer on X against the true class indices ClsIndx.

        Returns a tuple (mean cross-entropy loss, error rate, number of errors).
        """
        self.Forward(X)
        #data loss: mean cross-entropy loss
        ex_num = X.shape[0]
        data_loss = -np.log(self.probs[range(ex_num),ClsIndx]).sum()/ex_num
        #classification error
        predictions = self.GetPredictions()
        errors_num = np.sum(predictions != ClsIndx)
        error_rate = errors_num / ex_num
        return (data_loss, error_rate, errors_num)

    def GetProbs(self):
        """Return the probabilities stored by the last forward pass."""
        return self.probs

    def GetPredictions(self):
        """Return, per example, the index of the most probable class."""
        return np.argmax(self.probs, axis=1)

    def Learn(self, X, ClsIndx, lrate): #just one epoch here
        """Run one gradient-descent step on the mean cross-entropy loss.

        X holds the examples as rows, ClsIndx the true class index of each
        example and lrate the learning rate. Gradients and the updated
        parameters are printed.
        """
        self.Forward(X)
        #gradients of outputs (class probabilities)
        ex_num = X.shape[0]
        dprobs = self.probs.copy()
        # For softmax with cross-entropy the gradient with respect to the
        # scores is (probabilities - one-hot target).
        dprobs[range(ex_num), ClsIndx] -= 1.0
        dprobs /= ex_num #average over all examples
        print('dprobs')
        print(dprobs)
        #gradient of weights and biases
        dW = np.dot(X.T, dprobs) # chain rule to calculate gradients
        db = np.sum(dprobs, axis=0,keepdims=True)
        print('dW')
        print(dW)
        print('db')
        print(db)
        #update neurons
        self.W = self.W - lrate*dW
        self.b = self.b - lrate*db
        # Report this instance's parameters (not those of a global object).
        print('W')
        print(self.W)
        print('b')
        print(self.b)
        

# The last column of the file holds the class index, the rest are the inputs.
data = np.loadtxt('data.txt')
labels = data[:, -1].astype('int32')
X = data[:, :-1]
print(X)
print(labels)

print()

net = SoftMaxLinear(2, 3)

# Baseline: evaluate the untrained layer.
dloss, erate, errors = net.Test(X, labels)
print('dloss:',dloss)
print('erate:',erate)
print('errors:',errors)
print(net.GetPredictions())

print('starting learning...')
counter = 0
converged = False
# Repeat one learning step plus evaluation until the loss drops below 0.1.
while not converged:
    net.Learn(X, labels, 1)
    dloss, erate, errors = net.Test(X, labels)
    counter += 1
    print('\n\niteration', counter)
    print('probs:\n',net.probs)
    print('dloss:',dloss)
    print('erate:',erate)
    print('errors:',errors)
    print(net.GetPredictions())
    converged = dloss < 0.1
print('learning finished')


### Task 1

THIS TASK IS NOT OBLIGATORY. DO IT ONLY IF YOU WANT.

- Implement a Python class for RBF network with softmax output layer.

- As the starting point, use the RBF class with Widrow-Hoff models as output layer (see the previous notebook)

- Show example working of your code on data_3classes_nonlinear_2D.txt and diabetes datasets

In [3]:
import numpy as np

class RBFNNSoftMax:
    """RBF network: a hidden layer of Gaussian radial neurons followed by a
    linear output layer whose scores are turned into class probabilities by
    softmax.

    Examples are stored as rows of the input matrix. The output layer can be
    trained with the softmax/cross-entropy gradient (Learn) or, on its linear
    outputs, with the pseudo-inverse (TrainMPInv) or the delta rule (TrainBatch).
    """

    def __init__(self, inputs_num, hidden_num, output_num):#hidden_num=number of radial neurons in the hidden layer
        """Create a network with zero centres, unit sigmas and random output weights."""
        self.inputs_num = inputs_num
        self.hidden_num = hidden_num
        self.output_num = output_num
        self.hcenters = np.zeros((hidden_num, inputs_num)) #centres of radial functions in the hidden layer
        self.hsigmas = np.ones(hidden_num)#sigma values of radial functions in the hidden layer
        self.outweights = np.random.rand(hidden_num, output_num) #each output neuron as a column
        self.outbiases = np.random.rand(output_num)#biases of the output linear neurons
        self.houtputs = None #outputs of radial neurons (hidden layer)
        self.netoutputs = None #raw outputs of the linear output neurons (before softmax)
        self.probs = None #class probabilities (softmax of netoutputs)
        self.stats = None #statistics about the MSE during batch training
        self.mse = None #last value computed by GetMSE

    def Print(self):#print basic info about the network
        """Print the parameters and, when available, the last computed outputs."""
        print('hcenters:\n',self.hcenters)
        print('hsigmas:\n',self.hsigmas)
        print('outweights:\n', self.outweights)
        print('outbiases:\n',self.outbiases)
        if self.houtputs is not None:
            print('houtputs:\n',self.houtputs)
        if self.netoutputs is not None:
            print('netoutputs:\n',self.netoutputs)

    def Forward(self, inputs):
        """Compute hidden responses, linear outputs and class probabilities.

        Results are stored in self.houtputs, self.netoutputs and self.probs.
        """
        ##outputs of radial neurons (hidden layer)
        self.houtputs = np.empty((inputs.shape[0], self.hcenters.shape[0]), dtype = float)
        for i, example in enumerate(inputs): #for each example
            # exp(-squared distance to every centre / sigma^2)
            self.houtputs[i,:] = np.exp(-np.sum((self.hcenters - example)**2, axis=1)/self.hsigmas**2)
        ##outputs of linear neurons (output layer)
        self.netoutputs = np.dot(self.houtputs, self.outweights) + self.outbiases
        # Trick for numerical stability: shift a copy of the scores, so that
        # self.netoutputs keeps the real linear outputs that GetOutputs,
        # GetMSE and the delta rule in TrainBatch rely on.
        shifted = self.netoutputs - np.max(self.netoutputs, axis=1, keepdims=True)
        probs = np.exp(shifted)
        probs /= np.sum(probs, axis=1, keepdims=True)
        self.probs = probs

    def GetProbs(self):
        """Return the class probabilities stored by the last forward pass."""
        return self.probs

    def Test(self, X, ClsIndx):
        """Evaluate the network on X against the true class indices ClsIndx.

        Returns a tuple (mean cross-entropy loss, error rate, number of errors).
        """
        self.Forward(X)
        #data loss: mean cross-entropy loss
        ex_num = X.shape[0]
        data_loss = -np.log(self.probs[range(ex_num),ClsIndx]).sum()/ex_num
        #classification error
        predictions = self.GetPredictions()
        errors_num = np.sum(predictions != ClsIndx)
        error_rate = errors_num / ex_num
        return (data_loss, error_rate, errors_num)

    def GetOutputs(self):#returns real valued outputs
        """Return the linear outputs stored by the last forward pass."""
        return self.netoutputs

    def GetPredictions(self):#returns class labels as 0,1,2,...
        """Return, per example, the index of the most probable class."""
        return np.argmax(self.probs, axis=1)

    def GetClassificationError(self, labels):
        """Return the number of examples whose prediction differs from labels."""
        return np.sum(labels!=self.GetPredictions())

    def GetMSE(self, d):
        """Return (and store in self.mse) the squared error between the linear
        outputs and the targets d, summed over outputs and averaged over examples."""
        diff = self.netoutputs - d
        self.mse = (diff*diff).sum(axis=1).sum() /d.shape[0]
        return self.mse

    def InitCenters(self, inputs, sigma):
        """Copy self.hidden_num distinct, randomly chosen examples as the
        centres of the radial neurons and set every sigma to the given value.

        Raises ValueError when there are fewer examples than hidden neurons
        (drawing distinct examples would otherwise never terminate).
        """
        ex_num = inputs.shape[0]
        if self.hidden_num > ex_num:
            raise ValueError(
                'cannot pick %d distinct centres from %d examples'
                % (self.hidden_num, ex_num))
        self.hsigmas = np.ones(self.hidden_num)*sigma
        indxs = np.random.choice(ex_num, size=self.hidden_num, replace=False)
        self.hcenters = inputs[indxs, :].copy()

    def TrainMPInv(self, X, d, sigma): #matrix pseudo inverse
        """Fit the output layer to the targets d in the least-squares sense.

        d is expected to be a 2-D array with one row per example and one
        column per output neuron.
        """
        self.InitCenters(X, sigma)
        self.Forward(X)
        print(d)
        print(d.shape)
        #now the matrix pseudoinverse for the weights of the output linear neurons
        # The leading column of ones makes the first solved row the bias.
        r = np.hstack((np.ones((self.houtputs.shape[0], 1)), self.houtputs))
        # pinv also copes with a singular r.T r, where an explicit inverse fails.
        w = np.dot(np.linalg.pinv(r), d)
        # Store the solution in the parameters that Forward actually uses.
        self.outweights = w[1:,:]
        self.outbiases = w[0,:]
        # Legacy attribute names, kept for code that reads them.
        self.w = self.outweights
        self.b = self.outbiases
        # Refresh the cached outputs so predictions reflect the new weights.
        self.Forward(X)

    def TrainBatch(self, X, d, labels, sigma, eta, max_iters): #Widrow-Hoff model, delta rule
        """Train the output layer with the batch delta rule for max_iters steps.

        The MSE of every step is appended to self.stats; the MSE and the
        number of misclassified examples are printed after each step.
        """
        self.InitCenters(X, sigma)
        self.Forward(X)
        self.stats = []
        for i in range(max_iters):
            err = d - self.netoutputs
            self.outweights += eta*np.dot(self.houtputs.T, err)/X.shape[0]
            self.outbiases += eta*np.dot(np.ones((1,self.houtputs.shape[0])), err).flatten()/X.shape[0]
            self.Forward(X)
            mse = self.GetMSE(d)
            self.stats.append(mse)
            print('mse=',mse)
            classification_error = self.GetClassificationError(labels)
            print('classification_error=',classification_error)
            print()

    def Learn(self, X, labels, sigma, eta): #just one epoch here
        """Run one gradient-descent step on the mean cross-entropy loss.

        Only the output layer is updated. sigma is accepted for interface
        compatibility but is not used here; eta is the learning rate.
        """
        self.Forward(X)
        #gradients of outputs (class probabilities)
        ex_num = X.shape[0]
        dprobs = self.probs.copy()
        # For softmax with cross-entropy the gradient with respect to the
        # scores is (probabilities - one-hot target).
        dprobs[range(ex_num), labels] -= 1.0
        dprobs /= ex_num #average over all examples
        #gradient of weights and biases
        dW = np.dot(self.houtputs.T, dprobs) # chain rule to calculate gradients
        # Summed without keepdims so that outbiases keeps the shape it
        # currently has instead of silently becoming a row matrix.
        db = np.sum(dprobs, axis=0)
        #update neurons
        self.outweights = self.outweights - eta*dW
        self.outbiases = self.outbiases - eta*db
            
# ---- Experiment 1: Pima diabetes data ----
# The last column holds the class index, the remaining columns are the inputs.
raw = np.loadtxt('pima-diabetes.csv', delimiter=',')
labels = raw[:, -1].astype(int)
X = raw[:, :-1]
print('X',X)
num_of_cls = np.unique(labels).size
num_of_ins = X.shape[1]

print('num_of_cls=',num_of_cls)
print('num_of_ins=',num_of_ins)

# Tune these two so that the radial responses cover the training data well.
hidden_num = 350
sigma = 0.3

net = RBFNNSoftMax(num_of_ins, hidden_num, num_of_cls)
net.InitCenters(X, sigma)
net.Forward(X)
print('Classification error before training=',net.GetClassificationError(labels))

# Train until the loss is at most 0.1 or 2000 epochs have been run.
counter = 0
dloss = 1
while counter < 2000 and dloss > 0.1:
    net.Learn(X, labels, sigma, 1)
    dloss, erate, errors = net.Test(X, labels)
    counter += 1
    print('\n\niteration', counter)
    print('dloss:',dloss)
    print('erate:',erate)
    print('errors:',errors)
print('learning finished')

# ---- Experiment 2: three nonlinear classes in 2D ----
raw = np.loadtxt('data_3classes_nonlinear_2D.txt')
labels = raw[:, -1].astype(int)
X = raw[:, :-1]
print('X',X)
num_of_cls = np.unique(labels).size
num_of_ins = X.shape[1]

print('num_of_cls=',num_of_cls)
print('num_of_ins=',num_of_ins)

# Tune these two so that the radial responses cover the training data well.
hidden_num = 50
sigma = 0.3

net2 = RBFNNSoftMax(num_of_ins, hidden_num, num_of_cls)
net2.InitCenters(X, sigma)
net2.Forward(X)
print('Classification error before training=',net2.GetClassificationError(labels))

# One preliminary step with a smaller learning rate, then refresh the outputs.
net2.Learn(X, labels, sigma, 0.5)

net2.Forward(X)

# Train until the loss is at most 0.05 or 2000 epochs have been run.
counter = 0
dloss = 1
while counter < 2000 and dloss > 0.05:
    net2.Learn(X, labels, sigma, 1)
    dloss, erate, errors = net2.Test(X, labels)
    counter += 1
    print('\n\niteration', counter)
    print('dloss:',dloss)
    print('erate:',erate)
    print('errors:',errors)
print('learning finished')

print('With the right values, I was able to get 91 errors in the pima-diabetes dataset.')
print('I was also able to get 0 errors in data_3classes_nonlinear_2D')

X [[  6.    148.     72.    ...  33.6     0.627  50.   ]
 [  1.     85.     66.    ...  26.6     0.351  31.   ]
 [  8.    183.     64.    ...  23.3     0.672  32.   ]
 ...
 [  5.    121.     72.    ...  26.2     0.245  30.   ]
 [  1.    126.     60.    ...  30.1     0.349  47.   ]
 [  1.     93.     70.    ...  30.4     0.315  23.   ]]
num_of_cls= 2
num_of_ins= 8
Classification error before training= 267


iteration 1
dloss: 0.656295426531342
erate: 0.3463541666666667
errors: 266


iteration 2
dloss: 0.6541835031044517
erate: 0.3489583333333333
errors: 268


iteration 3
dloss: 0.6533092852817527
erate: 0.3489583333333333
errors: 268


iteration 4
dloss: 0.6528393750150735
erate: 0.3515625
errors: 270


iteration 5
dloss: 0.6524977204147708
erate: 0.3502604166666667
errors: 269


iteration 6
dloss: 0.6521962347422299
erate: 0.3502604166666667
errors: 269


iteration 7
dloss: 0.6519073715136484
erate: 0.3502604166666667
errors: 269


iteration 8
dloss: 0.6516226103922941
erate: 0.3502604



iteration 105
dloss: 0.6256669272726137
erate: 0.328125
errors: 252


iteration 106
dloss: 0.6254143917198127
erate: 0.328125
errors: 252


iteration 107
dloss: 0.6251621533250822
erate: 0.328125
errors: 252


iteration 108
dloss: 0.6249102117901372
erate: 0.328125
errors: 252


iteration 109
dloss: 0.6246585668166723
erate: 0.328125
errors: 252


iteration 110
dloss: 0.6244072181063642
erate: 0.328125
errors: 252


iteration 111
dloss: 0.6241561653608713
erate: 0.328125
errors: 252


iteration 112
dloss: 0.6239054082818364
erate: 0.328125
errors: 252


iteration 113
dloss: 0.623654946570887
erate: 0.328125
errors: 252


iteration 114
dloss: 0.6234047799296373
erate: 0.3268229166666667
errors: 251


iteration 115
dloss: 0.6231549080596882
erate: 0.3268229166666667
errors: 251


iteration 116
dloss: 0.6229053306626297
erate: 0.3268229166666667
errors: 251


iteration 117
dloss: 0.6226560474400412
erate: 0.3268229166666667
errors: 251


iteration 118
dloss: 0.622407058093493
erate: 0.3



iteration 217
dloss: 0.5991621264023926
erate: 0.31640625
errors: 243


iteration 218
dloss: 0.5989410209906677
erate: 0.31640625
errors: 243


iteration 219
dloss: 0.5987201794624651
erate: 0.3151041666666667
errors: 242


iteration 220
dloss: 0.5984996015241052
erate: 0.3151041666666667
errors: 242


iteration 221
dloss: 0.598279286882005
erate: 0.3151041666666667
errors: 242


iteration 222
dloss: 0.5980592352426793
erate: 0.3151041666666667
errors: 242


iteration 223
dloss: 0.5978394463127422
erate: 0.3151041666666667
errors: 242


iteration 224
dloss: 0.5976199197989065
erate: 0.3151041666666667
errors: 242


iteration 225
dloss: 0.5974006554079857
erate: 0.3151041666666667
errors: 242


iteration 226
dloss: 0.5971816528468946
erate: 0.3138020833333333
errors: 241


iteration 227
dloss: 0.5969629118226503
erate: 0.3138020833333333
errors: 241


iteration 228
dloss: 0.5967444320423723
erate: 0.3138020833333333
errors: 241


iteration 229
dloss: 0.5965262132132844
erate: 0.3125
e



iteration 323
dloss: 0.5771373728217625
erate: 0.2838541666666667
errors: 218


iteration 324
dloss: 0.5769426274747516
erate: 0.2825520833333333
errors: 217


iteration 325
dloss: 0.5767481155745299
erate: 0.2825520833333333
errors: 217


iteration 326
dloss: 0.5765538368419897
erate: 0.2825520833333333
errors: 217


iteration 327
dloss: 0.5763597909981962
erate: 0.28125
errors: 216


iteration 328
dloss: 0.5761659777643884
erate: 0.2799479166666667
errors: 215


iteration 329
dloss: 0.5759723968619802
erate: 0.2799479166666667
errors: 215


iteration 330
dloss: 0.5757790480125604
erate: 0.2799479166666667
errors: 215


iteration 331
dloss: 0.5755859309378936
erate: 0.2786458333333333
errors: 214


iteration 332
dloss: 0.5753930453599202
erate: 0.2786458333333333
errors: 214


iteration 333
dloss: 0.5752003910007579
erate: 0.2786458333333333
errors: 214


iteration 334
dloss: 0.5750079675827013
erate: 0.2786458333333333
errors: 214


iteration 335
dloss: 0.5748157748282233
erate: 0.



iteration 431
dloss: 0.5573977370364854
erate: 0.2526041666666667
errors: 194


iteration 432
dloss: 0.55722662941866
erate: 0.2513020833333333
errors: 193


iteration 433
dloss: 0.5570557262082664
erate: 0.2513020833333333
errors: 193


iteration 434
dloss: 0.5568850271476911
erate: 0.2513020833333333
errors: 193


iteration 435
dloss: 0.5567145319795407
erate: 0.2513020833333333
errors: 193


iteration 436
dloss: 0.5565442404466423
erate: 0.2513020833333333
errors: 193


iteration 437
dloss: 0.5563741522920441
erate: 0.2513020833333333
errors: 193


iteration 438
dloss: 0.5562042672590155
erate: 0.2513020833333333
errors: 193


iteration 439
dloss: 0.5560345850910472
erate: 0.25
errors: 192


iteration 440
dloss: 0.5558651055318516
erate: 0.25
errors: 192


iteration 441
dloss: 0.5556958283253639
erate: 0.25
errors: 192


iteration 442
dloss: 0.555526753215741
erate: 0.25
errors: 192


iteration 443
dloss: 0.5553578799473626
erate: 0.24869791666666666
errors: 191


iteration 444
dl



iteration 537
dloss: 0.5403492796474866
erate: 0.22916666666666666
errors: 176


iteration 538
dloss: 0.5401984493942208
erate: 0.22916666666666666
errors: 176


iteration 539
dloss: 0.5400477975155725
erate: 0.22916666666666666
errors: 176


iteration 540
dloss: 0.5398973237785731
erate: 0.22916666666666666
errors: 176


iteration 541
dloss: 0.5397470279504961
erate: 0.22916666666666666
errors: 176


iteration 542
dloss: 0.5395969097988554
erate: 0.22916666666666666
errors: 176


iteration 543
dloss: 0.5394469690914073
erate: 0.22916666666666666
errors: 176


iteration 544
dloss: 0.5392972055961489
erate: 0.22916666666666666
errors: 176


iteration 545
dloss: 0.5391476190813193
erate: 0.22916666666666666
errors: 176


iteration 546
dloss: 0.5389982093153997
erate: 0.22916666666666666
errors: 176


iteration 547
dloss: 0.5388489760671124
erate: 0.22916666666666666
errors: 176


iteration 548
dloss: 0.5386999191054226
erate: 0.22916666666666666
errors: 176


iteration 549
dloss: 0.538



iteration 643
dloss: 0.5253101893076547
erate: 0.21484375
errors: 165


iteration 644
dloss: 0.5251770171788787
erate: 0.21484375
errors: 165


iteration 645
dloss: 0.5250440000843143
erate: 0.21484375
errors: 165


iteration 646
dloss: 0.5249111378168292
erate: 0.21354166666666666
errors: 164


iteration 647
dloss: 0.5247784301695346
erate: 0.21354166666666666
errors: 164


iteration 648
dloss: 0.5246458769357855
erate: 0.21354166666666666
errors: 164


iteration 649
dloss: 0.5245134779091803
erate: 0.21354166666666666
errors: 164


iteration 650
dloss: 0.5243812328835606
erate: 0.21354166666666666
errors: 164


iteration 651
dloss: 0.5242491416530118
erate: 0.21354166666666666
errors: 164


iteration 652
dloss: 0.5241172040118623
erate: 0.21354166666666666
errors: 164


iteration 653
dloss: 0.5239854197546842
erate: 0.21354166666666666
errors: 164


iteration 654
dloss: 0.5238537886762926
erate: 0.21354166666666666
errors: 164


iteration 655
dloss: 0.5237223105717459
erate: 0.2135



iteration 749
dloss: 0.5120179526347123
erate: 0.19661458333333334
errors: 151


iteration 750
dloss: 0.5119001080960303
erate: 0.19661458333333334
errors: 151


iteration 751
dloss: 0.5117823979769308
erate: 0.19661458333333334
errors: 151


iteration 752
dloss: 0.5116648220956449
erate: 0.19661458333333334
errors: 151


iteration 753
dloss: 0.5115473802706375
erate: 0.19661458333333334
errors: 151


iteration 754
dloss: 0.5114300723206059
erate: 0.19661458333333334
errors: 151


iteration 755
dloss: 0.5113128980644807
erate: 0.19661458333333334
errors: 151


iteration 756
dloss: 0.511195857321425
erate: 0.19661458333333334
errors: 151


iteration 757
dloss: 0.5110789499108346
erate: 0.19661458333333334
errors: 151


iteration 758
dloss: 0.5109621756523377
erate: 0.19661458333333334
errors: 151


iteration 759
dloss: 0.510845534365795
erate: 0.19661458333333334
errors: 151


iteration 760
dloss: 0.5107290258712993
erate: 0.19661458333333334
errors: 151


iteration 761
dloss: 0.51061



iteration 851
dloss: 0.5006599734822451
erate: 0.18880208333333334
errors: 145


iteration 852
dloss: 0.5005549428307123
erate: 0.18880208333333334
errors: 145


iteration 853
dloss: 0.5004500292324382
erate: 0.1875
errors: 144


iteration 854
dloss: 0.5003452325286045
erate: 0.1875
errors: 144


iteration 855
dloss: 0.500240552560608
erate: 0.18619791666666666
errors: 143


iteration 856
dloss: 0.5001359891700614
erate: 0.18619791666666666
errors: 143


iteration 857
dloss: 0.5000315421987928
erate: 0.18619791666666666
errors: 143


iteration 858
dloss: 0.4999272114888453
erate: 0.18619791666666666
errors: 143


iteration 859
dloss: 0.49982299688247694
erate: 0.18619791666666666
errors: 143


iteration 860
dloss: 0.4997188982221612
erate: 0.18619791666666666
errors: 143


iteration 861
dloss: 0.4996149153505854
erate: 0.18619791666666666
errors: 143


iteration 862
dloss: 0.49951104811065167
erate: 0.18619791666666666
errors: 143


iteration 863
dloss: 0.4994072963454763
erate: 0.18



iteration 959
dloss: 0.4899617072768751
erate: 0.1796875
errors: 138


iteration 960
dloss: 0.4898684437049868
erate: 0.1796875
errors: 138


iteration 961
dloss: 0.48977528123965053
erate: 0.1796875
errors: 138


iteration 962
dloss: 0.4896822197441771
erate: 0.1796875
errors: 138


iteration 963
dloss: 0.4895892590820711
erate: 0.1796875
errors: 138


iteration 964
dloss: 0.4894963991170302
erate: 0.1796875
errors: 138


iteration 965
dloss: 0.4894036397129449
erate: 0.1796875
errors: 138


iteration 966
dloss: 0.4893109807338987
erate: 0.1796875
errors: 138


iteration 967
dloss: 0.4892184220441678
erate: 0.1796875
errors: 138


iteration 968
dloss: 0.48912596350822035
erate: 0.1796875
errors: 138


iteration 969
dloss: 0.4890336049907167
erate: 0.1796875
errors: 138


iteration 970
dloss: 0.48894134635650915
erate: 0.1796875
errors: 138


iteration 971
dloss: 0.48884918747064177
erate: 0.1796875
errors: 138


iteration 972
dloss: 0.4887571281983501
erate: 0.1796875
errors: 138






iteration 1073
dloss: 0.47994927732731546
erate: 0.1796875
errors: 138


iteration 1074
dloss: 0.47986670454625946
erate: 0.1796875
errors: 138


iteration 1075
dloss: 0.4797842184826347
erate: 0.1796875
errors: 138


iteration 1076
dloss: 0.4797018190203975
erate: 0.1796875
errors: 138


iteration 1077
dloss: 0.47961950604367315
erate: 0.1796875
errors: 138


iteration 1078
dloss: 0.4795372794367547
erate: 0.1796875
errors: 138


iteration 1079
dloss: 0.4794551390841038
erate: 0.1796875
errors: 138


iteration 1080
dloss: 0.47937308487034985
erate: 0.1796875
errors: 138


iteration 1081
dloss: 0.47929111668028995
erate: 0.1796875
errors: 138


iteration 1082
dloss: 0.4792092343988885
erate: 0.1796875
errors: 138


iteration 1083
dloss: 0.4791274379112774
erate: 0.1796875
errors: 138


iteration 1084
dloss: 0.4790457271027555
erate: 0.1796875
errors: 138


iteration 1085
dloss: 0.4789641018587884
erate: 0.1796875
errors: 138


iteration 1086
dloss: 0.4788825620650086
erate: 0.1796875



iteration 1187
dloss: 0.4710677161813425
erate: 0.1796875
errors: 138


iteration 1188
dloss: 0.4709943208514728
erate: 0.1796875
errors: 138


iteration 1189
dloss: 0.47092100004430687
erate: 0.1796875
errors: 138


iteration 1190
dloss: 0.470847753661646
erate: 0.1796875
errors: 138


iteration 1191
dloss: 0.47077458160543556
erate: 0.1796875
errors: 138


iteration 1192
dloss: 0.4707014837777655
erate: 0.1796875
errors: 138


iteration 1193
dloss: 0.47062846008087006
erate: 0.1796875
errors: 138


iteration 1194
dloss: 0.470555510417127
erate: 0.1796875
errors: 138


iteration 1195
dloss: 0.47048263468905827
erate: 0.1796875
errors: 138


iteration 1196
dloss: 0.47040983279932913
erate: 0.1796875
errors: 138


iteration 1197
dloss: 0.47033710465074824
erate: 0.1796875
errors: 138


iteration 1198
dloss: 0.47026445014626744
erate: 0.1796875
errors: 138


iteration 1199
dloss: 0.4701918691889812
erate: 0.1796875
errors: 138


iteration 1200
dloss: 0.4701193616821276
erate: 0.1796875



iteration 1301
dloss: 0.46315797131525116
erate: 0.1796875
errors: 138


iteration 1302
dloss: 0.4630924725079753
erate: 0.1796875
errors: 138


iteration 1303
dloss: 0.4630270379122458
erate: 0.1796875
errors: 138


iteration 1304
dloss: 0.4629616674450799
erate: 0.1796875
errors: 138


iteration 1305
dloss: 0.46289636102361814
erate: 0.1796875
errors: 138


iteration 1306
dloss: 0.46283111856512305
erate: 0.1796875
errors: 138


iteration 1307
dloss: 0.4627659399869793
erate: 0.1796875
errors: 138


iteration 1308
dloss: 0.46270082520669426
erate: 0.1796875
errors: 138


iteration 1309
dloss: 0.4626357741418963
erate: 0.1796875
errors: 138


iteration 1310
dloss: 0.46257078671033636
erate: 0.1796875
errors: 138


iteration 1311
dloss: 0.4625058628298862
erate: 0.1796875
errors: 138


iteration 1312
dloss: 0.4624410024185394
erate: 0.1796875
errors: 138


iteration 1313
dloss: 0.46237620539441043
erate: 0.1796875
errors: 138


iteration 1314
dloss: 0.46231147167573483
erate: 0.17968



iteration 1415
dloss: 0.4560855316179831
erate: 0.1796875
errors: 138


iteration 1416
dloss: 0.456026846738743
erate: 0.1796875
errors: 138


iteration 1417
dloss: 0.4559682173584825
erate: 0.1796875
errors: 138


iteration 1418
dloss: 0.45590964340707646
erate: 0.1796875
errors: 138


iteration 1419
dloss: 0.455851124814503
erate: 0.1796875
errors: 138


iteration 1420
dloss: 0.455792661510843
erate: 0.1796875
errors: 138


iteration 1421
dloss: 0.4557342534262807
erate: 0.1796875
errors: 138


iteration 1422
dloss: 0.45567590049110257
erate: 0.1796875
errors: 138


iteration 1423
dloss: 0.4556176026356981
erate: 0.1796875
errors: 138


iteration 1424
dloss: 0.4555593597905592
erate: 0.1796875
errors: 138


iteration 1425
dloss: 0.4555011718862798
erate: 0.1796875
errors: 138


iteration 1426
dloss: 0.4554430388535562
erate: 0.1796875
errors: 138


iteration 1427
dloss: 0.4553849606231865
erate: 0.1796875
errors: 138


iteration 1428
dloss: 0.45532693712607086
erate: 0.1796875
erro



iteration 1529
dloss: 0.4497367279229613
erate: 0.1796875
errors: 138


iteration 1530
dloss: 0.44968394204999385
erate: 0.1796875
errors: 138


iteration 1531
dloss: 0.4496312043100135
erate: 0.1796875
errors: 138


iteration 1532
dloss: 0.449578514643695
erate: 0.1796875
errors: 138


iteration 1533
dloss: 0.44952587299179925
erate: 0.1796875
errors: 138


iteration 1534
dloss: 0.4494732792951739
erate: 0.1796875
errors: 138


iteration 1535
dloss: 0.4494207334947524
erate: 0.1796875
errors: 138


iteration 1536
dloss: 0.44936823553155464
erate: 0.1796875
errors: 138


iteration 1537
dloss: 0.4493157853466864
erate: 0.1796875
errors: 138


iteration 1538
dloss: 0.44926338288133943
erate: 0.1796875
errors: 138


iteration 1539
dloss: 0.449211028076791
erate: 0.1796875
errors: 138


iteration 1540
dloss: 0.44915872087440417
erate: 0.1796875
errors: 138


iteration 1541
dloss: 0.4491064612156272
erate: 0.1796875
errors: 138


iteration 1542
dloss: 0.449054249041994
erate: 0.1796875
er



iteration 1643
dloss: 0.44401545542016024
erate: 0.1796875
errors: 138


iteration 1644
dloss: 0.4439677945266382
erate: 0.1796875
errors: 138


iteration 1645
dloss: 0.44392017552914426
erate: 0.1796875
errors: 138


iteration 1646
dloss: 0.4438725983773936
erate: 0.1796875
errors: 138


iteration 1647
dloss: 0.4438250630211735
erate: 0.1796875
errors: 138


iteration 1648
dloss: 0.4437775694103439
erate: 0.1796875
errors: 138


iteration 1649
dloss: 0.44373011749483665
erate: 0.1796875
errors: 138


iteration 1650
dloss: 0.44368270722465536
erate: 0.1796875
errors: 138


iteration 1651
dloss: 0.44363533854987586
erate: 0.1796875
errors: 138


iteration 1652
dloss: 0.4435880114206457
erate: 0.1796875
errors: 138


iteration 1653
dloss: 0.443540725787184
erate: 0.1796875
errors: 138


iteration 1654
dloss: 0.4434934815997818
erate: 0.1796875
errors: 138


iteration 1655
dloss: 0.4434462788088011
erate: 0.1796875
errors: 138


iteration 1656
dloss: 0.44339911736467547
erate: 0.1796875



iteration 1757
dloss: 0.43884034074429074
erate: 0.1796875
errors: 138


iteration 1758
dloss: 0.4387971487777695
erate: 0.1796875
errors: 138


iteration 1759
dloss: 0.43875399341461624
erate: 0.1796875
errors: 138


iteration 1760
dloss: 0.4387108746121024
erate: 0.1796875
errors: 138


iteration 1761
dloss: 0.43866779232755987
erate: 0.1796875
errors: 138


iteration 1762
dloss: 0.43862474651838096
erate: 0.1796875
errors: 138


iteration 1763
dloss: 0.43858173714201804
erate: 0.1796875
errors: 138


iteration 1764
dloss: 0.43853876415598386
erate: 0.1796875
errors: 138


iteration 1765
dloss: 0.4384958275178514
erate: 0.1796875
errors: 138


iteration 1766
dloss: 0.4384529271852529
erate: 0.1796875
errors: 138


iteration 1767
dloss: 0.43841006311588143
erate: 0.1796875
errors: 138


iteration 1768
dloss: 0.4383672352674892
erate: 0.1796875
errors: 138


iteration 1769
dloss: 0.4383244435978885
erate: 0.1796875
errors: 138


iteration 1770
dloss: 0.43828168806495094
erate: 0.1796



iteration 1871
dloss: 0.4341423340408343
erate: 0.1796875
errors: 138


iteration 1872
dloss: 0.4341030535688417
erate: 0.1796875
errors: 138


iteration 1873
dloss: 0.43406380519668836
erate: 0.1796875
errors: 138


iteration 1874
dloss: 0.43402458888796014
erate: 0.1796875
errors: 138


iteration 1875
dloss: 0.4339854046062941
erate: 0.1796875
errors: 138


iteration 1876
dloss: 0.43394625231537737
erate: 0.1796875
errors: 138


iteration 1877
dloss: 0.4339071319789472
erate: 0.1796875
errors: 138


iteration 1878
dloss: 0.43386804356079217
erate: 0.1796875
errors: 138


iteration 1879
dloss: 0.43382898702475003
erate: 0.1796875
errors: 138


iteration 1880
dloss: 0.43378996233470923
erate: 0.1796875
errors: 138


iteration 1881
dloss: 0.43375096945460817
erate: 0.1796875
errors: 138


iteration 1882
dloss: 0.433712008348435
erate: 0.1796875
errors: 138


iteration 1883
dloss: 0.43367307898022833
erate: 0.1796875
errors: 138


iteration 1884
dloss: 0.433634181314076
erate: 0.179687



iteration 1985
dloss: 0.42986268508801145
erate: 0.1796875
errors: 138


iteration 1986
dloss: 0.42982684110094693
erate: 0.1796875
errors: 138


iteration 1987
dloss: 0.42979102536991004
erate: 0.1796875
errors: 138


iteration 1988
dloss: 0.4297552378637688
erate: 0.1796875
errors: 138


iteration 1989
dloss: 0.42971947855143444
erate: 0.1796875
errors: 138


iteration 1990
dloss: 0.4296837474018595
erate: 0.1796875
errors: 138


iteration 1991
dloss: 0.42964804438403886
erate: 0.1796875
errors: 138


iteration 1992
dloss: 0.4296123694670102
erate: 0.1796875
errors: 138


iteration 1993
dloss: 0.4295767226198525
erate: 0.1796875
errors: 138


iteration 1994
dloss: 0.4295411038116869
erate: 0.1796875
errors: 138


iteration 1995
dloss: 0.4295055130116768
erate: 0.1796875
errors: 138


iteration 1996
dloss: 0.42946995018902706
erate: 0.1796875
errors: 138


iteration 1997
dloss: 0.4294344153129844
erate: 0.1796875
errors: 138


iteration 1998
dloss: 0.4293989083528375
erate: 0.179687



iteration 53
dloss: 0.08910000621382669
erate: 0.004761904761904762
errors: 1


iteration 54
dloss: 0.08795755427947953
erate: 0.004761904761904762
errors: 1


iteration 55
dloss: 0.08684747439565683
erate: 0.004761904761904762
errors: 1


iteration 56
dloss: 0.08576835666812724
erate: 0.004761904761904762
errors: 1


iteration 57
dloss: 0.08471887375985448
erate: 0.004761904761904762
errors: 1


iteration 58
dloss: 0.08369777483283714
erate: 0.004761904761904762
errors: 1


iteration 59
dloss: 0.08270388002289074
erate: 0.004761904761904762
errors: 1


iteration 60
dloss: 0.08173607539288065
erate: 0.004761904761904762
errors: 1


iteration 61
dloss: 0.08079330831624765
erate: 0.004761904761904762
errors: 1


iteration 62
dloss: 0.07987458324817177
erate: 0.004761904761904762
errors: 1


iteration 63
dloss: 0.07897895784652748
erate: 0.004761904761904762
errors: 1


iteration 64
dloss: 0.07810553940898142
erate: 0.004761904761904762
errors: 1


iteration 65
dloss: 0.0772534815962647