# Differential Evolution Neural Network training

This notebook uses differential evolution to train a neural network. Training is treated as a black-box optimization problem: minimizing a loss function of 83 arguments, which are the weights and biases of the neural network (the exact number depends on the architecture).

In [89]:
from keras.utils import to_categorical
from scipy import optimize as op
import pandas as pd
import numpy as np

In [90]:
# Load the Iris dataset from a local CSV file; its first row holds the column names.
dataset = pd.read_csv('iris.data', header=0)

# Shuffle the rows before the dataset is split into train and test sets.
# A fixed seed keeps the shuffle (and therefore any positional split made
# afterwards) identical across "Restart & Run All" executions.
SHUFFLE_SEED = 42
dataset = dataset.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
dataset.head(10)

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.6,2.8,4.9,2.0,Iris-virginica
1,6.1,3.0,4.9,1.8,Iris-virginica
2,5.7,2.6,3.5,1.0,Iris-versicolor
3,4.4,2.9,1.4,0.2,Iris-setosa
4,4.3,3.0,1.1,0.1,Iris-setosa
5,5.1,3.8,1.6,0.2,Iris-setosa
6,6.4,3.2,4.5,1.5,Iris-versicolor
7,5.6,3.0,4.5,1.5,Iris-versicolor
8,6.3,2.3,4.4,1.3,Iris-versicolor
9,5.7,2.9,4.2,1.3,Iris-versicolor


In [91]:
# Adding bias unit: a constant column of ones, inserted as the first column.
dataset.insert(0, 'bias', 1)

# Feature scaling: centre every measurement column on its mean and divide by the
# largest absolute value of the original column. The slice [1:-1] skips the
# first column ('bias') and the last one ('class').
# NOTE(review): these statistics are computed over the whole dataset, i.e. before
# the train/test split below, so the test rows influence the scaling.
for label in list(dataset)[1:-1]:
    dataset[label] = (dataset[label] - dataset[label].mean()) / dataset[label].abs().max()

# Mapping class names to numbers.
CLASS_IDS = {'Iris-setosa': 0,
             'Iris-versicolor': 1,
             'Iris-virginica': 2}
dataset['class'] = dataset['class'].map(CLASS_IDS).astype(int)

# Positional split: the first 100 rows are used for training, the rest for testing.
train = dataset[:100]
test = dataset[100:]

# Network inputs, in the order the weight matrices expect them (bias first).
INPUT_COLUMNS = ['bias',
                 'sepal length',
                 'sepal width',
                 'petal length',
                 'petal width']

# `DataFrame.as_matrix` was removed in pandas 1.0; selecting the columns and
# taking `.values` yields the same 2-D array on old and new pandas alike.
X = train[INPUT_COLUMNS].values
y = train[['class']].values
# Passing num_classes explicitly guarantees one column per class even if a
# class happens to be absent from one of the two splits.
y = to_categorical(y, num_classes=len(CLASS_IDS))

X_test = test[INPUT_COLUMNS].values
y_test = test[['class']].values
y_test = to_categorical(y_test, num_classes=len(CLASS_IDS))
dataset.sample(10)

Unnamed: 0,bias,sepal length,sepal width,petal length,petal width,class
77,1,0.019831,-0.194091,0.179903,0.120533,2
146,1,-0.144726,0.033182,-0.356329,-0.399467,0
55,1,-0.005485,-0.080455,0.194396,0.280533,2
89,1,-0.1827,0.033182,-0.356329,-0.399467,0
25,1,0.17173,0.033182,0.324831,0.240533,2
127,1,0.057806,-0.035,0.26686,0.240533,2
1,1,0.032489,-0.012273,0.165411,0.240533,2
125,1,0.083122,-0.057727,0.121932,0.120533,1
90,1,0.083122,-0.012273,0.295845,0.400533,2
137,1,0.121097,0.033182,0.310338,0.440533,2


In [113]:
# Defining the Rectified Linear Unit, which is used as the activation function here.
def relu(x):
    """Element-wise rectified linear unit, ``max(x, 0)``.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation values.

    Returns
    -------
    Same shape as ``x``: positive entries are kept, everything else becomes 0.

    Notes
    -----
    ``np.maximum`` is used instead of ``x * (x > 0)`` because the product form
    evaluates ``-inf * 0`` for an input of ``-inf``, which is NaN, and yields
    ``-0.0`` for negative inputs.
    """
    return np.maximum(x, 0)

# Defining softmax, which is used in the output layer.
def softmax(a):
    """Softmax along axis 0.

    Parameters
    ----------
    a : array-like
        Scores. For a 2-D input every column is normalised independently.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``a`` whose entries are non-negative and
        sum to 1 along axis 0.

    Notes
    -----
    * The maximum is subtracted per slice along axis 0 rather than globally.
      Softmax is invariant to such a shift, but with a single global maximum
      a slice whose scores are all far below it underflows to zeros and the
      normalisation becomes 0/0 (NaN).
    * Inputs of any rank are normalised; there is no rank for which the
      function falls through and returns ``None``.
    """
    a = np.asarray(a)
    # Largest score of each slice becomes 0, so exp() never overflows and at
    # least one term of every sum is exactly 1.
    shifted = a - np.max(a, axis=0, keepdims=True)
    s = np.exp(shifted)
    return s / s.sum(axis=0, keepdims=True)

# Defining Function to minimize.
def f(theta, batch_size=1):
    """Cross-entropy loss of the 4-10-3 network on a mini-batch of training rows.

    Parameters
    ----------
    theta : numpy.ndarray
        Flat parameter vector, laid out as
        ``[0:40]`` hidden weights (reshaped to 10x4), ``[40:50]`` hidden biases,
        ``[50:80]`` output weights (reshaped to 3x10), ``[80:]`` output biases.
    batch_size : int or None, optional
        Number of distinct training rows drawn at random for this evaluation.
        The default of 1 evaluates a single random row. ``None`` evaluates the
        whole training set, which makes the objective deterministic.

    Returns
    -------
    float
        Sum over the batch of ``-log(p)``, where ``p`` is the softmax output
        at the position selected by the one-hot row of ``y``.

    Notes
    -----
    Reads the module-level arrays ``X`` and ``y`` and the functions ``relu``
    and ``softmax``. Unless ``batch_size`` is ``None`` the value is random:
    repeated calls with the same ``theta`` may return different losses because
    a new batch is drawn through the global NumPy random state on every call.
    An extra argument can be forwarded by the optimiser, e.g.
    ``differential_evolution(f, bounds, args=(None,))``.
    """
    # Reshaping the 1D input weights into two 2D matrices.
    theta1 = theta[:40].reshape((10, 4))
    bias1 = theta[40:50]
    theta2 = theta[50:80].reshape((3, 10))
    bias2 = theta[80:]
    # The biases become the first column of each matrix, matching the leading
    # 'bias' column of X and the row of ones prepended to the hidden layer.
    theta1 = np.column_stack((bias1, theta1))
    theta2 = np.column_stack((bias2, theta2))

    # Choosing the batch: the full training set, or `batch_size` distinct random rows.
    if batch_size is None:
        x_batch = X
        y_batch = y
    else:
        batch_idx = np.random.choice(X.shape[0], batch_size, replace=False)
        x_batch = X[batch_idx]
        y_batch = y[batch_idx]

    # Forward pass; samples are laid out as columns from here on.
    h = relu(theta1.dot(x_batch.T))
    # np.vstack replaces np.row_stack, an alias that is deprecated in NumPy 2.0.
    h = np.vstack((np.ones(h.shape[1]), h))
    out = softmax(theta2.dot(h))

    # Calculating the loss via cross entropy: the one-hot mask keeps only the
    # probability assigned to each sample's labelled class.
    loss = -np.log((y_batch.T * out).sum(0))
    return loss.sum()

In [114]:
# One (-2, 2) box constraint per argument of f:
# 10*4 hidden weights + 10 hidden biases + 3*10 output weights + 3 output biases = 83.
N_PARAMS = 10 * 4 + 10 + 3 * 10 + 3
bounds = [(-2., 2.)] * N_PARAMS

# Keep the OptimizeResult so the fitted parameter vector (result.x) stays
# available after this long-running cell; the bare name on the last line
# still displays it as the cell output.
result = op.differential_evolution(f, bounds, maxiter=50000)
result

     fun: 0.08418210340759222
 message: 'Maximum number of iterations has been exceeded.'
    nfev: 62253513
     nit: 50000
 success: False
       x: array([-0.74464946, -0.58780526,  1.41413286,  1.56612411,  0.89748166,
        1.26587933, -1.7823427 , -1.74265063,  0.8177892 , -0.04201371,
        1.93503724,  0.47859909,  0.45392613, -1.78392789,  1.01074704,
        1.83660519,  1.74030175, -0.10822437, -1.30336803, -1.77002355,
       -0.40594965, -0.70434178, -1.41642537, -1.91579606,  0.18598574,
       -0.88166189,  1.84681131,  0.96850258, -1.09169225,  0.88481126,
       -1.8010207 , -1.9930251 ,  0.55270947, -1.51767629, -1.35514934,
       -1.56836933,  0.7268226 , -0.81680452,  1.91438837,  0.98525988,
        0.02142366,  1.57358414, -0.29674495, -0.1720991 , -0.24934692,
        1.37585919,  1.6708461 ,  0.51438577,  1.55851392,  0.83227882,
       -0.20616961,  1.36950349, -1.55857974, -0.78808741,  0.58527486,
        1.63512289, -1.99569004,  1.75354475,  1.67869191