# Differential Evolution Neural Network training

Using differential evolution to train neural network. The task is basically the problem of optimizing the function of 83 arguments (number depends on architecture), which are the weights and biases of neural network. 

In [33]:
from keras.utils import to_categorical
from scipy import optimize as op
import pandas as pd
import numpy as np

In [35]:
# Upload the Iris dataset from file. 
dataset = pd.read_csv('iris.data', header=0)

# Shuffling dataset to split it into train and test set, and avoid high variance.
dataset = dataset.sample(frac=1).reset_index(drop=True)
dataset.head(10)

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,4.9,3.1,1.5,0.1,Iris-setosa
1,5.7,2.5,5.0,2.0,Iris-virginica
2,5.8,2.7,5.1,1.9,Iris-virginica
3,6.5,3.0,5.2,2.0,Iris-virginica
4,6.4,2.7,5.3,1.9,Iris-virginica
5,5.0,3.5,1.3,0.3,Iris-setosa
6,6.4,2.8,5.6,2.2,Iris-virginica
7,6.3,3.4,5.6,2.4,Iris-virginica
8,6.2,3.4,5.4,2.3,Iris-virginica
9,5.6,2.5,3.9,1.1,Iris-versicolor


In [36]:
# Adding bias unit.
dataset.insert(0, 'bias', 1)

# Mapping class names to numbers.
dataset['class'] = dataset['class'].map({'Iris-setosa':0, 
                                         'Iris-versicolor':1, 
                                         'Iris-virginica':2}).astype(int)
train = dataset[:100]
test = dataset[100:]

X = train.as_matrix(['bias',
                   'sepal length', 
                   'sepal width', 
                   'petal length', 
                   'petal width'])
y = train.as_matrix(['class'])
y = to_categorical(y)

X_test = test.as_matrix(['bias',
                   'sepal length', 
                   'sepal width', 
                   'petal length', 
                   'petal width'])
y_test = test.as_matrix(['class'])
y_test = to_categorical(y_test)

In [50]:
np.random.choice(X.shape[0], 3, replace=False)

array([72, 61, 31])

In [48]:
# Defining Rectified Linear Unit, which is used as actiation function here.
def relu(x):
    return x * (x > 0)

# Defining softmax, which is used in the output layer.
def softmax(a):
    s = np.exp(a - np.max(a))
    if a.ndim <3: return s/s.sum(0) 

# Defining Function to minimize.
def f(theta):
    # Reshaping 1D input weights to 2 2D matrix.    
    theta1 = theta[:40]
    bias1 = theta[40:50]
    theta2 = theta[50:80]
    bias2 = theta[80:]
    theta1 = theta1.reshape((10, 4))
    theta2 = theta2.reshape((3, 10))
    theta1 = np.column_stack((bias1, theta1))
    theta2 = np.column_stack((bias2, theta2))
    
    # Choosing a batch of 3 random samples.
    batch_idx = np.random.choice(X.shape[0], 10, replace=False)
    x_batch = X[batch_idx]
    y_batch
    
    # Calculationg the loss via cross entropy.
    h = relu(theta1.dot(x_batch.T))
    h = np.row_stack((np.ones(h.shape[1]), h))
    out = softmax(theta2.dot(h))
    loss = -np.log((y_batch.T * out).sum(0))
    return loss.sum()

In [49]:
print type(f_test)
f(f_test)

<type 'numpy.ndarray'>


ValueError: operands could not be broadcast together with shapes (3,100) (3,10) 

In [46]:
# bounds = [(-2., 2.)]*70
# op.differential_evolution(f, bounds, maxiter=50000)
bounds = [(-2., 2.)]*83
op.differential_evolution(f, bounds, maxiter=10000)

     fun: 4.758891322404432
     jac: array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        4.02050837e-02,  2.92593327e-01,  1.69106062e-01,  3.85239396e-02,
        4.78100226e-02, -1.72314873e-01, -6.63834321e-02, -6.99102998e-03,
       -1.12153309e-01,  5.72668579e-01,  2.42328024e-01,  3.34947181e-02,
        8.94559093e-02, -1.89144167e-01, -6.28975982e-02, -3.05586667e-03,
       -6.01767525e-03, -4.33436398e-01, -2.25637287e-01, -4.53131754e-02,
        8.57699520e+00,  3.56915848e+00,  6.25917256e+00,  1.38730165e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
       -1.52698298e-02,  4.10244017e-01,  2.03825490e-01,  3.82829768e-02,
        0.00000000e+00,  3.06670245e-03,  1.18228094e-02, -3.03838732e-02,
        2.04297024e-02,  5.66355851e-03,  4.24589874e-01,  0.00000000e+00,
        0.00000000e+00, -9.35962419e-03,  0.00000000e+00,  2.6