In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import multiprocessing
from multiprocessing.dummy import Pool

In [16]:
# Shared worker pool with 4 workers. `Pool` here is multiprocessing.dummy.Pool,
# i.e. a thread-backed pool exposing the multiprocessing.Pool API.
pool = Pool(processes=4)

In [17]:
# Load the training table from the working directory.
df = pd.read_csv(filepath_or_buffer='train.csv')

In [18]:
# Seed NumPy's global RNG so the row shuffle below (which determines the
# train/test split) and every later np.random draw in this notebook are
# reproducible after a kernel restart.
np.random.seed(0)

# astype() returns a new float32 array, so shuffling it leaves `df` untouched.
data = df.values.astype(np.float32)
# Shuffle rows in place (np.random.shuffle permutes along the first axis).
np.random.shuffle(data)

In [19]:
# Column 0 is used as the integer class label; all remaining columns are the
# features, divided by 255 (presumably 8-bit intensities -- confirm against the data).
Y = data[:, 0].astype(np.int32)
X = data[:, 1:] / 255.0

In [20]:
# Hold out the last 1000 (already shuffled) rows for testing; everything
# before them is the training set.
Xtrain, Ytrain = X[:-1000], Y[:-1000]
Xtest, Ytest = X[-1000:], Y[-1000:]

In [21]:
# Network dimensions passed to ANN(D, M, K).
M = 100                  # hidden-layer width
D = Xtrain.shape[1]      # number of input features
K = len(np.unique(Y))    # number of distinct labels

In [22]:
def softmax(a):
  """Numerically stable softmax over the last axis of `a`.

  The per-row maximum is subtracted before exponentiating so large inputs do
  not overflow. Both the max and the normalising sum are taken over the last
  axis, so 1-D vectors and arrays with more than two dimensions are handled
  the same way as the usual (N, K) matrix.

  Args:
    a: array-like of activations; the last axis indexes the classes.

  Returns:
    ndarray with the same shape as `a` whose entries along the last axis are
    non-negative and sum to 1.
  """
  a = np.asarray(a)
  shifted = a - np.max(a, axis=-1, keepdims=True)
  e = np.exp(shifted)
  return e / e.sum(axis=-1, keepdims=True)

In [23]:
def relu(x):
  """Rectified linear unit: element-wise max(x, 0).

  Uses np.maximum rather than `x * (x > 0)` because the multiplicative form
  evaluates -inf * 0, which is NaN, for negative-infinite inputs.

  Args:
    x: scalar or array-like.

  Returns:
    Value(s) of `x` with every negative entry replaced by 0.
  """
  return np.maximum(x, 0)

In [24]:
def log_likelihood(Y, P):
  """Mean log-probability that `P` assigns to the labels in `Y`.

  Args:
    Y: integer label per row; used as a column index into `P`.
    P: 2-D array where P[i, k] is the probability of class k for row i.

  Returns:
    The average of log(P[i, Y[i]]) over all rows.
  """
  row_idx = np.arange(len(Y))
  picked = P[row_idx, Y]
  return np.mean(np.log(picked))

In [25]:
class ANN:
  """One-hidden-layer classifier: tanh hidden units followed by a softmax output.

  All weights can be exported to, or loaded from, a single flat vector laid
  out as [W1 (D*M), b1 (M), W2 (M*K), b2 (K)], with matrices in row-major order.
  """

  def __init__(self, D, M, K):
    """Record layer sizes (D inputs, M hidden units, K outputs).

    No weights are created here; call init() or set_params() before forward().
    """
    self.D = D
    self.M = M
    self.K = K

  def init(self):
    """Randomly initialise weights (Gaussian scaled by 1/sqrt(fan_in)); biases start at zero."""
    D, M, K = self.D, self.M, self.K
    self.W1 = np.random.randn(D, M) / np.sqrt(D)
    self.b1 = np.zeros(M)
    self.W2 = np.random.randn(M, K) / np.sqrt(M)
    self.b2 = np.zeros(K)

  def forward(self, X):
    """Return the softmax class probabilities for each row of `X`."""
    Z = np.tanh(X.dot(self.W1) + self.b1)
    return softmax(Z.dot(self.W2) + self.b2)

  def score(self, X, Y):
    """Return the fraction of rows of `X` whose arg-max prediction equals `Y`."""
    P = np.argmax(self.forward(X), axis=1)
    return np.mean(Y == P)

  def get_params(self):
    """Return a new 1-D array holding W1, b1, W2, b2 concatenated in that order."""
    return np.concatenate([self.W1.flatten(), self.b1,
                           self.W2.flatten(),
                           self.b2])

  def set_params(self, params):
    """Load weights from a flat vector in the layout produced by get_params().

    Args:
      params: 1-D array-like of exactly D*M + M + M*K + K values.

    Raises:
      ValueError: if `params` is not 1-D with the expected length. Without
        this check a vector that is too long is silently truncated, and one
        that is slightly too short makes the b2 slice overlap W2.

    Note:
      For ndarray input the stored weights are slices of `params`, not copies.
    """
    D, M, K = self.D, self.M, self.K
    params = np.asarray(params)
    expected = D * M + M + M * K + K
    if params.shape != (expected,):
      raise ValueError(
        "params must be a 1-D vector of length %d, got shape %s"
        % (expected, params.shape))

    # Explicit offsets for each segment of the flat vector.
    w1_end = D * M
    b1_end = w1_end + M
    w2_end = b1_end + M * K
    self.W1 = params[:w1_end].reshape(D, M)
    self.b1 = params[w1_end:b1_end]
    self.W2 = params[b1_end:w2_end].reshape(M, K)
    self.b2 = params[w2_end:]

In [29]:
def evolution_strategy(f, population_size, sigma, lr, initial_params, num_iters,
                       worker_pool=None):
  """Maximise `f` with a simple evolution strategy.

  Each iteration samples `population_size` Gaussian perturbations of the
  current parameters, evaluates `f` on every perturbed vector, standardises
  the rewards, and moves the parameters along the reward-weighted sum of the
  perturbations.

  Args:
    f: callable taking a flat parameter vector and returning a scalar reward
      (larger is better).
    population_size: number of perturbed candidates per iteration.
    sigma: standard deviation of the perturbation noise.
    lr: learning rate applied to the update.
    initial_params: starting flat parameter vector (not modified).
    num_iters: number of iterations to run.
    worker_pool: optional object with a `map(func, iterable)` method used to
      evaluate candidates. When omitted, the notebook-level `pool` is used.

  Returns:
    (params, reward_per_iteration): the final parameter vector and an array
    with the mean population reward of each iteration.
  """
  # Only touch the global `pool` when the caller did not supply one.
  mapper = (worker_pool if worker_pool is not None else pool).map

  num_params = len(initial_params)
  reward_per_iteration = np.zeros(num_iters)
  params = initial_params
  for t in range(num_iters):
    t0 = datetime.now()
    noises = np.random.randn(population_size, num_params)
    candidates = [params + sigma * noise for noise in noises]
    rewards = np.array(mapper(f, candidates))

    mean = rewards.mean()
    std = rewards.std()
    reward_per_iteration[t] = mean
    # If every candidate scored the same (std == 0) the standardised advantage
    # would be 0/0 = NaN and poison the parameters for all later iterations.
    # There is no search direction in that case, so skip the update instead.
    # (A NaN std also fails this comparison and skips the update.)
    if std > 0:
      advantage = (rewards - mean) / std
      params = params + lr / (population_size * sigma) * np.dot(noises.T, advantage)
    print("Iter:", t, "Avg Reward:", mean, "Duration:", (datetime.now() - t0))

  return params, reward_per_iteration

In [30]:
def reward_function(params):
  """Reward for the evolution strategy: training-set accuracy of `params`.

  Builds a fresh ANN with the notebook-level sizes (D, M, K), loads the flat
  parameter vector into it, and scores it on (Xtrain, Ytrain).
  """
  candidate = ANN(D, M, K)
  candidate.set_params(params)
  accuracy = candidate.score(Xtrain, Ytrain)
  return accuracy

In [None]:
# Start from a randomly initialised network and optimise its flat parameter
# vector with the evolution strategy, using training accuracy as the reward.
model = ANN(D, M, K)
model.init()
params = model.get_params()
best_params, rewards = evolution_strategy(
  f=reward_function,
  population_size=50,
  sigma=0.1,
  lr=0.2,
  initial_params=params,
  num_iters=600,
)

# plot the rewards per iteration (labelled so the figure stands on its own)
plt.plot(rewards)
plt.xlabel("Iteration")
plt.ylabel("Average population reward (training accuracy)")
plt.title("Evolution strategy: average reward per iteration")
plt.show()

# final train and test accuracy
model.set_params(best_params)
print("Train score:", model.score(Xtrain, Ytrain))
print("Test score:", model.score(Xtest, Ytest))

Iter: 0 Avg Reward: 0.08930829268292681 Duration: 0:00:38.400958
Iter: 1 Avg Reward: 0.13219853658536584 Duration: 0:00:35.674836
Iter: 2 Avg Reward: 0.16495853658536583 Duration: 0:00:38.147350
Iter: 3 Avg Reward: 0.21053756097560977 Duration: 0:00:35.995849
Iter: 4 Avg Reward: 0.23547853658536588 Duration: 0:00:37.679075
Iter: 5 Avg Reward: 0.2603985365853659 Duration: 0:00:36.594103
Iter: 6 Avg Reward: 0.286880487804878 Duration: 0:00:38.430512
Iter: 7 Avg Reward: 0.29825121951219513 Duration: 0:00:35.740210
Iter: 8 Avg Reward: 0.30247999999999997 Duration: 0:00:37.236367
Iter: 9 Avg Reward: 0.3511346341463415 Duration: 0:00:35.494504
Iter: 10 Avg Reward: 0.3761190243902439 Duration: 0:00:37.118562
Iter: 11 Avg Reward: 0.4099478048780487 Duration: 0:00:37.977372
Iter: 12 Avg Reward: 0.4414424390243903 Duration: 0:00:37.054806
Iter: 13 Avg Reward: 0.46046682926829263 Duration: 0:00:38.568333
Iter: 14 Avg Reward: 0.4808141463414634 Duration: 0:00:38.891797
Iter: 15 Avg Reward: 0.50600