Neuroevolution: Exercise 4
=========
###### Artur Ganzha 10019651
---------	
###### Raul Gorek 10061333
---------	

In [1]:
import numpy as np
import copy as cp

In [2]:
def derivative_bcel(prediction, ground_truth):
    x =  np.where(ground_truth == 0, 1.0 / (1.0 - prediction), -1.0 / prediction)
    return x

class Linear:
    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.W = np.random.uniform(-1, 1,(self.input_size,self.output_size))
        self.B = np.zeros((1, self.output_size))
    
    def forward(self, x):
        self.fw = x
        return np.dot(x, self.W) + self.B
    
    def backward(self, d, lr):
        d_w = np.dot(self.fw.T, d)
        d_e = np.dot(d, self.W.T)
        d_b = np.sum(d, axis=0, keepdims=True)
        self.W -= lr * d_w / self.fw.shape[0]
        self.B -= lr * d_b / self.fw.shape[0]
        return d_e


class ReLU:
    def __init__(self):
        pass

    def forward(self, x):
        self.fw = x
        return x * (x > 0)
    
    def backward(self, d, lr):
        return d * np.where(self.fw > 0, 1.0, 0.0)
    

class Sigmoid:
    def __init__(self):
        pass
    
    def forward(self, x):
        self.fw = x
        self.out = 1.0 / (1.0 + np.exp(-x))
        return self.out
    
    def backward(self, d, lr):
        return d * (self.out * (1.0 - self.out))
    

class NeuralNetwork:
    def __init__(self, layers: list):
        self.layers = layers

    def forward_pass(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x
    
    def backward_pass(self, deriv, lr):
        for layer in reversed(self.layers):
            deriv = layer.backward(deriv, lr)


# Aufgabe 1

In [3]:
def print_weights(self):
    for layer in self.layers:
        if type(layer) == Linear:
            print("Bias: \n", layer.B)
            print("Weights: \n", layer.W)

def mutate_weights(self):
    for layer in self.layers:
        if type(layer) == Linear:
            layer.W += np.random.normal(0,1, size=layer.W.shape)
            layer.B += np.random.normal(0,1,size=layer.B.shape)


NeuralNetwork.mutate_weights = mutate_weights
NeuralNetwork.print_weights = print_weights

In [4]:
arch = [
    Linear(2,8),
    ReLU(),
    Linear(8,1),
    Sigmoid()
]
net = NeuralNetwork(arch)
net.print_weights()
net.mutate_weights()
net.print_weights()

Bias: 
 [[0. 0. 0. 0. 0. 0. 0. 0.]]
Weights: 
 [[ 0.72609461 -0.91190971  0.56544948 -0.40530036  0.15721535 -0.1135427
  -0.42260325 -0.3547749 ]
 [ 0.47263947 -0.13143757  0.83154339  0.36383336  0.93500126 -0.43025643
  -0.23235111  0.29828577]]
Bias: 
 [[0.]]
Weights: 
 [[ 0.20748512]
 [ 0.63852403]
 [ 0.29486175]
 [-0.31485759]
 [ 0.47829978]
 [ 0.46640971]
 [ 0.47934609]
 [ 0.04988258]]
Bias: 
 [[-0.84037203 -1.06723698  0.48104754  0.80929495 -1.61986695 -0.85353387
  -0.72197012  0.84513447]]
Weights: 
 [[-0.80476021 -1.147357   -0.11624117  0.21554518  0.21553871 -1.91694353
   0.27276284 -0.97173543]
 [ 0.11188649  0.99873087  1.0888878   1.61811882  0.2144705  -0.40618126
   0.5764016  -0.58597989]]
Bias: 
 [[-0.96813439]]
Weights: 
 [[-0.42506515]
 [-0.76624234]
 [-0.61190791]
 [-0.65981488]
 [ 0.37405602]
 [ 1.26893754]
 [-2.80023014]
 [ 0.81757682]]


# Aufgabe 2

In [5]:
batch = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
labels = np.array([[0.0], [1.0], [1.0], [0.0]]) 

In [6]:
def fitness(net: NeuralNetwork):
    y_hat = net.forward_pass(batch)
    accuracy = np.mean(1 - np.abs(labels - y_hat))
    return accuracy

In [7]:
def elitist_selection(individuals: list[NeuralNetwork], n):
    f = np.zeros(shape=(len(individuals,)))
    for i, net in enumerate(individuals):
        f[i]= (fitness(net))
    return [individuals[i] for i in (-f).argsort()[:n]]

In [8]:
def fitness_proportional_selection(individuals: list[NeuralNetwork], n):
    f = np.zeros(shape=(len(individuals,)))
    for i, net in enumerate(individuals):
        f[i]= (fitness(net))
    return list(np.random.choice(individuals, size=(n,), p=f/f.sum(), replace=False))

## Test mit Elitist

In [32]:
import time
import copy

popsize = 1
elite = 1
individuals = [NeuralNetwork([Linear(2,8), ReLU(), Linear(8,1), Sigmoid()]) for _ in range(popsize)]

max_compute_time = 5.0
t1 = time.perf_counter()
it = 0
while time.perf_counter() - t1 < max_compute_time:
    individuals = elitist_selection(individuals, elite)
    cop = copy.deepcopy(individuals[0])
    cop.mutate_weights()
    individuals.append(cop)
    it += 1

fittest = sorted(individuals, key=lambda x: fitness(x))[0]
print("Accuracy of fittest: ", fitness(fittest))
print("XOR of fittest: \n", fittest.forward_pass([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]))
print("Weights of fittest:")
print(fittest.print_weights())

Accuracy of fittest:  0.9999999995066218
XOR of fittest: 
 [[1.90903964e-09]
 [1.00000000e+00]
 [1.00000000e+00]
 [6.44732017e-11]]
Weights of fittest:
Bias: 
 [[ -2.93865288 -12.40487243   0.2822418    7.88665885   2.31474114
    6.73714254  -1.63847066   3.34725324]]
Weights: 
 [[  7.09256619  -5.76258805   0.3749416   -6.23455898  -5.04613591
    4.6795403   13.35063804   5.02091457]
 [ -2.36123224  -4.61007846  -4.71935196   1.1495039    0.0906418
   -7.31834872 -11.83062061  -4.79061021]]
Bias: 
 [[6.46558763]]
Weights: 
 [[  8.35737261]
 [ -6.2060298 ]
 [  3.56155371]
 [  8.32266465]
 [ -5.08906826]
 [ -6.05262743]
 [ 15.24663151]
 [-12.13777297]]
None


## Test mit Fitness Proportional

In [33]:
import time
import copy

popsize = 40
elite = 5
individuals = [NeuralNetwork([Linear(2,8), ReLU(), Linear(8,1), Sigmoid()]) for _ in range(popsize)]

max_compute_time = 5.0
t1 = time.perf_counter()
while time.perf_counter() - t1 < max_compute_time:
    individuals = fitness_proportional_selection(individuals, elite)
    mutated_ones = []
    i = 0
    while len(mutated_ones) < popsize - elite:
        cop = copy.deepcopy(individuals[i])
        cop.mutate_weights()
        mutated_ones.append(cop)
        i += 1
        i %= elite
    individuals += mutated_ones

fittest = sorted(individuals, key=lambda x: fitness(x))[0]
print("Accuracy of fittest: ", fitness(fittest))
print("XOR of fittest: \n", fittest.forward_pass([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]))
print("Weights of fittest:")
print(fittest.print_weights())

  self.out = 1.0 / (1.0 + np.exp(-x))


Accuracy of fittest:  0.5052999723313124
XOR of fittest: 
 [[9.78800111e-001]
 [1.70194592e-195]
 [1.00000000e+000]
 [4.93235036e-044]]
Weights of fittest:
Bias: 
 [[ -6.86884305   2.98348224  -5.23497388 -11.64712371 -22.55188824
    4.3329667    2.3208159  -11.00170399]]
Weights: 
 [[ -9.02863546   1.43989013  15.50261694  -3.38781866 -17.32870493
   13.03304171 -16.36512616 -24.66635871]
 [ -8.47247252   1.94502589 -12.11061017 -24.74442206 -25.33241005
   -0.66008609  16.95587228   1.53946307]]
Bias: 
 [[9.37712965]]
Weights: 
 [[ -5.93493489]
 [  1.35415428]
 [-29.3129626 ]
 [ -1.91400149]
 [  3.07503314]
 [ -9.37834061]
 [ 13.37940857]
 [-17.64581053]]
None


## Beobachtungen
Elitist Selection konvergiert manchmal gut, manchmal bleibt er in lokalem Minimum stecken.
Fitness Prop Selection haben wir gar nicht zur Konvergenz gebracht.  
- Liegt warhscheinlich daran, dass die Fitnesswerte oft viel zu nah beieinander liegen (Durch Sigmoid am Ende). Dadurch werden viele Netze mit schlechten Gewichten genommen und es gibt gar keine richtige Chance zu konvergieren

# Aufgabe 3

### Vergleich der beiden Modelle (Evo vs. Backprop)

#### Vergleich Genauigkeit

In [11]:
def train_xor_evo(max_compute_time, popsize, elite):
    individuals = [NeuralNetwork([Linear(2,8), ReLU(), Linear(8,1), Sigmoid()]) for _ in range(popsize)]
    t1 = time.perf_counter()
    while time.perf_counter() - t1 < max_compute_time:
        individuals = elitist_selection(individuals, elite)
        mutated_ones = [copy.deepcopy(individuals[0])]
        mutated_ones[0].mutate_weights()
        i = 1
        while len(mutated_ones) < popsize - elite:
            cop = copy.deepcopy(individuals[i])
            cop.mutate_weights()
            mutated_ones.append(cop)
            i += 1
            i %= elite
        individuals += mutated_ones

    fittest = sorted(individuals, key=lambda x: fitness(x))[0]
    return fitness(fittest)

In [12]:
xor_batch = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
xor_label = np.array([[0.0], [1.0], [1.0], [0.0]])
def train_backprop(max_time, batch_size, lr):
    net = NeuralNetwork([
        Linear(2,8),
        ReLU(),
        Linear(8,1),
        Sigmoid()
    ])
    t1 = time.perf_counter()
    while time.perf_counter() - t1 < max_time:
        batch = np.random.randint(0,2, size=(batch_size, 2))
        labels = np.logical_xor(batch[:, 0], batch[:, 1]).astype(float)[:,None]
        prediction = net.forward_pass(batch)
        loss_deriv = derivative_bcel(prediction, labels)
        net.backward_pass(loss_deriv, lr)
    prediction = net.forward_pass(xor_batch)
    return np.mean(1 - np.abs(xor_label - prediction))
    

In [13]:
train_time = 5.0
num_exp = 10
mean_evo_acc = np.zeros((10,))
mean_bp_acc = np.zeros_like(mean_evo_acc)
for i in range(num_exp):
    mean_evo_acc[i] = train_xor_evo(train_time, popsize=1, elite=1)
    mean_bp_acc[i] = train_backprop(train_time, batch_size=64, lr=0.05)
mean_evo_acc = np.mean(mean_evo_acc)
mean_bp_acc = np.mean(mean_bp_acc)
print("Evo Acc: ", 100*mean_evo_acc, "%")
print("BP Acc: ", 100*mean_bp_acc, "%")
print("Evo is ", 100*((mean_evo_acc/mean_bp_acc)-1), "% besser als Backpropagation")
print("Diff in %: ", 100 * np.abs(mean_evo_acc - mean_bp_acc))

Evo Acc:  87.49999999966167 %
BP Acc:  99.74897834961276 %
Evo is  -12.279803314896453 % besser als Backpropagation
Diff in %:  12.24897834995109


#### Vergleich Der Trainingszeit

In [25]:
def train_xor_evo_time(acc, popsize, elite):
    individuals = [NeuralNetwork([Linear(2,8), ReLU(), Linear(8,1), Sigmoid()]) for _ in range(popsize)]
    max_acc = 0.0
    t1 = time.perf_counter()
    while max_acc < acc:
        individuals = elitist_selection(individuals, elite)
        max_acc = fitness(individuals[0])
        mutated_ones = []
        i = 0
        while len(mutated_ones) < popsize - elite:
            cop = copy.deepcopy(individuals[i])
            cop.mutate_weights()
            mutated_ones.append(cop)
            i += 1
            i %= elite
        individuals += mutated_ones
        individuals += mutated_ones
    return time.perf_counter() - t1

In [15]:
xor_batch = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
xor_label = np.array([[0.0], [1.0], [1.0], [0.0]])
def train_backprop_time(acc, batch_size, lr):
    net = NeuralNetwork([
        Linear(2,8),
        ReLU(),
        Linear(8,1),
        Sigmoid()
    ])
    max_acc = 0.0
    t1 = time.perf_counter()
    while max_acc < acc:
        batch = np.random.randint(0,2, size=(batch_size, 2))
        labels = np.logical_xor(batch[:, 0], batch[:, 1]).astype(float)[:,None]
        prediction = net.forward_pass(batch)
        loss_deriv = derivative_bcel(prediction, labels)
        net.backward_pass(loss_deriv, lr)
        max_acc = np.mean(1 - np.abs(xor_label - net.forward_pass(xor_batch)))
    return time.perf_counter() - t1

In [34]:
min_acc = 0.7
num_exp = 5
mean_evo_time = np.zeros((10,))
mean_bp_time = np.zeros_like(mean_evo_time)
for i in range(num_exp):
    mean_evo_time[i] = train_xor_evo_time(acc=min_acc, popsize=10, elite=3)
    print("Evo")
    mean_bp_time[i] = train_backprop_time(acc=min_acc, batch_size=64, lr=0.05)
    print("BP")
mean_evo_time = np.mean(mean_evo_time)
mean_bp_time = np.mean(mean_bp_time)
print("Evo Durchschnittszeit: ", mean_evo_time, "s")
print("BP Durchschnittszeit: ", mean_bp_time, "s")
print("Evo is ", 100*(1-(mean_evo_time/mean_bp_time)), "% schneller als Backpropagation")
print("Diff in s: ", np.abs(mean_evo_time - mean_bp_time))

Evo
BP
Evo
BP
Evo
BP
Evo
BP
Evo
BP
Evo Durchschnittszeit:  0.014743297200038797 s
BP Durchschnittszeit:  0.14763163959978556 s
Evo is  90.01345697981382 % schneller als Backpropagation
Diff in s:  0.13288834239974676


So würden wir die Laufzeiten vergleichen. Funktioniert auch für geringere Acc's. Für höhere bleibt er einfach zu oft in lokalen Minimas hängen xD