# The Training Process

The training process consists of two files located in Network, Evaluate.py and Train.py, each of which will be discussed in their respective sections.

 ### The Evaluate Function

The Evaluate.py file contains a function called evaluate. It takes in a neural network, a boolean variable called draw, and the max steps the evaluation will run. It then runs the game one time step at a time, at each time step giving the neural network the required inputs and moving the paddle down if it outputs a 0 or up if it outputs a 1.

As an output it returns the fitness value of the neural network and a boolean value finished which is true if the evaluation reached max steps.

In [1]:
from Pong.Game import Game


def evaluate(nn, draw, max_steps = 100000):
    finished = False
    offset = 10e+10
    outputs = []
    game = Game((1000, 600), draw=draw)
    running = 0
    time_steps = 0
    while running == 0:
        time_steps += 1
        inputs = game.getCords()
        output = nn.calc(inputs=[inputs[0], inputs[2], inputs[3]])[0]
        # print(output, nn.calc(inputs=[inputs[0], inputs[2], inputs[3]]))
        if output == 0:
            game.left_paddle.paddleMoveUp(-1)
        else:
            game.left_paddle.paddleMoveDown(1)
        game.gameStep()
        running = game.right_player
        outputs.append(output)
        if inputs[1] == -360:
            offset = abs(inputs[0] - inputs[2])
        if time_steps == max_steps:
            finished = True
            break
    if draw:
        for turtle in game.sc.turtles():
            turtle.reset()
            turtle.clear()
        game.sc.clear()
        game.sc.reset()
        game.sc.bye()
        del game
    return time_steps - offset, finished

In [10]:
from Network import Network

In [18]:
# Try playing around with different networks, they won't be very good, but maybe you will get lucky
layers = [8, 8, 1]
net = Network.Network(3, layers)
print(evaluate(net, False, 10000))


(344.0, False)


### The Train Class

The Train.py file consists of a helper function evalGen which takes a list of neural networks, runs evaluate on all of them and returns the network, score pair as a tuple, and a main class, Train.

The Train class takes in the number of generations, the size of each generation, and the shape of the network. It automatically adds the last layer so no need to add a 1 layer at the end.

The only function of the Train class is called train. It takes in 3 optional variables of slice_size, mutation_rate, change_value, and draw_frequency. The slize_size tells the function how many models to create the new generation from, while the mutation_rate and change_value are the same ones used in the update function in the Network class. Finally, the draw_frequency tell the function how often to display the best individual of the generation.

The function generates an initial generation and takes the best performing individuals and replicates them, calling update on all of these replicates. The process is repeated till a neural network reaches max_steps, or it does the max number of generations. It then displays the best performing model and writes the weights and biases to a file.

In [2]:
from Network.Network import Network
from operator import itemgetter
import numpy as np


def evalGen(gen, max_steps):
    scores = []
    for nn in gen:
        evaled = evaluate(nn, max_steps=max_steps, draw=False)
        scores.append((evaled[0], nn, evaled[1]))
    return scores


class Train:
    def __init__(self, generations, size=100, shape=None):
        if shape is None:
            shape = [8, 8, 8]
        self.generations = generations
        self.size = size
        shape.append(1)
        self.shape = (3, shape)

    def train(self, slice_size=10, draw_frequency=10, mutation_rate=0.5, change_value=5, max_steps=100000):
        gen = []
        best = []

        for i in range(self.size):
            gen.append(Network(self.shape[0], self.shape[1]))

        for i in range(self.generations):

            scores = evalGen(gen, max_steps=max_steps)
            scores = sorted(scores, key=itemgetter(0), reverse=True)
            best = scores[0:self.size // slice_size]

            newGen = []

            for nn in best:
                network = nn[1]
                weights, biases = network.getWeights()

                for j in range(len(weights)):
                    weights[j] = np.copy(weights[j])

                for j in range(slice_size-1):
                    net = Network(self.shape[0], self.shape[1], weights=weights, bias=np.copy(biases))
                    net.update(mutation_rate=mutation_rate, change_value=change_value)
                    newGen.append(net)

                newGen.append(network)

            gen = newGen

            # Run some inference type things

            unique = []

            for network in scores:
                if network[0] not in unique:
                    unique.append(network[0])
            print(best)
            print(unique)

            if best[0][2]:
                break

            if i % draw_frequency == 0 and i > 0:
                evaluate(best[0][1], draw=True)

        w, b = best[0][1].getWeights()
        with open('best.txt', 'w') as f:
            f.write('weights='+str(w)+'\n'+'bias='+str(b)+'\n')
        print("weights=" + str(w), "bias=" + str(b))
        evaluate(best[0][1], draw=True)

In [3]:
# Try playing around with various generation count, sizes, and neutral network architecture
# You can also change the mutation_rate, change_value, and slice_size, try to find the fewest amount of gnerations you can max out in.
t = Train(50, size=100, shape=[8, 8, 8])
t.train(slice_size=10, draw_frequency=100, mutation_rate=0.5, change_value=5, max_steps=100000)

  return array(a, order=order, subok=subok, copy=True)


[(1852.0, <Network.Network.Network object at 0x0000011032549D50>, False), (582.0, <Network.Network.Network object at 0x00000110340CB010>, False), (426.0, <Network.Network.Network object at 0x00000110340F4400>, False), (416.0, <Network.Network.Network object at 0x0000011032554310>, False), (412.0, <Network.Network.Network object at 0x00000110325488B0>, False), (406.0, <Network.Network.Network object at 0x00000110340F4640>, False), (402.0, <Network.Network.Network object at 0x000001103254B010>, False), (390.0, <Network.Network.Network object at 0x00000110325555D0>, False), (390.0, <Network.Network.Network object at 0x0000011032556C50>, False), (378.0, <Network.Network.Network object at 0x0000011032549990>, False)]
[1852.0, 582.0, 426.0, 416.0, 412.0, 406.0, 402.0, 390.0, 378.0, 370.0, 364.0, 356.0, 344.0, 286.0, 278.0, 260.0, 246.0, 188.0, 184.0, -60.0]
[(398.0, <Network.Network.Network object at 0x00000110325D0AC0>, False), (398.0, <Network.Network.Network object at 0x00000110325D0CA0>,