In [None]:
import tensorflow as tf      # Deep Learning library
from collections import namedtuple
import numpy as np           # Handle matrices
import random                # Handling random number generation
import time                  # Handling time calculation
from collections import deque# Ordered collection with ends
from keras.models import Sequential
from keras.layers import * #or use import Dense, Activation, Flatten
from keras.optimizers import * # or use import Adam
from unityagents import UnityEnvironment
import sys
from mlagents.envs import UnityEnvironment

In [None]:
# --- Genetic-algorithm settings ---
population_size = 50   # number of genomes Generate() builds
num_iterations = 500   # crossover/replace rounds performed by Run()
mutate_pct = 0.05      # mutation probability used by Mutate(); was `.05f`, which is not valid Python

# --- Agent / simulation settings ---
nb_actions = 20        # passed to OurDQNAgent in Fitness()
memory = deque(maxlen=1000)
state_size = 129       # passed to OurDQNAgent in Fitness()
sim_length = 500       # number of environment steps per Train_Model() call

# NOTE(review): `env_name` is not assigned in any cell shown here; it must be
# defined (path/name of the Unity build) before this line runs.
env = UnityEnvironment(file_name=env_name)

In [None]:
def ValuesToGenome(lr, dr, er, hn):
    """Encode four non-negative integers as a fixed-width, 10-digit genome string.

    Layout (each field zero-padded on the left):
        characters 0-2 : lr (3 digits)
        characters 3-4 : dr (2 digits)
        characters 5-6 : er (2 digits)
        characters 7-9 : hn (3 digits)

    For two-digit lr/dr/er this is the same string the original expression
    ('0' + lr + dr + er + optional '0' + hn) was meant to build, but the width
    no longer depends on how many digits each value happens to have.

    Args:
        lr, dr, er, hn: integer values to encode.

    Returns:
        str: the 10-character genome.

    Raises:
        ValueError: if a value is negative or has too many digits for its slot.
    """
    layout = (("lr", lr, 3), ("dr", dr, 2), ("er", er, 2), ("hn", hn, 3))
    pieces = []
    for label, value, width in layout:
        value = int(value)
        if not 0 <= value < 10 ** width:
            raise ValueError(
                "{} = {} does not fit in {} digit(s)".format(label, value, width)
            )
        pieces.append(str(value).zfill(width))
    return "".join(pieces)

In [None]:
def GenomeToValues(genome):
    """Split a genome string into its four consecutive fields.

    Args:
        genome: digit string; characters 0-2 are lr, 3-4 are dr, 5-6 are er
            and everything from index 7 onward is hn.

    Returns:
        tuple[str, str, str, str]: (lr, dr, er, hn) as substrings of
        ``genome``. No numeric conversion is performed here.
    """
    lr = genome[0:3]
    dr = genome[3:5]
    # Was genome[4:7], which re-used index 4 (the last character of dr) and
    # made er three characters wide.
    er = genome[5:7]
    hn = genome[7:]
    return lr, dr, er, hn

In [None]:
def Get_State():
    """Return ``vector_observations`` of the 'CrawlerBrain' entry in the global ``info``."""
    return info['CrawlerBrain'].vector_observations

In [None]:
def Send_Action(next_action):
    """Step the global ``env`` with ``next_action`` and return whatever ``env.step`` returns."""
    step_result = env.step(next_action)
    return step_result

In [None]:
def goal_distance():
    """Return the norm of the first three entries of the current observations.

    Reads the 'CrawlerBrain' entry of the global ``info`` and takes entries
    0, 1 and 2 of its ``vector_observations``.
    NOTE(review): presumably these three entries are the offset to the goal;
    confirm against the environment's observation layout.

    Returns:
        The value of ``np.linalg.norm`` over those three entries.
    """
    observations = info['CrawlerBrain'].vector_observations
    # np.array takes ONE sequence as its data argument; the second positional
    # argument is dtype, so the three values have to be wrapped in a list.
    distanceVector = np.array([observations[0], observations[1], observations[2]])
    return np.linalg.norm(distanceVector)

In [None]:
def Determine_Reward(state, next_state):
    """Return the change in Euclidean norm of the first three components.

    Computes ``|next_state[:3]| - |state[:3]|``, so the result is positive
    when the norm grew between the two states and negative when it shrank.

    Args:
        state: indexable with at least three numeric entries.
        next_state: indexable with at least three numeric entries.

    Returns:
        The difference of the two norms.
    """
    x1, y1, z1 = state[0], state[1], state[2]
    x2, y2, z2 = next_state[0], next_state[1], next_state[2]
    # The original used math.sqrt without importing math, and passed z1**2 as
    # a second argument (comma instead of +). np is already imported by the file.
    norm_before = np.sqrt(x1**2 + y1**2 + z1**2)
    norm_after = np.sqrt(x2**2 + y2**2 + z2**2)
    return norm_after - norm_before

In [None]:
def Train_Model(q_nn):
    """Run one simulation of ``sim_length`` steps and train ``q_nn`` on it.

    The global ``info`` is refreshed from ``env.reset()`` and from every
    ``env.step`` result, because Get_State() and goal_distance() read their
    observations from that global.
    NOTE(review): this assumes env.reset()/env.step() return the mapping that
    is indexed with 'CrawlerBrain' elsewhere in this notebook — confirm
    against the installed ML-Agents version.

    Args:
        q_nn: agent object providing ``act(state)``,
            ``remember(state, action, reward, next_state)`` and
            ``replay(batch_size)``.

    Returns:
        None
    """
    global info
    # Start a fresh episode and read the initial observations from it.
    info = env.reset()
    state = Get_State()
    steps_run = 0
    for _ in range(sim_length):
        # Ask the agent under training for an action (the original referenced
        # an undefined name `agent` here).
        action = q_nn.act(state)
        # Advance the simulation; the step result replaces `info` so that
        # Get_State() returns the post-step observations.
        info = Send_Action(action)
        next_state = Get_State()
        reward = Determine_Reward(state, next_state)
        # Store the transition for experience replay.
        q_nn.remember(state, action, reward, next_state)
        state = next_state
        steps_run += 1
    # Train the agent on a batch sampled from the stored transitions.
    q_nn.replay(32)
    # The original message formatted `e` and `episodes`, which do not exist.
    print("simulation finished after {} steps".format(steps_run))
    return

In [None]:
def Fitness(genome):
    """Build an agent from ``genome``, train it, and return goal_distance().

    Args:
        genome: genome string understood by GenomeToValues.

    Returns:
        The value of goal_distance() after training has finished.
    """
    # The original called GenomeToValues() with no argument.
    lr, dr, er, hn = GenomeToValues(genome)
    # NOTE(review): GenomeToValues returns substrings; OurDQNAgent is not
    # visible here, so confirm whether it expects strings or numbers.
    q_nn = OurDQNAgent(state_size, nb_actions, lr, dr, er, hn)
    Train_Model(q_nn)
    return goal_distance()

In [None]:
def Generate():
    """Create the initial population of randomly drawn, evaluated genomes.

    Returns:
        dict: maps each genome string to its Fitness() value and contains
        exactly ``population_size`` distinct genomes.
    """
    def random_genome():
        # dr and er are read back from two-character slices of the genome
        # (see GenomeToValues), so they are drawn from 1..99 rather than
        # 1..100; lr and hn have three-character slots.
        return ValuesToGenome(
            random.randint(1, 100),
            random.randint(1, 99),
            random.randint(1, 99),
            random.randint(1, 200),
        )

    genomes = {}
    while len(genomes) < population_size:
        genome = random_genome()
        if genome in genomes:
            # A repeated genome would overwrite its own entry (shrinking the
            # population) and pay for another Fitness() run; draw again.
            continue
        # Dicts do not support `+=`; insert by key instead.
        genomes[genome] = Fitness(genome)
    return genomes

In [None]:
def K_Tournament_Select(population=None):
    """Pick one genome by tournament selection.

    A random number k (1..len(population)) of distinct genomes is drawn and
    the one with the highest fitness value wins.

    Args:
        population: dict mapping genome -> fitness. When omitted, a
            module-level ``population`` is used if one exists (the original
            took no parameter and read that name directly).

    Returns:
        The winning genome (a key of ``population``).

    Raises:
        ValueError: if no non-empty population is available.
    """
    if population is None:
        population = globals().get("population")
    if not population:
        raise ValueError("K_Tournament_Select needs a non-empty population")

    genomes = list(population)
    # The original indexed the dict with random integers up to
    # population_size + 1 (out of range, and not valid keys) and replaced the
    # entrant dict on every iteration instead of adding to it.
    entrant_count = random.randint(1, len(genomes))
    entrants = random.sample(genomes, entrant_count)
    return max(entrants, key=lambda g: population[g])

In [None]:
def Crossover(population):
    """Produce two children by single-point crossover of two selected parents.

    Args:
        population: dict mapping genome string -> fitness.

    Returns:
        tuple[str, str]: (head of parent 1 + tail of parent 2,
                          head of parent 2 + tail of parent 1).
    """
    # The original called KTournamentSelect / KtournamentSelect, neither of
    # which exists; the selector is named K_Tournament_Select.
    parent1 = K_Tournament_Select(population)
    parent2 = K_Tournament_Select(population)
    # spot is the index where the tails are exchanged; for 10-character
    # genomes this is 1..9, the range the original hard-coded.
    spot = random.randint(1, min(len(parent1), len(parent2)) - 1)
    child1 = parent1[:spot] + parent2[spot:]
    child2 = parent2[:spot] + parent1[spot:]
    return child1, child2

In [None]:
def Mutate(genome, rate=None):
    """Return ``genome`` with, at probability ``rate``, one digit changed.

    Args:
        genome: digit string.
        rate: mutation probability in [0, 1]; defaults to the module-level
            ``mutate_pct``.

    Returns:
        str: the (possibly mutated) genome. Strings are immutable, so a new
        string is returned instead of editing ``genome`` in place; the
        original returned None although its caller uses the result.
    """
    if rate is None:
        rate = mutate_pct
    if not genome or random.random() >= rate:
        return genome

    # Position restricted to '0'/'1'. Index 7 is the first character of the
    # hn slice (genome[7:] in GenomeToValues).
    # NOTE(review): the original toggled index 6, which lies inside the er
    # slice; 7 is presumably what was meant — confirm.
    binary_spot = 7

    spot = random.randrange(len(genome))
    old_digit = genome[spot]
    if spot == binary_spot:
        new_digit = '1' if old_digit == '0' else '0'
    else:
        # Choose among the other nine digits so a mutation always changes the
        # genome (the original loop condition was inverted and randint(0, 10)
        # could produce the two-character value "10").
        new_digit = random.choice([d for d in "0123456789" if d != old_digit])
    return genome[:spot] + new_digit + genome[spot + 1:]

In [None]:
def Replace(genome, population):
    """Mutate ``genome`` and insert it in place of the least-fit member.

    Args:
        genome: candidate genome string.
        population: dict mapping genome -> fitness; modified in place.

    Returns:
        dict: the same ``population`` object.
    """
    # Keep the input if Mutate returns nothing.
    genome = Mutate(genome) or genome
    if genome in population:
        # Already a member: inserting it again would remove the worst genome
        # without adding a new one, shrinking the population.
        return population

    # Evaluate before removing anything, so an error in Fitness() does not
    # cost the population a member.
    fitness = Fitness(genome)
    if population:
        # The original used sys.maxint (absent in Python 3), seeded `worst`
        # with population[0] (not a valid key), and deleted the loop variable
        # `g` instead of the worst genome it had found.
        worst = min(population, key=lambda g: population[g])
        del population[worst]
    # `population += {genome, fitness}` built a set and is not a dict
    # operation; insert by key.
    population[genome] = fitness
    return population

In [None]:
def Run():
    """Run the genetic algorithm for ``num_iterations`` crossover/replace rounds.

    Returns:
        dict: the final population (genome -> fitness). The original returned
        None, so callers that ignore the result are unaffected.
    """
    population = Generate()
    for _ in range(num_iterations):
        # Only the first child is inserted, as in the original; the second
        # crossover product is discarded.
        child, _unused_child = Crossover(population)
        # The original omitted the population argument that Replace requires.
        population = Replace(child, population)
    return population

In [None]:
if __name__ == "__main__":
    # Entry point: execute the genetic-algorithm loop defined in Run().
    Run()