In [40]:
import matplotlib.pyplot as plt
import numpy as np
import sys
import random
from gym_unity.envs import UnityEnv
%matplotlib inline

print("Python version:")
print(sys.version)

Python version:
3.6.6 |Anaconda, Inc.| (default, Jun 28 2018, 11:27:44) [MSC v.1900 64 bit (AMD64)]


In [41]:
number_of_observations = 42
number_of_actions = 6
num_of_generations = 20
episode_max_length = 1000

# Genetic algorithm parameters
mutation_rate = 0.05
max_mutation_value_change = 0.2
number_of_chromosomes = 40
number_of_elite_chromosomes = 4

# Neural network parameters
input_layer_nodes = number_of_observations + 1
hidden_layer_nodes = number_of_observations
output_layer_nodes = number_of_actions

render = True

In [42]:
def relu(x):
    return np.maximum(0,x)


def predict_using_neural_network(observation, chromosome):
    input_values = observation / max(np.max(np.linalg.norm(observation)), 1)
    input_values = np.insert(1.0, 1, input_values)
    hidden_layer_values = relu(np.dot(input_values, chromosome[0]))
    output_layer_values = relu(np.dot(hidden_layer_values, chromosome[1]))
    return np.argmax(output_layer_values)

In [43]:
def prepare_random_population(number_of_chromosomes):
    population = []
    for i in range(number_of_chromosomes):
        hidden_layer_weights = np.random.rand(input_layer_nodes, hidden_layer_nodes)*2 - 1
        output_layer_weights = np.random.rand(hidden_layer_nodes, output_layer_nodes)*2 - 1
        population.append([hidden_layer_weights, output_layer_weights])
    return population

In [44]:
def generate_actions(population, observation):
    actions = []
    for index in range(len(population)):
        actions.append(predict_using_neural_network(observation[index], population[index]))
    return actions

def run_episode(population):
    observation = env.reset()
    total_rewards = [0.0]*number_of_chromosomes
    # Running episode for every agent
    for t in range(episode_max_length):
        actions = generate_actions(population, observation)
        observation, reward, done, info = env.step(actions)
        total_rewards = [sum(x) for x in zip(total_rewards, reward)]

    return total_rewards

In [45]:
def change_to_flatten_list(chromosome):
    input_layer = chromosome[0]
    input_layer = input_layer.reshape(input_layer.shape[1], -1)
    hidden_layer = chromosome[1]
    return np.append(input_layer, hidden_layer.reshape(hidden_layer.shape[1], -1))


def mutation(chromosome):
    random_value = np.random.randint(0, len(chromosome))
    if random_value < mutation_rate:
        n = np.random.randint(0, len(chromosome))
        chromosome[n] += (np.random.rand()*max_mutation_value_change) - max_mutation_value_change / 2
    return chromosome


def crossover(best_chromosomes):
    new_population = best_chromosomes
    for index in range(number_of_chromosomes - number_of_elite_chromosomes):
        parents = random.sample(range(number_of_elite_chromosomes), 2)
        cut_point = random.randint(0, len(best_chromosomes[0]))
        new_chromosome = np.append(best_chromosomes[parents[0]][:cut_point], best_chromosomes[parents[1]][cut_point:])
        new_chromosome = mutation(new_chromosome)
        new_population.append(new_chromosome)
    return new_population

In [46]:
def generate_next_population(population, rewards):
    best_chromosomes_indexes = np.asarray(rewards).argsort()[-number_of_elite_chromosomes:][::-1]
    best_chromosomes_list = []
    for index in best_chromosomes_indexes:
        chromosome_flatten = change_to_flatten_list(population[index])
        best_chromosomes_list.append(chromosome_flatten)

    new_population_flatten = crossover(best_chromosomes_list)
    new_population = []
    for chromosome_flatten in new_population_flatten:
        input_layer_flatten = np.array(chromosome_flatten[:hidden_layer_nodes * input_layer_nodes])
        input_layer_reshaped = np.reshape(input_layer_flatten, (-1, population[0][0].shape[1]))
        hidden_layer_flatten = np.array(chromosome_flatten[hidden_layer_nodes * input_layer_nodes:])
        hidden_layer_reshaped = np.reshape(hidden_layer_flatten, (-1, population[0][1].shape[1]))
        new_population.append([input_layer_reshaped, hidden_layer_reshaped])

    return new_population

In [47]:
env_name = "../../env/multi-6-agent/Tanks" 
env = UnityEnv(env_name, worker_id=0, use_visual=False, multiagent=True)
print(str(env))

INFO:mlagents.envs:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Reset Parameters :
		
Unity brain name: PPOBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space size (per agent): 42
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): [6]
        Vector Action descriptions: 
INFO:gym_unity:10 agents within environment.


<UnityEnv instance>


In [48]:
population = prepare_random_population(number_of_chromosomes=number_of_chromosomes)
rewards = run_episode(population)
for generation in range(num_of_generations):
    population = generate_next_population(population, rewards)
    rewards = run_episode(population)
    best = np.amax(rewards)
    avg = np.average(rewards)
    print("Generation: {}, Avg: {}, Best: {}".format(generation + 1, avg, best))

env.close()

Generation: 1, Avg: 0.05999999827181455, Best: 0.8999999859297532
Generation: 2, Avg: 0.05999999827181455, Best: 0.8999999859297532
Generation: 3, Avg: -0.019999999600986484, Best: 0.8999999859297532
Generation: 4, Avg: -0.09999999747378752, Best: -0.09999999747378752
Generation: 5, Avg: -0.17000000061816536, Best: 0.3999999825391569
Generation: 6, Avg: -0.040000002406304705, Best: 0.6999999812542228
Generation: 7, Avg: 0.13999999614461558, Best: 0.8999999859297532
Generation: 8, Avg: -0.030000007677881513, Best: 1.3999999659426976
Generation: 9, Avg: 0.05999999827181455, Best: 0.8999999859297532
Generation: 10, Avg: -0.19999999815190678, Best: -0.09999999747378752
Generation: 11, Avg: -0.010000004872563296, Best: 0.8999999859297532
Generation: 12, Avg: -0.32000000163534426, Best: -1.021544449031353e-08
Generation: 13, Avg: -0.24999999849096638, Best: -0.09999999747378752
Generation: 14, Avg: -0.17000000061816536, Best: 0.09999998382409103
Generation: 15, Avg: -0.12000000027910573, Bes

In [10]:
env.close()