In [11]:
from server_comms import ServerComms, ServerMessageTypes
import time
import threading
import json
from IPython.display import clear_output
import numpy as np
import utils
import random

from collections import deque
from keras.models import Sequential
from keras.layers import Dense, Activation
from threading import Thread
import pickle
import multiprocessing
import os
import tensorflow as tf
from IPython.display import clear_output

In [2]:
# Reduce TensorFlow log noise on both the native side (environment variable) and
# the Python side (tf.logging verbosity).
# NOTE(review): this environment variable is set after `import tensorflow` has
# already run in the imports cell - confirm it still takes effect at this point.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.logging.set_verbosity(tf.logging.ERROR)
# Ask numpy not to use scientific notation when printing arrays.
np.set_printoptions(suppress=True)

In [3]:
# Shared instance whose attributes (CREATETANK, FIRE, OBJECTUPDATE, ...) are the
# message type identifiers used by TankRunner when sending and receiving.
server_message_types = ServerMessageTypes()

In [4]:
class ApiRunner:
    """Thin wrapper around one `ServerComms` connection to the game server.

    Attributes:
        server: the underlying `ServerComms` instance.
        is_connected: True from construction until `disconnect()` is called.
    """

    # Address used when the caller does not pass one; this is the value that
    # used to be hard-coded, kept as the default for backward compatibility.
    DEFAULT_HOST = "10.3.141.79"

    def __init__(self, port, host=DEFAULT_HOST):
        """Create the server connection object.

        Args:
            port: port handed to `ServerComms`.
            host: server address handed to `ServerComms`.
        """
        self.server = ServerComms(host, port)
        # Only mark the runner as connected once ServerComms was built without
        # raising.
        self.is_connected = True

    def send(self, type, payload=None):
        """Forward a message of the given type (and optional payload) to the server."""
        self.server.send_message(type, payload)

    def disconnect(self):
        """Mark the runner as disconnected and close the underlying socket."""
        self.is_connected = False
        self.server.server_socket.close()

In [5]:
class TankRunner:
    """Controls a single tank through an `ApiRunner` connection.

    On construction the tank is spawned and a daemon thread starts reading
    server messages into `self.state`, a dict with three entries:
    `me` (latest update for this tank), `enemies` (updates for other tanks,
    keyed by name) and `items` (ammo/health pickups, keyed by id).

    `step(action)` sends an action, waits briefly and returns
    `(state_vector, reward, done)`.
    """

    # Placeholder coordinate / distance used while no enemy or item is known.
    _UNKNOWN = -99999

    # Action index -> attribute name on the module-level `server_message_types`.
    _ACTION_MESSAGES = {
        0: 'TOGGLEFORWARD',
        1: 'TOGGLEREVERSE',
        2: 'TOGGLELEFT',
        3: 'TOGGLERIGHT',
        4: 'TOGGLETURRETLEFT',
        5: 'TOGGLETURRETRIGHT',
        6: 'FIRE',
        7: 'STOPMOVE',
        8: 'STOPTURN',
        9: 'STOPTURRET',
        10: 'STOPALL',
    }
    _FIRE_ACTION = 6

    def __init__(self, name, api_runner):
        """Spawn a tank called `name` over `api_runner` and start listening."""
        self.name = name
        self.api_runner = api_runner
        # Reward bookkeeping is created before the receiver thread is started.
        self.setup_reward_variables()
        self.start()

    def setup_reward_variables(self):
        """Reset the "previous step" values that `calculate_reward` compares against."""
        self.prev_closest_enemy_distance = None
        self.prev_health = None
        self.prev_num_of_enemies = None
        self.prev_heading_difference = None
        self.prev_fired = False
        self.prev_closest_item_distance = None

    def start(self):
        """Reset the mirrored state, spawn the tank and start the receiver thread."""
        self.has_ended = False
        self.state = dict(me={}, enemies={}, items={})
        self.spawn()
        self.receive_thread = threading.Thread(target=self.read)
        self.receive_thread.daemon = True
        self.receive_thread.start()

    def spawn(self):
        """Ask the server to create a tank with this runner's name."""
        self.send(server_message_types.CREATETANK, dict(Name=self.name))

    def despawn(self):
        """Ask the server to remove the tank."""
        self.send(server_message_types.DESPAWNTANK)

    def read(self):
        """Receiver loop: apply incoming messages to `self.state` until stopped."""
        while not self.has_ended:
            message = self.api_runner.server.read_message()
            self.update_state(message)

    def update_state(self, message):
        """Route one server message into `me`, `enemies` or `items`.

        Messages that are None, are not object updates, or whose data is not a
        dict carrying a 'Type' key are ignored.
        """
        if message is None:
            return
        if message['type'] != server_message_types.OBJECTUPDATE:
            return

        data = message['data']
        if not isinstance(data, dict) or 'Type' not in data:
            return

        kind = data['Type']
        if kind == 'Tank':
            if data['Name'] == self.name:
                self.update_me(data)
            else:
                self.update_enemy(data)
        elif kind in ('AmmoPickup', 'HealthPickup'):
            self.update_item(data)

    def update_me(self, data):
        """Store the latest update about this tank."""
        self.state['me'] = data

    def update_enemy(self, data):
        """Store the latest update about another tank, keyed by its name."""
        self.state['enemies'][data['Name']] = data

    def update_item(self, data):
        """Store the latest update about a pickup, keyed by its id."""
        self.state['items'][data['Id']] = data

    def send(self, type, payload=None):
        """Send a message through the shared `ApiRunner`."""
        self.api_runner.send(type, payload)

    def stop(self):
        """Let the receiver loop finish and despawn the tank."""
        self.has_ended = True
        self.despawn()

    def do_action(self, action_type):
        """Send the server message mapped to `action_type` (0-10).

        Unknown action indices are ignored. Firing (action 6) also sets
        `prev_fired`, which `calculate_reward` consumes on the next call.
        """
        message_name = self._ACTION_MESSAGES.get(action_type)
        if message_name is None:
            return
        if action_type == self._FIRE_ACTION:
            self.prev_fired = True
        self.send(getattr(server_message_types, message_name))

    def get_closest_goal(self):
        """Return the coordinates of whichever of the two fixed goals is nearer.

        Ties go to the goal at (0, -100).
        """
        me_coords = (self.state['me']['X'], self.state['me']['Y'])
        left_goal_coords = (0, 100)
        right_goal_coords = (0, -100)
        left_goal_distance = utils.calculate_distance(me_coords, left_goal_coords)
        right_goal_distance = utils.calculate_distance(me_coords, right_goal_coords)
        if left_goal_distance < right_goal_distance:
            return left_goal_coords
        return right_goal_coords

    def _closest_target(self, targets, fallback_distance):
        """Return `(coords, distance)` of the entry in `targets` nearest to this tank.

        Args:
            targets: dict of objects that each carry 'X' and 'Y'.
            fallback_distance: distance reported when `targets` is empty; when it
                is None the absolute value of the unknown sentinel is used.

        Returns:
            `(coords, distance)` with the distance rounded to 3 decimals, or the
            sentinel coordinates plus the fallback distance when there is no
            target.
        """
        if len(targets) == 0:
            if fallback_distance is None:
                fallback_distance = abs(self._UNKNOWN)
            return (self._UNKNOWN, self._UNKNOWN), fallback_distance

        me_coords = (self.state['me']['X'], self.state['me']['Y'])
        best_coords = None
        best_distance = None
        # Iterate over a snapshot: the receiver thread may insert new entries
        # while this loop is running.
        for target in list(targets.values()):
            coords = (target['X'], target['Y'])
            distance = utils.calculate_distance(me_coords, coords)
            # Keep the SMALLEST distance seen so far (the earlier comparison was
            # reversed and therefore selected the farthest target).
            if best_distance is None or distance < best_distance:
                best_coords, best_distance = coords, distance
        return best_coords, round(best_distance, 3)

    def get_closest_enemy(self):
        """Return `(coords, distance)` of the nearest known enemy tank."""
        return self._closest_target(self.state['enemies'], self.prev_closest_enemy_distance)

    def get_closest_item(self):
        """Return `(coords, distance)` of the nearest known pickup."""
        return self._closest_target(self.state['items'], self.prev_closest_item_distance)

    def get_number_of_enemies(self):
        """Return how many distinct enemy tanks have been seen."""
        return len(self.state['enemies'])

    @staticmethod
    def _approach_reward(previous_distance, current_distance):
        """+5 when the distance shrank, -5 when it grew, 0 when unchanged."""
        if previous_distance > current_distance:
            return 5
        if previous_distance < current_distance:
            return -5
        return 0

    def calculate_reward(self):
        """Score the change since the previous call.

        Returns:
            `(reward, done)`. `done` is True once this tank's health is 0.
            `(0, False)` is returned while no update about this tank has been
            received yet. The first call after a reset only records baselines
            for the comparison-based terms.
        """
        reward = 0
        done = False
        me = self.state['me']
        if len(me) == 0:
            return reward, done

        # Getting closer to the nearest enemy is rewarded, backing off penalised.
        enemy_coords, enemy_distance = self.get_closest_enemy()
        if self.prev_closest_enemy_distance is not None:
            reward += self._approach_reward(self.prev_closest_enemy_distance, enemy_distance)
        self.prev_closest_enemy_distance = enemy_distance

        # Same scheme for the nearest pickup.
        _, item_distance = self.get_closest_item()
        if self.prev_closest_item_distance is not None:
            reward += self._approach_reward(self.prev_closest_item_distance, item_distance)
        self.prev_closest_item_distance = item_distance

        # Gaining health is rewarded, losing it is penalised (the earlier
        # comparisons were reversed and rewarded taking damage).
        health = me['Health']
        if self.prev_health is not None:
            if health > self.prev_health:
                reward += 25
            elif health < self.prev_health:
                reward -= 10
        self.prev_health = health

        # Reward a drop in the number of known enemies (the earlier comparison
        # rewarded an increase).
        # NOTE(review): nothing in this class removes entries from
        # state['enemies'], so this term only fires if something else prunes it.
        enemy_count = self.get_number_of_enemies()
        if self.prev_num_of_enemies is not None and enemy_count < self.prev_num_of_enemies:
            reward += 50
        self.prev_num_of_enemies = enemy_count

        # Reward a heading towards the nearest enemy that changed by less than 5
        # since the previous step, with a bonus when the tank also fired.
        # NOTE(review): units of utils.calculate_heading are not visible here.
        me_coords = (me['X'], me['Y'])
        heading_to_enemy = utils.calculate_heading(me_coords, enemy_coords)
        if self.prev_heading_difference is not None:
            if abs(self.prev_heading_difference - heading_to_enemy) < 5:
                reward += 10
                if self.prev_fired:
                    reward += 30
        self.prev_heading_difference = heading_to_enemy

        # Every shot has a small cost; the flag is consumed here.
        if self.prev_fired:
            reward -= 5
            self.prev_fired = False

        # Reward keeping the turret roughly aligned with the hull.
        # NOTE(review): a plain difference does not handle wrap-around if the
        # headings are angles (e.g. 359 vs 1) - confirm against the server.
        if abs(me['Heading'] - me['TurretHeading']) < 10:
            reward += 10

        # high rewards for going closer to goal if kills > 0
        # TODO

        # The episode ends when the tank has no health left.
        if me['Health'] == 0:
            done = True

        return reward, done

    def get_state_details(self):
        """Build the 12-element observation vector.

        Order: own X, own Y, heading, turret heading, health, closest enemy X/Y,
        closest item X/Y, number of enemies, nearest goal X/Y. A vector filled
        with -9999 is returned while nothing is known about this tank.
        """
        me = self.state['me']
        if len(me) == 0:
            return np.array([-9999]*12)

        closest_enemy_coords, _ = self.get_closest_enemy()
        closest_item_coords, _ = self.get_closest_item()
        nearest_goal = self.get_closest_goal()
        # kills
        # TODO
        return np.array([
            round(me['X'], 3),
            round(me['Y'], 3),
            round(me['Heading'], 3),
            round(me['TurretHeading'], 3),
            me['Health'],
            round(closest_enemy_coords[0], 3),
            round(closest_enemy_coords[1], 3),
            round(closest_item_coords[0], 3),
            round(closest_item_coords[1], 3),
            self.get_number_of_enemies(),
            round(nearest_goal[0], 3),
            round(nearest_goal[1], 3),
        ])

    def step(self, action):
        """Apply `action`, wait 0.2 s for updates, return `(state, reward, done)`."""
        self.do_action(action)
        time.sleep(0.2)
        state = self.get_state_details()
        reward, done = self.calculate_reward()
        return state, reward, done

In [6]:
# Number of remembered transitions sampled by Agent.replay on each training call.
batch_size = 32
# Lock held around model.fit inside Agent.replay.
lock = threading.Lock()

In [7]:
class Agent:
    """Epsilon-greedy Q-learning agent backed by a small dense Keras network.

    Attributes:
        memory: bounded buffer of `(state, action, reward, next_state, done)`.
        gamma: discount applied to the best predicted value of the next state.
        epsilon: probability of picking a random action; kept within
            `[epsilon_min, 1.0]`.
        epsilon_multiplier: relative step by which epsilon is raised or lowered.
        learning_rate: learning rate given to the Adam optimizer.
        prev_rewards: summed batch rewards of the most recent `replay` calls.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_multiplier = 0.005
        self.epsilon_min = 0.01
        self.learning_rate = 0.01
        self.model = self._build_model()
        self.prev_rewards = deque(maxlen=10)

    def _build_model(self):
        """Build and compile the network that maps a state to one value per action."""
        # Local import: keras is already a dependency of this notebook, Adam is
        # only needed here.
        from keras.optimizers import Adam

        model = Sequential()

        model.add(Dense(32, input_dim=self.state_size, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))

        # The linear output layer is fitted against numeric value targets in
        # `replay`, i.e. a regression, so mean squared error is used instead of
        # a classification loss / accuracy metric. The optimizer now honours
        # `self.learning_rate`, which was previously never used.
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))

        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Return a random action with probability epsilon, else the best predicted one."""
        if np.random.rand() <= self.epsilon:
            return random.randint(0, self.action_size-1)

        predicted_values = self.model.predict(state)
        return int(np.argmax(predicted_values[0]))

    def replay(self, batch_size):
        """Fit the model on `batch_size` random transitions, then adapt epsilon.

        Raises:
            ValueError: from `random.sample` when fewer than `batch_size`
                transitions are stored.
        """
        mini_batch = random.sample(self.memory, batch_size)

        summed_rewards = 0
        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            # Only the value of the action that was taken is moved to the target.
            target_current = self.model.predict(state)
            target_current[0][action] = target
            with lock:
                self.model.fit(state, target_current, epochs=1, verbose=0)
            summed_rewards += reward

        self._update_epsilon()
        self.prev_rewards.append(summed_rewards)

    def _update_epsilon(self):
        """Raise epsilon when recent batch rewards got worse, otherwise decay it.

        The result is clamped to `[epsilon_min, 1.0]`; previously it could creep
        above 1.0 or below the minimum.
        """
        history = list(self.prev_rewards)
        increased = self.epsilon * (1 + self.epsilon_multiplier)
        decayed = self.epsilon * (1 - self.epsilon_multiplier)

        if len(history) == self.prev_rewards.maxlen:
            half = len(history) // 2
            older, newer = sum(history[:half]) , sum(history[half:])
            # Older half outscoring the newer half means performance dropped.
            if older > newer and self.epsilon < 1:
                self.epsilon = min(1.0, increased)
            elif self.epsilon > self.epsilon_min:
                self.epsilon = max(self.epsilon_min, decayed)
        else:
            # Not enough history yet: plain decay.
            self.epsilon = max(self.epsilon_min, decayed)

    def set_weights(self, matrix):
        """Load per-layer parameters from `matrix`.

        Args:
            matrix: mapping with "weights" and "biases", each indexable by layer
                position.
        """
        weights = matrix["weights"]
        biases = matrix["biases"]
        for i, layer in enumerate(self.model.layers):
            # Keras expects a list of arrays. Wrapping the differently shaped
            # kernel and bias in one np.array produced a ragged object array.
            layer.set_weights([np.asarray(weights[i]), np.asarray(biases[i])])

    def load(self, name):
        """Load model weights from the file `name`."""
        self.model.load_weights(name)

    def save(self, name):
        """Save model weights to the file `name`."""
        self.model.save_weights(name)

    def get_model(self):
        """Return the underlying Keras model."""
        return self.model

In [14]:
# Length of the observation vector built by TankRunner.get_state_details.
state_size = 12
# TankRunner.do_action handles action indices 0 through 10.
action_size = 11
# Episodes each specie plays per generation (see GA.initiate_generation).
n_episodes = 20
# Maximum number of steps per episode.
frames = 500
# Specie N connects to port default_port + (N - 1).
default_port = 8050

In [15]:
class GA:
    """Trains a population of agents in parallel and breeds them each generation.

    Every specie is trained in its own process against its own server port.
    Afterwards the per-layer weights of randomly chosen parent pairs are mixed
    row by row to form the next generation, and the population is pickled to
    `latest_population.pkl`.

    NOTE(review): the worker processes are started with a bound method as
    target, which presumably relies on a fork-style start method - confirm
    before running on a platform that spawns.
    """

    def __init__(self, population_size, max_generations=None):
        """Create the population and immediately start evolving it.

        Args:
            population_size: number of species; crossover needs at least two.
            max_generations: stop after this many generations. None (the
                default) keeps evolving until interrupted, as before.
        """
        self.generations = 1
        self.population_size = population_size
        self.max_generations = max_generations
        self.manager = multiprocessing.Manager()
        self.population = self.manager.dict()
        self.initialise_specie_names()
        self.initiate_evolution()

    def initialise_specie_names(self):
        """Register `specie_1 .. specie_N` with empty genetic information."""
        for specie_num in range(1, self.population_size+1):
            name = "specie_{}".format(specie_num)
            self.population[name] = []

    def initiate_evolution(self, has_prior_genetic_code=False):
        """Run generations (train, then crossover) until the limit is reached.

        This is a loop rather than mutual recursion with
        `initiate_genetic_crossover`, so the call stack no longer grows with
        every generation.

        Args:
            has_prior_genetic_code: whether the first generation run by this
                call should load weights from the population.
        """
        first_pass = True
        while self.max_generations is None or self.generations <= self.max_generations:
            if not first_pass:
                # Wipe the previous generation's cell output before the next one.
                clear_output()
            first_pass = False
            self._run_generation(has_prior_genetic_code)
            self._crossover_population()
            has_prior_genetic_code = True

    def _run_generation(self, has_prior_genetic_code):
        """Train every specie in its own process and wait for all of them."""
        print("Initiating evolution generation {}...".format(self.generations))
        workers = []
        for specie_name in list(self.population.keys()):
            worker = multiprocessing.Process(
                name=specie_name,
                target=self.initiate_generation,
                args=(specie_name, has_prior_genetic_code, self.population),
            )
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        self.generations += 1

    def initiate_generation(self, name, has_prior_genetic_code, population):
        """Train one specie for `n_episodes` episodes and store its weights.

        Runs inside a worker process.

        Args:
            name: specie name of the form `specie_<n>`; `<n>` selects the port.
            has_prior_genetic_code: load `population[name]` into the agent first.
            population: shared mapping that receives the trained weights.
        """
        index = int(name.split("_")[1])-1
        print("Initiating {}...".format(name))
        api_runner = ApiRunner(default_port + index)
        agent = Agent(state_size, action_size)
        if has_prior_genetic_code:
            agent.set_weights(population[name])
        for episode in range(n_episodes):
            tank_runner = TankRunner("Robo-{}".format(episode), api_runner)
            state = tank_runner.get_state_details()
            state = np.reshape(state, [1, state_size])

            for frame in range(frames):
                action = agent.act(state)
                next_state, reward, done = tank_runner.step(action)
                reward = reward if not done else -10
                next_state = np.reshape(next_state, [1, state_size])
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                if done:
                    print("Episode: {}/{}, score: {}, e: {:.2}".format(episode, n_episodes, frame, agent.epsilon))
                    break

                if len(agent.memory) > batch_size:
                    agent.replay(batch_size)
            # stop() also flags the runner's receiver loop to finish; a bare
            # despawn() left one more reader thread on the shared socket after
            # every episode.
            tank_runner.stop()
        self.save_genetic_information(name, agent, population)

    def save_genetic_information(self, name, agent, population):
        """Store the agent's per-layer weights and biases under `population[name]`."""
        weights = []
        biases = []

        for layer in agent.get_model().layers:
            layer_parameters = layer.get_weights()
            weights.append(layer_parameters[0])
            biases.append(layer_parameters[1])

        population[name] = dict(weights=weights, biases=biases)

    def _crossover_population(self):
        """Replace every specie's weights with a child of two random parents, then save.

        Biases are left as they are; only "weights" is bred.
        """
        names = list(self.population.keys())
        # Read all parents first so every child is bred from the same generation.
        parent_weights = {name: self.population[name]["weights"] for name in names}

        children = []
        for _ in names:
            first, second = random.sample(names, 2)
            children.append(self.mutate(parent_weights[first], parent_weights[second]))

        for name, child in zip(names, children):
            # Indexing a manager dict returns a copy of the stored value, so an
            # in-place edit of that copy is lost. Update the copy and assign it
            # back so the change reaches the shared population.
            genome = self.population[name]
            genome["weights"] = child
            self.population[name] = genome

        self.save_generation()

    def initiate_genetic_crossover(self):
        """Breed and save the next generation, then continue evolving with it."""
        self._crossover_population()

        # restart the evolution with prior genetic code
        clear_output()
        self.initiate_evolution(has_prior_genetic_code=True)

    def save_generation(self):
        """Pickle a plain-dict snapshot of the population to `latest_population.pkl`."""
        # Dump the data itself rather than the manager proxy, which is only a
        # handle to the manager process.
        with open('latest_population.pkl', 'wb') as f:
            pickle.dump(dict(self.population), f)

    def mutate(self, parent_one, parent_two):
        """Build a child by picking each weight row from one of the two parents.

        Args:
            parent_one: per-layer weight matrices of the first parent.
            parent_two: per-layer weight matrices of the second parent, with the
                same layout.

        Returns:
            A list with one numpy array per layer.
        """
        child = []
        for layer_one, layer_two in zip(parent_one, parent_two):
            rows = [
                row_one if random.randint(0, 1) == 0 else row_two
                for row_one, row_two in zip(layer_one, layer_two)
            ]
            # Stack the rows back into a matrix so the layer keeps its shape.
            child.append(np.array(rows))
        return child

In [16]:
# Start evolving a population of two species. The GA constructor itself launches
# training, so this cell keeps running until it is interrupted.
evolution = GA(2)

Initiating evolution generation 1...
Initiating specie_1...
Initiating specie_2...


Process specie_1:
Process specie_2:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/hemangkandwal/miniconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/hemangkandwal/miniconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/hemangkandwal/miniconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/hemangkandwal/miniconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-15-6f6e2fce5194>", line 49, in initiate_generation
    next_state, reward, done = tank_runner.step(action)


KeyboardInterrupt: 

  File "<ipython-input-15-6f6e2fce5194>", line 49, in initiate_generation
    next_state, reward, done = tank_runner.step(action)
  File "<ipython-input-5-2c62420c54f0>", line 237, in step
    time.sleep(0.2)
  File "<ipython-input-5-2c62420c54f0>", line 237, in step
    time.sleep(0.2)
KeyboardInterrupt
KeyboardInterrupt
