This file creates the custom environment that the AI works in. It uses the functions we defined in the sim notebook (sim.ipynb).

In [6]:
### Helper that adds a show/hide link for notebook code cells -- you don't need to worry about this!! ###
from IPython.display import HTML
import random

def hide_toggle(for_next=False):
    """Build an HTML snippet containing a link that shows/hides a code cell's input.

    Args:
        for_next: When False (the default) the link toggles the input of the
            currently selected cell. When True it toggles the input of the
            cell after it, and the snippet also hides the input of the
            current cell.

    Returns:
        An ``IPython.display.HTML`` object wrapping the generated
        ``<script>`` and ``<a>`` markup.
    """
    # jQuery selector for the cell that is selected when the snippet runs.
    current_selector = """$('div.cell.code_cell.rendered.selected')"""

    if for_next:
        controlled_selector = current_selector + '.next()'
        link_label = 'Toggle show/hide' + ' next cell'
        # JS that hides the current cell's own input when the snippet loads.
        hide_current_js = current_selector + '.find("div.input").hide();'
    else:
        controlled_selector = current_selector
        link_label = 'Toggle show/hide'
        hide_current_js = ''

    # Random suffix so each generated JS function gets its own name.
    function_name = 'code_toggle_{}'.format(str(random.randint(1,2**64)))

    template = """
        <script>
            function {f_name}() {{
                {cell_selector}.find('div.input').toggle();
            }}

            {js_hide_current}
        </script>

        <a href="javascript:{f_name}()">{toggle_text}</a>
    """
    markup = template.format(
        f_name=function_name,
        cell_selector=controlled_selector,
        js_hide_current=hide_current_js,
        toggle_text=link_label,
    )
    return HTML(markup)
# Build the toggle link for this cell; as the cell's last expression, the
# returned HTML object is what the notebook displays as the cell output.
hide_toggle()

In [1]:
import io, os, sys, types
from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell
import logging
import tensorflow as tf
import time

from tensorforce.agents import Agent
from tensorforce.environments import Environment
from tensorforce.execution import Runner

In [5]:
### Don't worry about this!! ###
class NotebookLoader(object):
    """Module loader that imports a Jupyter notebook as a Python module.

    The code cells of the notebook are executed in order inside a fresh
    module namespace, so the names the notebook defines become attributes
    of the returned module.
    """

    def __init__(self, path=None):
        """Create a loader.

        Args:
            path: Optional list of directories to search for the notebook.
                When falsy, only the current working directory is searched.
        """
        self.shell = InteractiveShell.instance()
        self.path = path

    def find_notebook(self, fullname, path=None):
        """Locate the ``.ipynb`` file for a (possibly dotted) module name.

        Only the last component of ``fullname`` is used as the file name.
        Each directory is tried with the exact name first and then with
        underscores replaced by spaces, so ``import Notebook_Name`` can find
        ``Notebook Name.ipynb``.

        Args:
            fullname: Module name, e.g. ``"sim"`` or ``"pkg.sim"``.
            path: Directories to search; defaults to the current directory.

        Returns:
            The path of the first matching notebook, or ``None`` if no
            candidate file exists.
        """
        name = fullname.rsplit('.', 1)[-1]
        if not path:
            path = ['']
        # The underscore -> space fallback is applied to the file name only,
        # never to the directory part, so directories containing "_" still work.
        candidates = (name + ".ipynb", name.replace("_", " ") + ".ipynb")
        for d in path:
            for filename in candidates:
                nb_path = os.path.join(d, filename)
                if os.path.isfile(nb_path):
                    return nb_path
        return None

    def load_module(self, fullname):
        """Import a notebook as a module.

        Args:
            fullname: Name of the module / notebook to import.

        Returns:
            The module object, which is also registered in ``sys.modules``
            under ``fullname``.

        Raises:
            ImportError: If no matching notebook file can be found.
        """
        # Honour the search path given to the constructor.
        path = self.find_notebook(fullname, self.path)
        if path is None:
            raise ImportError(
                "No Jupyter notebook found for module %r" % fullname,
                name=fullname,
            )

        print ("importing Jupyter notebook from %s" % path)

        # load the notebook object
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)

        # create the module and add it to sys.modules
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # Do not leave a half-initialised module importable after a failure.
            sys.modules.pop(fullname, None)
            raise
        finally:
            self.shell.user_ns = save_user_ns
        return mod
# Execute the "sim" notebook as a module. load_module registers the result in
# sys.modules under the name 'sim'.
a = NotebookLoader()
a.load_module('sim')
# Build the show/hide toggle link for this cell.
hide_toggle()

importing Jupyter notebook from sim.ipynb


In [None]:
import sim

In [None]:
### Creates the custom environment that the AI will use to play the game ###

### Creates a class which is an object that the Tensorforce program uses to do certain things ###
### Creates a class which is an object that the Tensorforce program uses to do certain things ###
class CustomEnvironment(Environment):
    """Tensorforce environment in which the AI plays the game from the sim module.

    Each environment owns its own ``sim.GameSim`` instance, so several
    environments can exist at once without sharing game state.
    """

    ### We initialize the class ###
    def __init__(self, step_delay=2):
        """Set up the environment.

        Args:
            step_delay: Seconds to pause after every valid move so the printed
                progress can be followed. Pass 0 (or None) to run without
                pausing.
        """
        super().__init__()
        ### Here we create an object from the class that WE made. It is created per ###
        ### environment instance (not on the class) so games are never shared. ###
        self.gameSimulation = sim.GameSim()
        self.step_delay = step_delay

    ### Here the Tensorforce program uses this method to figure out how many things the AI is given describing
    ### the world around it. ###
    def states(self):
        """Describe the observation: a flat vector of 49 floats."""
        return dict(type='float', shape=(49,))

    ### Here we tell the Tensorforce program what decisions the AI can say ###
    def actions(self):
        """Describe the actions: one float in [0.0, 1.0] per direction."""
        return {
            "up": dict(type="float", min_value=0.0, max_value=1.0),
            "down": dict(type="float", min_value=0.0, max_value=1.0),
            "left": dict(type="float", min_value=0.0, max_value=1.0),
            "right": dict(type="float", min_value=0.0, max_value=1.0),
        }

    ### Here we tell it how many turns it gets in one game ###
    def max_episode_timesteps(self):
        """Return the episode length limit; defers to the Tensorforce base class."""
        return super().max_episode_timesteps()

    ### Here is where we shut down the AI ###
    def close(self):
        """Close the environment; defers to the Tensorforce base class."""
        super().close()

    ### Here is where we reset the game for the AI ###
    def reset(self):
        """Reset the game and return the state reported by the simulation."""
        ### Here we use the gameSimulation object instance and use its reset() function to reset the game ###
        self.gameSimulation.reset()
        return self.gameSimulation.get_state()

    ### Here is where the AI actually plays the game! ###
    def execute(self, actions):
        """Apply one set of actions to the game.

        Args:
            actions: The action values chosen by the agent; passed unchanged
                to the simulation's move_check() and movePlayer().

        Returns:
            A ``(state, terminal, reward)`` tuple: the simulation's current
            state, whether the game is over, and the reward for this step.
        """
        ### If the AI made an invalid move we just tell it to try again and take away 1 point ###
        if not self.gameSimulation.move_check(actions):
            return self.gameSimulation.get_state(), False, -1

        ### The move is valid, so we move the player and remember the new position ###
        new_position = self.gameSimulation.movePlayer(actions)
        ### reward() gives back two things: reward (a number) and gameOver (True or False) ###
        reward, gameOver = self.gameSimulation.reward(new_position)
        ### Here we just print what is happening ###
        print(f"The AI decided to move to {new_position}, and was given a reward of {reward}")
        ### This slows the execution of the code down so the output can be followed ###
        if self.step_delay:
            time.sleep(self.step_delay)
        ### The game only counts as finished when gameOver is exactly True ###
        terminal = gameOver is True
        ### Finally we return the new state of the game for the AI to look at for its next move, along with ###
        ### terminal and the reward. ###
        return self.gameSimulation.get_state(), terminal, reward