In [1]:
import io, os, sys, types
from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell

In [2]:
class NotebookLoader(object):
    """Module loader that imports a Jupyter notebook as a Python module.

    Every code cell of the notebook is run through IPython's input
    transformer (so magics and ``!shell`` lines work) and executed inside a
    fresh module namespace.
    """
    def __init__(self, path=None):
        """Create a loader.

        Parameters
        ----------
        path : list of str, str or None
            Directory (or directories) searched for ``.ipynb`` files.
            ``None`` or an empty value means the current working directory.
        """
        self.shell = InteractiveShell.instance()
        self.path = path

    def find_notebook(self, fullname, path=None):
        """Locate the notebook file backing the module ``fullname``.

        Only the last dotted component of ``fullname`` is used as the file
        stem. For each directory, ``<name>.ipynb`` is tried first, then the
        same file name with underscores replaced by spaces.

        Parameters
        ----------
        fullname : str
            Possibly dotted module name, e.g. ``"pkg.My_Notebook"``.
        path : list of str, str or None
            Directories to search; a falsy value means ``['']`` (the current
            working directory). A single string is treated as one directory.

        Returns
        -------
        str or None
            Path of the first matching notebook, or ``None`` if none exists.
        """
        name = fullname.rsplit('.', 1)[-1]
        if not path:
            path = ['']
        elif isinstance(path, str):
            # A bare string would otherwise be iterated character by character.
            path = [path]

        # let import Notebook_Name find "Notebook Name.ipynb"
        # Only the file name is rewritten: directories may legitimately
        # contain underscores and must be left untouched.
        candidates = (name + ".ipynb", name.replace("_", " ") + ".ipynb")
        for d in path:
            for filename in candidates:
                nb_path = os.path.join(d, filename)
                if os.path.isfile(nb_path):
                    return nb_path
        return None

    def load_module(self, fullname):
        """Import a notebook as a module and register it in ``sys.modules``.

        The notebook is re-executed on every call, even when ``fullname`` is
        already present in ``sys.modules``.

        Parameters
        ----------
        fullname : str
            Module name; resolved to a file with :meth:`find_notebook` using
            the loader's ``path``.

        Returns
        -------
        module
            The module whose namespace holds everything the notebook defined.

        Raises
        ------
        ImportError
            If no matching notebook file is found.
        """
        path = self.find_notebook(fullname, self.path)
        if path is None:
            raise ImportError("No Jupyter notebook found for module %r" % fullname)

        print("importing Jupyter notebook from %s" % path)

        # load the notebook object
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)

        # create the module and add it to sys.modules
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        previous = sys.modules.get(fullname)
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # Do not leave a half-initialised module importable: put back
            # whatever was registered under this name before we started.
            if previous is None:
                sys.modules.pop(fullname, None)
            else:
                sys.modules[fullname] = previous
            raise
        finally:
            self.shell.user_ns = save_user_ns
        return mod
# Load the two project notebooks as modules so the plain `import custom_env`
# and `from sim import GameSim` statements in the following cell resolve
# through sys.modules. The last call's return value is the cell output.
a = NotebookLoader()
a.load_module(fullname='sim')
a.load_module(fullname='custom_env')

importing Jupyter notebook from sim.ipynb
importing Jupyter notebook from custom_env.ipynb
importing Jupyter notebook from sim.ipynb


<module 'custom_env' from 'custom_env.ipynb'>

In [3]:
import logging

import tensorflow as tf

from tensorforce.agents import Agent
from tensorforce.environments import Environment
from tensorforce.execution import Runner

import custom_env

from sim import GameSim

In [4]:
# Quieten TensorFlow: request the highest native-log suppression level and
# restrict the Python-side TensorFlow logger to errors only.
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is normally expected to be set before
# `import tensorflow` runs; here tensorflow was already imported in the
# previous cell — confirm this still has the intended effect.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '3'})
logger = tf.get_logger()
logger.setLevel(level=logging.ERROR)

In [None]:
# Copyright 2018 Tensorforce Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Create the custom environment (CustomEnvironment from the custom_env
# notebook module); every episode is cut off after 40 timesteps
environment = Environment.create(
    environment='custom_env.CustomEnvironment', max_episode_timesteps=40
)

# Create a PPO agent
agent = Agent.create(
    agent='ppo', environment=environment,
    # Automatically configured network
    network='auto',
    # Optimization
    batch_size=10, update_frequency=2, learning_rate=1e-3, subsampling_fraction=0.2,
    optimization_steps=5,
    # Preprocessing
    preprocessing=None,
    # Exploration
    exploration=0.22, variable_noise=0.0,
    # Regularization
    l2_regularization=0.0, entropy_regularization=0.0,
    # TensorFlow etc
    name='agent', device=None, parallel_interactions=1, seed=None, execution=None, saver=None,
    summarizer=None, recorder=None
)

# Initialize the runner
runner = Runner(agent=agent, environment=environment)

# Start the runner. Training is slow and is often interrupted from the
# notebook, so make sure the runner is closed even when run() raises.
try:
    runner.run(num_episodes=100)
finally:
    runner.close()

Episodes:   0%|          | 0/100 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

The AI decided to move to (0, 2), and was given a reward of -1
The AI decided to move to (1, 2), and was given a reward of -1
The AI decided to move to (0, 2), and was given a reward of -1
Gathered Item
The AI decided to move to (0, 3), and was given a reward of 35


Episodes:   1%|          | 1/100 [00:11, reward=30.00, ts/ep=6, sec/ep=11.77, ms/ts=1961.5, agent=32.0%]

The AI decided to move to (3, 2), and was given a reward of -1
The AI decided to move to (2, 2), and was given a reward of -1
The AI decided to move to (1, 2), and was given a reward of -1
The AI decided to move to (1, 1), and was given a reward of -1
The AI decided to move to (1, 2), and was given a reward of -1
The AI decided to move to (1, 3), and was given a reward of -1
The AI decided to move to (1, 2), and was given a reward of -1
The AI decided to move to (0, 2), and was given a reward of -1
The AI decided to move to (0, 3), and was given a reward of -1
The AI decided to move to (1, 3), and was given a reward of -1
The AI decided to move to (1, 2), and was given a reward of -1
The AI decided to move to (0, 2), and was given a reward of -1
The AI decided to move to (0, 3), and was given a reward of -1
The AI decided to move to (1, 3), and was given a reward of -1
The AI decided to move to (2, 3), and was given a reward of -1
The AI decided to move to (2, 2), and was given a rewar

Episodes:   2%|▏         | 2/100 [01:17, reward=-40.00, ts/ep=40, sec/ep=66.12, ms/ts=1653.0, agent=0.1%]

The AI decided to move to (3, 1), and was given a reward of -1
The AI decided to move to (3, 0), and was given a reward of -1
The AI decided to move to (3, 1), and was given a reward of -1
The AI decided to move to (3, 0), and was given a reward of -1
The AI decided to move to (3, 1), and was given a reward of -1
The AI decided to move to (2, 1), and was given a reward of -1
The AI decided to move to (1, 1), and was given a reward of -1
The AI decided to move to (0, 1), and was given a reward of -1
The AI decided to move to (1, 1), and was given a reward of -1
The AI decided to move to (0, 1), and was given a reward of -1
The AI decided to move to (0, 0), and was given a reward of -1
The AI decided to move to (1, 0), and was given a reward of -1
The AI decided to move to (1, 1), and was given a reward of -1
The AI decided to move to (2, 1), and was given a reward of -1
The AI decided to move to (3, 1), and was given a reward of -1
The AI decided to move to (3, 0), and was given a rewar

In [None]:
# simulation = GameSim()
# for i in range()