In [7]:
import asyncio
import subprocess
import time
import numpy as np
import json
import gym
from websocket import create_connection
from gym import spaces
from stable_baselines3 import PPO, DQN
from stable_baselines3.common.env_util import make_vec_env

In [8]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values into plain Python equivalents.

    Pass it as ``cls=NpEncoder`` to ``json.dumps`` so payloads containing
    NumPy scalars or arrays can be serialized.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Args:
            obj: A value the standard encoder could not serialize.

        Returns:
            ``bool`` for NumPy booleans, ``int`` for NumPy integers,
            ``float`` for NumPy floating scalars and a (nested) ``list``
            for NumPy arrays.

        Raises:
            TypeError: Raised by the base class for any other type.
        """
        # np.bool_ is neither np.integer nor np.floating, so it needs its
        # own branch; without it a NumPy boolean raises TypeError.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [9]:
class UnrealAgentsEnvironment(gym.Env):
    """Gym environment driven by an external executable over a websocket.

    The constructor launches the executable at ``exec_path`` and connects to
    ``ws://localhost:<port>/ws``. Every command is a JSON object of the form
    ``{"type": <name>, "data": <payload>}``. Replies to "reset" and "step"
    are parsed as a JSON list of step records (presumably one per agent --
    confirm against the game side); only the first record is used.

    Subclasses are expected to set ``action_space``, ``observation_space``
    and ``agent`` (the key under which the action is sent in ``step``).
    """
    metadata = {'render.modes': ['human']}

    # Seconds to keep retrying the websocket connection after launch.
    CONNECT_TIMEOUT_S = 120
    # Pause between connection attempts so the retry loop does not spin.
    CONNECT_RETRY_DELAY_S = 1.0
    # Seconds the game gets to exit on its own after the "close" command
    # before it is terminated.
    SHUTDOWN_GRACE_S = 10

    def __init__(self, exec_path, port):
        """Launch the game executable and connect to it.

        Args:
            exec_path: Path of the executable to launch.
            port: Port passed to the game as ``-proxyPort`` and used for the
                websocket URL.

        Raises:
            TimeoutError: If no connection could be established in time.
        """
        super(UnrealAgentsEnvironment, self).__init__()

        # Defined up front so close() is safe even after a failed launch.
        self.connection = None
        self.__game_instance = None

        self.__initialize_unreal_exec(exec_path, port)

    def step(self, action):
        """Send one action and return ``(observations, reward, done, info)``.

        ``observations`` is returned as a NumPy array; ``info`` is always an
        empty dict.
        """
        result = self.__step(action)[0]
        return np.array(result['observations']), result['reward'], result['done'], result['info']

    def reset(self):
        """Reset the game and return the initial observations as an array."""
        result = self.__reset()[0]
        # Converted to an ndarray so reset() and step() return the same type.
        return np.array(result['observations'])

    def render(self, mode='human'):
        """No-op; always returns 0."""
        return 0

    def close(self):
        """Send the "close" command and release the socket and the process.

        Safe to call more than once. An error raised while sending the
        command still propagates, but only after cleanup has run.
        """
        connection = self.connection
        try:
            if connection is not None:
                try:
                    connection.send(json.dumps({"type": "close", "data": ""}))
                finally:
                    self.connection = None
                    connection.close()
        finally:
            self.__shutdown_game(self.SHUTDOWN_GRACE_S)

    def __exchange(self, command_type, data=""):
        """Send a command, read the reply and attach an empty ``info`` dict.

        Returns:
            The decoded reply (a list of dicts), each with ``'info'`` set
            to ``{}``.
        """
        self.connection.send(json.dumps({"type": command_type, "data": data}))
        step_info = json.loads(self.connection.recv())

        for step in step_info:
            step['info'] = {}

        return step_info

    def __reset(self):
        """Issue the "reset" command and return the decoded reply."""
        return self.__exchange("reset")

    def __step(self, action):
        """Issue a "step" command carrying ``action`` for ``self.agent``."""
        actions = {
            self.agent: {"discreteActions": [action], "continuousActions": []}
        }
        # The action map is sent as a JSON string nested inside the command;
        # NpEncoder handles NumPy action values.
        return self.__exchange("step", json.dumps(actions, cls=NpEncoder))

    def __shutdown_game(self, grace_period):
        """Terminate the game process if it is still running after ``grace_period`` seconds."""
        process = self.__game_instance
        self.__game_instance = None
        if process is None or process.poll() is not None:
            return
        try:
            process.wait(timeout=grace_period)
        except subprocess.TimeoutExpired:
            process.terminate()

    def __initialize_unreal_exec(self, exec_path, port):
        """Start the game process and retry the websocket connection.

        Raises:
            TimeoutError: If the connection is not established within
                ``CONNECT_TIMEOUT_S`` seconds.
            Exception: The last connection error, if the game process has
                already exited.
        """
        # The handle is kept so the process can be stopped later.
        self.__game_instance = subprocess.Popen([exec_path, '-windowed', '-ResX=640', '-ResY=360', "-proxyPort={0}".format(port), "-recordMetrics", "-flushInterval=10"])

        url = "ws://localhost:{0}/ws".format(port)
        deadline = time.monotonic() + self.CONNECT_TIMEOUT_S
        self.connection = None
        try:
            while self.connection is None and time.monotonic() < deadline:
                try:
                    self.connection = create_connection(url)
                except Exception:
                    # Not a bare except: KeyboardInterrupt must still be able
                    # to abort the wait.
                    if self.__game_instance.poll() is not None:
                        # The game already exited; retrying cannot succeed.
                        raise
                    print("Retrying environment connection...")
                    time.sleep(self.CONNECT_RETRY_DELAY_S)

            if self.connection is None:
                raise TimeoutError(
                    "Could not connect to {0} within {1} seconds".format(url, self.CONNECT_TIMEOUT_S)
                )
        except BaseException:
            # Do not leave the launched process behind on failure or interrupt.
            self.__shutdown_game(0)
            raise

In [10]:
class UnrealAgentsCartPole(UnrealAgentsEnvironment):
    """CartPole variant of the Unreal environment.

    Uses two discrete actions, a four-component float32 observation box and
    the agent key "Cart1". The meaning of each observation component is not
    stated in this file.
    """

    def __init__(self, exec_path, port):
        """Launch the game via the base class, then declare spaces and agent.

        Args:
            exec_path: Path of the executable to launch.
            port: Port used for the websocket connection.
        """
        super().__init__(exec_path, port)

        self.agent = "Cart1"
        self.action_space = spaces.Discrete(2)

        # Components 0 and 2 are bounded by 50 and 15; components 1 and 3 use
        # the largest finite float32, i.e. they are effectively unbounded.
        float32_max = np.finfo(np.float32).max
        bounds = np.array([50, float32_max, 15, float32_max], dtype=np.float32)
        self.observation_space = spaces.Box(low=-bounds, high=bounds, dtype=np.float32)
        
class UnrealAgentsBallPusher(UnrealAgentsEnvironment):
    """BallPusher variant of the Unreal environment.

    Uses six discrete actions, a 144-component float32 observation vector
    with values in [0, 1], and the agent key "BallPusherAgent1_1".
    """

    def __init__(self, exec_path, port):
        """Launch the game via the base class, then declare spaces and agent.

        Args:
            exec_path: Path of the executable to launch.
            port: Port used for the websocket connection.
        """
        super().__init__(exec_path, port)

        observation_shape = (144,)
        self.agent = "BallPusherAgent1_1"
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=observation_shape, dtype=np.float32)
        self.action_space = spaces.Discrete(6)

In [None]:
# CartPole learning: train PPO on the Unreal CartPole environment and save the model.
env = make_vec_env(UnrealAgentsCartPole, n_envs=1, env_kwargs={'port': 8008, 'exec_path': 'PATH_TO_EXE'})
try:
    model = PPO("MlpPolicy", env, verbose=1, tensorboard_log='PATH_TO_LOG')
    model.learn(total_timesteps=100000, tb_log_name='ppo_cartpole_1')
    model.save("ppo_cartpole_1")
finally:
    # Close the environment even if training fails or the cell is
    # interrupted, so the game still receives its "close" command.
    env.close()

In [None]:
# CartPole inference: load the saved PPO model and run it against the game.
model = PPO.load("ppo_cartpole_1")
env = make_vec_env(UnrealAgentsCartPole, n_envs=1, env_kwargs={'port': 8008, 'exec_path': 'PATH_TO_EXE'})
try:
    obs = env.reset()
    for step_index in range(100000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
finally:
    # Close the environment even if the loop fails or the cell is
    # interrupted, so the game still receives its "close" command.
    env.close()

In [None]:
# BallPusher learning: train PPO on the Unreal BallPusher environment and save the model.
env = make_vec_env(UnrealAgentsBallPusher, n_envs=1, env_kwargs={'port': 8008, 'exec_path': 'PATH_TO_EXE'})
try:
    model = PPO("MlpPolicy", env, verbose=1, tensorboard_log='PATH_TO_LOG')
    model.learn(total_timesteps=100000, tb_log_name='ppo_ballpusher_1')
    model.save("ppo_ballpusher_1")
finally:
    # Close the environment even if training fails or the cell is
    # interrupted, so the game still receives its "close" command.
    env.close()

In [None]:
# BallPusher inference: load the saved PPO model and run it against the game.
model = PPO.load("ppo_ballpusher_1")
env = make_vec_env(UnrealAgentsBallPusher, n_envs=1, env_kwargs={'port': 8008, 'exec_path': 'PATH_TO_EXE'})
try:
    obs = env.reset()
    for step_index in range(100000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
finally:
    # Close the environment even if the loop fails or the cell is
    # interrupted, so the game still receives its "close" command.
    env.close()