In [1]:
# !python -m pip install mlagents==0.28.0
# !python -m pip install gym
# !cd "/Users/aditya/Documents/GitHub/game_creation_research/ml-agents/gym-unity" && pip3 install -e .

import mlagents
import numpy as np
import json
from mlagents_envs.environment import UnityEnvironment
from collections import namedtuple, defaultdict
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
from gym_unity.envs import UnityToGymWrapper


In [6]:
from typing import List
# Lightweight 2D point used for every position/velocity in an observation.
Vector2 = namedtuple("Vector2", ["x", "y"])

class Obs():
    """
    Named view over the flat vector observation emitted by the Unity agent.

    Layout of ``raw_obs`` as read by this class:
      * indices 0-9:   (x, y) pairs for the objects listed in ``objectOrder``
      * indices 10-11: ball position
      * indices 12-13: ball velocity
      * index 14:      reset flag (converted with ``bool``)
    """
    def __init__(self, raw_obs):
        """
        Converts Unity Agent outputted Vector Observation to
        named format

        Args:
            raw_obs: indexable sequence with at least 15 entries.
        """
        self.raw_obs = raw_obs
        self.objectOrder = ["corner", "bucket", "triangle", "gear", "crate"]
        # Object i occupies the consecutive slots (2*i, 2*i + 1).
        self.objPos = {
            label: Vector2(raw_obs[2 * slot], raw_obs[2 * slot + 1])
            for slot, label in enumerate(self.objectOrder)
        }
        ball_x, ball_y, vel_x, vel_y = (raw_obs[i] for i in range(10, 14))
        self.ballPos = Vector2(ball_x, ball_y)
        self.ballVel = Vector2(vel_x, vel_y)
        self.reset = bool(raw_obs[14])

    def show(self) -> None:
        """
        Pretty Print Observation
        """
        for label, position in self.objPos.items():
            print(f"{label}: {position}")
        print(f"Ball Position: {self.ballPos}")
        print(f"Ball Velocity: {self.ballVel}")
        print(f"In Reset?: {self.reset}")

    def toArray(self):
        """Return the original, unmodified observation passed to the constructor."""
        return self.raw_obs

class PlaceAndShootGym(UnityToGymWrapper):
    """
    Gym-style environment in which one ``step`` is one complete shot.

    The wrapped ``gym_env`` is stepped repeatedly with the same action until
    the ball has come to rest (or the episode ends). The reward for the whole
    shot is then computed by ``reward_fn`` from the observations collected
    along the way.

    NOTE: the base-class constructor is not called; this class works purely by
    delegating to ``gym_env``. Inherited members that are not overridden here
    presumably depend on base-class state and should be accessed through
    ``gym_env`` instead -- TODO confirm which ones are needed.
    """
    def __init__(self, gym_env, reward_fn):
        """
        Args:
            gym_env: already constructed gym environment that every call is
                forwarded to.
            reward_fn: callable receiving the list of ``Obs`` gathered during
                one ``step``; its result is converted to ``float``.
        """
        self.gym_env = gym_env
        self.reward_fn = reward_fn
        # Per-axis speed below which the ball is treated as stopped.
        # unsure if this is always true
        self.velTresh = 0.001

    def reset(self):
        """
        Reset the wrapped environment and return its initial observation.

        Forwarded explicitly, like ``close``, because this object only holds
        the wrapped ``gym_env`` and never initialises the base class.
        """
        return self.gym_env.reset()

    def step(self, action):
        """
        Apply ``action`` and keep stepping until the ball has stopped.

        The same ``action`` is re-sent on every underlying step. Stepping
        stops as soon as both ball-velocity components are within
        ``velTresh`` of zero, or the wrapped environment reports ``done``.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``observation``
            is the raw observation of the last underlying step and ``reward``
            is ``reward_fn`` applied to all observations of this shot.
        """
        # first step -- keep `done` so it is defined even when the ball is
        # already at rest and the loop below never runs
        raw_obs, _reward, done, info = self.gym_env.step(action)
        obsVec = [Obs(raw_obs)]
        # continued steps; never step past the end of an episode, the
        # environment has to be reset first
        while not done and any(abs(f) > self.velTresh for f in obsVec[-1].ballVel):
            raw_obs, _reward, done, info = self.gym_env.step(action)
            obsVec.append(Obs(raw_obs))
        reward = self.getRewards(obsVec)
        return (obsVec[-1].toArray(), reward, done, info)

    def getRewards(self, obsVec: List[Obs]) -> float:
        """Evaluate ``reward_fn`` on the shot's observations and return it as ``float``."""
        return float(self.reward_fn(obsVec))

    def close(self):
        """Close the wrapped environment."""
        self.gym_env.close()


In [7]:
def endsInBucket(obsVec: List[Obs]) -> bool:
    """
    Custom Reward Fn:
    Is the ball inside the bucket in the final observation?

    The ball counts as inside when its offset from the bucket position
    (ball minus bucket) lies within fixed bounds on both axes. Only the
    last element of ``obsVec`` is inspected.
    """
    # Inclusive (lower, upper) bounds on the ball-minus-bucket offset per axis.
    x_low, x_high = -0.1927506923675537, 0.2523689270019531
    y_low, y_high = -0.24334418773651123, 0.6142134666442871

    final_obs = obsVec[-1]
    ball_x, ball_y = final_obs.ballPos
    bucket_x, bucket_y = final_obs.objPos["bucket"]
    offset_x = ball_x - bucket_x
    offset_y = ball_y - bucket_y

    # Horizontal check first; the vertical one only matters if it passes.
    inside_x = x_high >= offset_x >= x_low
    if not inside_x:
        return inside_x
    return y_high >= offset_y >= y_low

In [4]:
# Relative paths to pre-built Unity players. None of them is used by the
# active code below; GYM_BUILD is only referenced in the commented-out
# alternative.
SERVER_BUILD = "../Builds/MLAgent_View_21April22_server.app"
GRAPHICAL_BUILD = "../Builds/MLAgent_View_21April22.app"
GYM_BUILD = "../Builds/Gym_View_22April22.app"

# Alternative set-up: launch the standalone GYM_BUILD player with an engine
# configuration side channel instead of attaching to the Unity Editor.
# channel = EngineConfigurationChannel()
# channel.set_configuration_parameters(time_scale = 1.0, quality_level=5)
# unity_env = UnityEnvironment(file_name=GYM_BUILD, seed=1, side_channels=[channel], worker_id=1)

# No file_name given: the call waits for a Unity Editor session (press Play
# in the Editor), as the log output of this cell shows.
unity_env = UnityEnvironment()

# Start interacting with the environment.
unity_env.reset()
# Expose the Unity environment through the gym interface, then wrap it so
# that one step is a whole shot rewarded by endsInBucket.
gym_env = UnityToGymWrapper(unity_env, allow_multiple_obs=False)
env = PlaceAndShootGym(gym_env, endsInBucket)

[INFO] Listening on port 5004. Start training by pressing the Play button in the Unity Editor.
[INFO] Connected to Unity environment with package version 2.2.1-exp.1 and communication version 1.5.0
[INFO] Connected new brain: PlaceAndShoot?team=0




In [8]:
# Send one 6-component action; PlaceAndShootGym.step repeats it until the
# ball's velocity drops below its threshold.
# NOTE(review): the meaning of each action component is defined by the Unity
# agent and is not visible here -- confirm before changing values.
action = [-1.4, -1.5, 0, 0, 0, 0]
# Displayed result is the tuple (observation, reward, done, info).
env.step(action)

(array([ 5.3400002e+00, -2.6500001e+00,  5.3400002e+00,  1.8500000e+00,
         5.3400002e+00, -1.2500000e+00,  5.3400002e+00,  2.3999999e-01,
         5.3400002e+00,  3.2600000e+00,  2.6835985e+00, -4.6035752e+00,
        -9.7295688e-04,  0.0000000e+00,  0.0000000e+00], dtype=float32),
 0.0,
 False,
 {'step': <mlagents_envs.base_env.DecisionSteps at 0x7fc8ebcf4280>})

In [10]:
# Close the wrapped gym environment (PlaceAndShootGym.close forwards to it).
env.close()