In [1]:
# !python -m pip install mlagents==0.28.0
# !python -m pip install gym
# !cd "/Users/aditya/Documents/GitHub/game_creation_research/ml-agents/gym-unity" && pip3 install -e .

import mlagents
import numpy as np
import json
from mlagents_envs.environment import UnityEnvironment
from collections import namedtuple, defaultdict
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
from gym_unity.envs import UnityToGymWrapper


In [2]:
from typing import List

Vector2 = namedtuple('Vector2', 'x y')
objectOrder = ["corner", "bucket", "triangle", "gear", "crate"]


def objectTagToActionVal(object):
    idx = objectOrder.index(object)
    # 0 is abstain
    idx = idx + 1
    return idx/6.0


def ActionValToObjectTag(value):
    assert -1 <= value <= 1
    value = abs(value)
    value *= 6
    value = round(value)
    value = value - 1
    if value == -1:
        return None
    else:
        return objectOrder[value]


class Obs():
    def __init__(self, raw_obs):
        """
        Converts Unity Agent outputted Vector Observation to 
        named format
        """
        self.raw_obs = raw_obs
        self.objPos = {}
        self.objPos[objectOrder[0]] = Vector2(raw_obs[0], raw_obs[1])
        self.objPos[objectOrder[1]] = Vector2(raw_obs[2], raw_obs[3])
        self.objPos[objectOrder[2]] = Vector2(raw_obs[4], raw_obs[5])
        self.objPos[objectOrder[3]] = Vector2(raw_obs[6], raw_obs[7])
        self.objPos[objectOrder[4]] = Vector2(raw_obs[8], raw_obs[9])
        self.ballPos = Vector2(raw_obs[10], raw_obs[11])
        self.ballVel = Vector2(raw_obs[12], raw_obs[13])
        self.reset = bool(raw_obs[14])

    def show(self) -> None:
        """
        Pretty Print Observation
        """
        for each_obj in self.objPos:
            print(f"{each_obj}: {self.objPos[each_obj]}")
        print(f"Ball Position: {self.ballPos}")
        print(f"Ball Velocity: {self.ballVel}")
        print(f"In Reset?: {self.reset}")

    def toArray(self):
        return self.raw_obs


class ActionTransformer():
    def __init__(self, ban_object=[], ban_mouse_position_x=(99, 999), ban_mouse_position_y=(99, 999),
                 ban_object_position_x=(99, 999), ban_object_position_y=(99, 999)):
        self.ban_object = ban_object
        assert ban_mouse_position_x[1] > ban_mouse_position_x[0]
        self.ban_mouse_position_x = ban_mouse_position_x
        assert ban_mouse_position_y[1] > ban_mouse_position_y[0]
        self.ban_mouse_position_y = ban_mouse_position_y
        assert ban_object_position_x[1] > ban_object_position_x[0]
        self.ban_object_position_x = ban_object_position_x
        assert ban_object_position_y[1] > ban_object_position_y[0]
        self.ban_object_position_y = ban_object_position_y

    def transform(self, raw_action):
        obj = ActionValToObjectTag(raw_action[4])
        if obj in self.ban_object:
            return [0, 0, 0, 0, 0, 0]
        if self.ban_mouse_position_x[0] <= raw_action[0] <= self.ban_mouse_position_x[1]:
            return [0, 0, 0, 0, 0, 0]
        if self.ban_mouse_position_y[0] <= raw_action[1] <= self.ban_mouse_position_y[1]:
            return [0, 0, 0, 0, 0, 0]
        if self.ban_object_position_x[0] <= raw_action[2] <= self.ban_object_position_x[1]:
            return [0, 0, 0, 0, 0, 0]
        if self.ban_object_position_y[0] <= raw_action[3] <= self.ban_object_position_y[1]:
            return [0, 0, 0, 0, 0, 0]
        return raw_action


class PlaceAndShootGym(UnityToGymWrapper):
    def __init__(self, gym_env, reward_fn, actionTransformer=ActionTransformer()):
        self.gym_env = gym_env
        self.reward_fn = reward_fn
        self.actionTransformer = actionTransformer
        # unsure if this is always true
        self.velTresh = 0.001

    def step(self, raw_action):
        """
        Step is defined as doing something ball has stopped
        """
        action = self.actionTransformer.transform(raw_action)
        obsVec = []
        # first step
        raw_obs, _reward, done, info = self.gym_env.step(action)
        obsVec.append(Obs(raw_obs))
        # continued steps
        while (any([abs(f) > self.velTresh for f in obsVec[-1].ballVel])):
            raw_obs, _reward, done, info = self.gym_env.step(action)
            obsVec.append(Obs(raw_obs))
        reward = self.getRewards(obsVec)
        return (obsVec[-1].toArray(), reward, done, info)

    def setup(self, actionVec, transform = False) -> bool:
        """
        Setup steps must be a sequence of actions that end with a reset of the ball
        """
        assert actionVec[-1][-1] == 1
        for each_raw_action in actionVec:
            if transform:
                each_action = self.actionTransformer.transform(each_raw_action)
            else:
                each_action = each_raw_action
            self.gym_env.step(each_action)

    def getRewards(self, obsVec: List[Obs]) -> float:
        return float(self.reward_fn(obsVec))

    def reset(self):
        self.gym_env.reset()

    def close(self):
        self.gym_env.close()


In [3]:
# crate in the middle and bucket on floor for bounce and bucket game
GAME_2_SETUP = [[0, 0, -1.262854/10.0, 2.134222/10.0, objectTagToActionVal("crate"), 0],
                [0, 0, -4.847445/10.0, -4.238384/10.0,
                    objectTagToActionVal("bucket"), 0],
                [0, 0, 0, 0, 0, 1]]


def endsInBucket(obsVec: List[Obs]) -> bool:
    """
    Custom Reward Fn:
    Is that ball in bucket at the end or no?
    """
    MIN_X_DELTA = -0.1927506923675537
    MAX_X_DELTA = 0.2523689270019531
    MIN_Y_DELTA = -0.24334418773651123
    MAX_Y_DELTA = 0.6142134666442871

    ball_x, ball_y = obsVec[-1].ballPos
    bucket_x, bucket_y = obsVec[-1].objPos["bucket"]
    x_delta = ball_x - bucket_x
    y_delta = ball_y - bucket_y

    return (MAX_X_DELTA >= x_delta >= MIN_X_DELTA) and (MAX_Y_DELTA >= y_delta >= MIN_Y_DELTA)


NO_OBJECT_INTERACTION = ActionTransformer(ban_object=[
                                          "crate", "bucket", "corner", "gear", "triangle"], ban_object_position_x=(-1, 1), ban_object_position_y=(-1, 1))


In [9]:
SERVER_BUILD = "../Builds/MLAgent_View_21April22_server.app"
GRAPHICAL_BUILD = "../Builds/MLAgent_View_21April22.app"
GYM_BUILD = "../Builds/Gym_View_22April22.app"

channel = EngineConfigurationChannel()
channel.set_configuration_parameters(time_scale = 20, quality_level=0)
unity_env = UnityEnvironment(file_name=GYM_BUILD, seed=1, side_channels=[channel], worker_id=0)

# unity_env = UnityEnvironment()

# Start interacting with the environment.
unity_env.reset()
gym_env = UnityToGymWrapper(unity_env, allow_multiple_obs=False)
env = PlaceAndShootGym(gym_env, reward_fn=endsInBucket,
                       actionTransformer=NO_OBJECT_INTERACTION)

E0423 15:56:45.333396000 4474121728 fork_posix.cc:76]                  Other threads are currently calling into gRPC, skipping fork() handlers


[INFO] Connected to Unity environment with package version 2.2.1-exp.1 and communication version 1.5.0
[INFO] Connected new brain: PlaceAndShoot?team=0




In [10]:
env.setup(GAME_2_SETUP)

In [11]:
# search over movements

for mouse_x in np.arange(-1.0, 1.0, 0.2):
    for mouse_y in np.arange(-1.0, 1.0, 0.2):
        # reshoot
        env.step([0, 0, 0, 0, 0, 1])
        curr_action = [mouse_x, mouse_y, 0, 0, 0, 0]
        print(f"Shot: {curr_action}")
        raw_obs, reward, _done, _info = env.step(curr_action)
        if reward>0:
            print(f"Game is playable!")
            break

IndexError: index 13 is out of bounds for axis 0 with size 13

In [8]:
env.close()

In [None]:
env.reset()

UnityEnvironmentException: No Unity environment is loaded.