In [10]:
import copy
import glob
import json
import os
import shutil
import unittest
from math import factorial
import torch
import torch.nn as nn

import gym
import numpy as np

from overcooked_ai_py.agents.agent import (
    AgentGroup,
    AgentPair,
    FixedPlanAgent,
    GreedyHumanModel,
    RandomAgent,
)

from overcooked_ai_py.agents.benchmarking import AgentEvaluator
from overcooked_ai_py.mdp.actions import Action, Direction
from overcooked_ai_py.mdp.layout_generator import (
    DISH_DISPENSER,
    ONION_DISPENSER,
    POT,
    SERVING_LOC,
    TOMATO_DISPENSER,
    LayoutGenerator,
)
from overcooked_ai_py.mdp.overcooked_env import (
    DEFAULT_ENV_PARAMS,
    OvercookedEnv,
    Overcooked
)
from overcooked_ai_py.mdp.overcooked_mdp import (
    ObjectState,
    OvercookedGridworld,
    OvercookedState,
    PlayerState,
    Recipe,
    SoupState,
)
from overcooked_ai_py.mdp.overcooked_trajectory import (
    DEFAULT_TRAJ_KEYS,
    EPISODE_TRAJ_KEYS,
    TIMESTEP_TRAJ_KEYS,
    append_trajectories,
)
from overcooked_ai_py.planning.planners import (
    NO_COUNTERS_PARAMS,
    MediumLevelActionManager,
    MotionPlanner,
)
from overcooked_ai_py.static import TESTING_DATA_DIR
from overcooked_ai_py.utils import (
    iterate_over_json_files_in_dir,
    load_from_json,
    load_pickle,
    save_as_json,
    save_pickle,
)

START_ORDER_LIST = ["any"]
n, s = Direction.NORTH, Direction.SOUTH
e, w = Direction.EAST, Direction.WEST
stay, interact = Action.STAY, Action.INTERACT
P, Obj = PlayerState, ObjectState


def comb(n, k):
    return factorial(n) / (factorial(n - k) * factorial(k))

def random_joint_action():
    num_actions = len(Action.ALL_ACTIONS)
    a_idx0, a_idx1 = np.random.randint(low=0, high=num_actions, size=2)
    return (Action.INDEX_TO_ACTION[a_idx0], Action.INDEX_TO_ACTION[a_idx1])

force_compute_large = False
force_compute = True
DISPLAY = False

simple_mdp = OvercookedGridworld.from_layout_name("cramped_room")
large_mdp = OvercookedGridworld.from_layout_name("corridor")


In [11]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
dummy_dir = "overcooked_test_temp"

if not os.path.exists(dummy_dir):
    os.makedirs(dummy_dir)

base_mdp = OvercookedGridworld.from_layout_name("scenario2")
start_state = OvercookedState(
    [P((8, 1), s), P((1, 1), e)],
    {},
    all_orders=base_mdp.start_all_orders,
)
env = OvercookedEnv.from_mdp(base_mdp, start_state_fn=lambda: start_state, horizon=100)
print(env)

X       X       X       X       X       O       X       X       X       X       

S       →1                                                      ↓0      O       

D                                                                       X       

X       X       X       X       X       X       P       X       X       X       




In [13]:


# trajectory, time_taken, _, _ = env.run_agents(
#     agent_pair, include_final_state=True, display=DISPLAY
# )
# end_state = trajectory[-1][0]


In [14]:
mlam = MediumLevelActionManager.from_pickle_or_compute(
    base_mdp, NO_COUNTERS_PARAMS, force_compute=True
)

agent1 = GreedyHumanModel(mlam)
agent2 = GreedyHumanModel(mlam)

agent_pair = AgentPair(agent1, agent2)

In [15]:
for i in range(5):
    print(f"timestep {i+1}")

    print(trajectory[i][4]['sparse_r_by_agent'],trajectory[i][4]['shaped_r_by_agent'])

timestep 1


NameError: name 'trajectory' is not defined

In [None]:
base_mdp = env.mdp
base_mdp.lossless_state_encoding_shape



array([10,  4, 26])

In [None]:
mappo_agent1 = MAPPO_Actor(env)
mappo_agent2 = MAPPO_Actor(env)
agent_pair = AgentPair(mappo_agent1, mappo_agent2)

In [None]:
state1 = env.lossless_state_encoding_mdp(env.state)[0]
state2 = env.lossless_state_encoding_mdp(env.state)[1]

# flatten the state
state1 = state1.flatten()
state2 = state2.flatten()

for i in range(len(state1)):
    if state1[i] != state2[i]:
        print(i)

338
339
343
347
676
677
680
684


In [43]:
mdp = OvercookedGridworld.from_layout_name("large_room")
base_env = OvercookedEnv.from_mdp(mdp, horizon=500)
gym_env = Overcooked(base_env = base_env, featurize_fn =base_env.featurize_state_mdp)

Computing MotionPlanner to be saved in /data/haofenghuang/warmup_project/mappo/overcooked_ai/src/overcooked_ai_py/data/planners/large_room_mp.pkl
It took 0.15352559089660645 seconds to create mp


In [44]:


action = np.array([[5],[1]])

obs, reward, done, info = gym_env.step(action)
reward

array([[0.],
       [0.]], dtype=float32)

In [45]:
gym_env.base_env

X       X       X       P       X       X       X       

O                                               O       

X                                       ↓1      X       

X                                               X       

X                                               X       

X       ↑0                                      X       

X       D       X       X       X       S       X       
