In [None]:
import gym


# Define the environment
class MyWrapper(gym.Wrapper):
    """Two-agent wrapper around PettingZoo's ``simple_spread_v3`` AEC env.

    The wrapper exposes a joint, list-based interface: ``reset`` returns one
    observation per agent, and ``step`` takes one action per agent and returns
    ``(next_state, reward_sum, over)``. Note that this 3-tuple is not the
    standard gym ``step`` return signature.

    Attributes:
        N: Number of agents (and landmarks) requested from the environment.
        env: The underlying PettingZoo environment.
        step_n: Number of ``step`` calls made since the last ``reset``.
    """

    def __init__(self):
        """Create the underlying environment and initialise the step counter."""
        from pettingzoo.mpe import simple_spread_v3
        self.N = 2
        # max_cycles is set very high so that the 50-step cap enforced in
        # ``_step`` is what ends an episode in practice.
        env = simple_spread_v3.env(N=self.N,
                                   local_ratio=0.5,
                                   max_cycles=1e8,
                                   render_mode='rgb_array')
        super().__init__(env)
        self.env = env
        self.step_n = 0

    def reset(self):
        """Reset the environment and return the initial per-agent observations.

        The layout is re-rolled until the two landmarks are at least 1 unit
        apart (Euclidean distance), so the targets never start too close.

        Returns:
            A list with one observation (itself a list) per agent.
        """
        import numpy as np

        # Loop instead of recursing so an unlucky streak of rejected layouts
        # cannot grow the call stack.
        while True:
            self.env.reset()
            self.step_n = 0

            # Do not allow the two target landmarks to be too close together.
            mark0, mark1 = self.env.env.env.world.landmarks
            dist = np.array(mark0.state.p_pos) - np.array(mark1.state.p_pos)
            dist = (dist**2).sum()**0.5
            if dist < 1:
                continue

            return self.state()

    def state(self):
        """Return the current observation of every agent as nested lists."""
        # Query this instance's own environment rather than any module-level
        # ``env`` so that several wrappers can coexist.
        return [self.env.observe(agent).tolist() for agent in self.env.agents]

    def step(self, action):
        """Apply a joint action once, then idle for four more sub-steps.

        Moving for one sub-step and then idling for the rest cancels inertia.
        All five sub-steps together count as a single step towards the
        episode's step limit.

        Args:
            action: One action per agent; ``-1`` is the idle action (each
                value is shifted by +1 before being sent to the environment).

        Returns:
            A tuple ``(next_state, reward_sum, over)``: the observations after
            the last sub-step, the per-agent rewards summed over all five
            sub-steps, and the done flag of the last sub-step.
        """
        reward_sum = [0] * self.N
        for i in range(5):
            if i != 0:
                # After the first sub-step every agent idles.
                action = [-1] * self.N
            next_state, reward, over = self._step(action)
            for j in range(self.N):
                reward_sum[j] += reward[j]
            # Undo the increment done by ``_step``; the whole macro-step is
            # counted exactly once below.
            self.step_n -= 1

        self.step_n += 1

        return next_state, reward_sum, over

    def _step(self, action):
        """Advance every agent by one environment step.

        Args:
            action: One action per agent, indexed in agent-iteration order.

        Returns:
            A tuple ``(state, reward, over)``: the per-agent observations, the
            per-agent rewards, and whether the episode is finished.
        """
        for i, _ in enumerate(self.env.agent_iter(self.N)):
            # Shift by one so that -1 maps to the environment's action 0.
            self.env.step(action[i] + 1)

        reward = [self.env.rewards[agent] for agent in self.env.agents]

        _, _, termination, truncation, _ = self.env.last()
        over = termination or truncation

        # Enforce the maximum number of steps per episode.
        self.step_n += 1
        if self.step_n >= 50:
            over = True

        return self.state(), reward, over

    # Display the rendered game frame.
    def show(self):
        """Render the current frame and display it with matplotlib."""
        from matplotlib import pyplot as plt
        plt.figure(figsize=(3, 3))
        plt.imshow(self.env.render())
        plt.show()


# Build the wrapped environment and start the first episode.
env = MyWrapper()
env.reset()

# Display the rendered frame of the freshly reset environment.
env.show()