In [1]:
import os
from collections import deque

import numpy as np
import torch as th

import gym
from gym.spaces import Box, Discrete

from gfootball.env import create_environment
from gfootball.env import observation_preprocessing

from stable_baselines3 import PPO
from stable_baselines3.ppo import CnnPolicy
from stable_baselines3.common import results_plotter
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv

from tqdm.notebook import tqdm

In [2]:
import os
import base64
import pickle
import zlib
import gym
import numpy as np
import pandas as pd
import torch as th
from torch import nn, tensor
from collections import deque
from gym.spaces import Box, Discrete
from gfootball.env import create_environment, observation_preprocessing
from stable_baselines3 import PPO
from stable_baselines3.ppo import CnnPolicy
from stable_baselines3.common import results_plotter
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv
from IPython.display import HTML
from matplotlib import pyplot as plt
%matplotlib inline

In [3]:
scenarios = {0: "academy_empty_goal_close",
             1: "academy_empty_goal",
             2: "academy_run_to_score",
             3: "academy_run_to_score_with_keeper",
             4: "academy_pass_and_shoot_with_keeper",
             5: "academy_run_pass_and_shoot_with_keeper",
             6: "academy_3_vs_1_with_keeper",
             7: "academy_corner",
             8: "academy_counterattack_easy",
             9: "academy_counterattack_hard",
             10: "academy_single_goal_versus_lazy",
             11: "11_vs_11_kaggle"}
scenario_name = scenarios[0]

In [4]:
class FootballGym(gym.Env):
    spec = None
    metadata = None
    
    def __init__(self, config=None):
        super(FootballGym, self).__init__()
        env_name = "academy_empty_goal_close"
        rewards = "scoring,checkpoints"
        if config is not None:
            env_name = config.get("env_name", env_name)
            rewards = config.get("rewards", rewards)
        self.env = create_environment(
            env_name=env_name,
            stacked=False,
            representation="raw",
            rewards = rewards,
            write_goal_dumps=False,
            write_full_episode_dumps=False,
            render=False,
            write_video=False,
            dump_frequency=1,
            logdir=".",
            extra_players=None,
            number_of_left_players_agent_controls=1,
            number_of_right_players_agent_controls=0)  
        self.action_space = Discrete(19)
        self.observation_space = Box(low=0, high=255, shape=(72, 96, 16), dtype=np.uint8)
        self.reward_range = (-1, 1)
        self.obs_stack = deque([], maxlen=4)
        
    def transform_obs(self, raw_obs):
        obs = raw_obs[0]
        obs = observation_preprocessing.generate_smm([obs])
        if not self.obs_stack:
            self.obs_stack.extend([obs] * 4)
        else:
            self.obs_stack.append(obs)
        obs = np.concatenate(list(self.obs_stack), axis=-1)
        obs = np.squeeze(obs)
        return obs

    def reset(self):
        self.obs_stack.clear()
        obs = self.env.reset()
        obs = self.transform_obs(obs)
        return obs
    
    def step(self, action):
        obs, reward, done, info = self.env.step([action])
        obs = self.transform_obs(obs)
        return obs, float(reward), done, info
    
check_env(env=FootballGym(), warn=True)

In [5]:
from multiprocessing.connection import Pipe
import numpy as np
from stable_baselines3 import PPO
# from stable_baselines3.common.policies import CnnPolicy


In [6]:
import os
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import VecEnv
from pathos.multiprocessing import ProcessPool

def worker(remote, parent_remote, env_fn_wrapper):
    parent_remote.close()
    env = env_fn_wrapper()
    while True:
        cmd, data = remote.recv()
        if cmd == 'step':
            ob, reward, done, info = env.step(data)
            if done:
                ob = env.reset()
            remote.send((ob, reward, done, info))
        elif cmd == 'reset':
            ob = env.reset()
            remote.send(ob)
        elif cmd == 'close':
            remote.close()
            break
        elif cmd == 'get_spaces':
            remote.send((env.observation_space, env.action_space))
        elif cmd == 'env_method':
            method_name, method_args, method_kwargs = data
            method = getattr(env, method_name)
            remote.send(method(*method_args, **method_kwargs))
        elif cmd == 'env_is_wrapped':
            wrapper_class = data
            remote.send(wrapper_class in env._env_is_wrapped_cache)
        elif cmd == 'get_attr':
            attr_name = data
            remote.send(getattr(env, attr_name))
        elif cmd == 'set_attr':
            attr_name, value = data
            setattr(env, attr_name, value)
            remote.send(True)
        elif cmd == 'seed':
            seed = data
            remote.send(env.seed(seed))
        else:
            raise NotImplementedError


class CustomSubprocVecEnv(VecEnv):
    def __init__(self, env_fns):
        self.waiting = False
        self.closed = False
        n_envs = len(env_fns)
        self.n_envs = n_envs
        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(n_envs)])

        self.pool = ProcessPool(n_envs)
        self.ps = [self.pool.apipe(worker, work_remote, remote, env_fn)
                   for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]

        for remote in self.work_remotes:
            remote.close()

        self._dummy_env = DummyVecEnv([env_fns[0]])
        super().__init__(self.n_envs, self._dummy_env.observation_space, self._dummy_env.action_space)



    def step_async(self, actions):
        for remote, action in zip(self.remotes, actions):
            remote.send(("step", action))
        self.waiting = True

    def step_wait(self):
        results = [remote.recv() for remote in self.remotes]
        self.waiting = False
        obs, rews, dones, infos = zip(*results)
        return np.stack(obs), np.stack(rews), np.stack(dones), infos

    def reset(self):
        for remote in self.remotes:
            remote.send(("reset", None))
        return np.stack([remote.recv() for remote in self.remotes])

    def close(self):
        if self.closed:
            return
        if self.waiting:
                remote.recv()
        for remote in self.remotes:
            remote.send(("close", None))
        self.pool.close()
        self.pool.join()
        self.closed = True


    def get_images(self):
        for remote in self.remotes:
            remote.send(("render", None))
        imgs = [remote.recv() for remote in self.remotes]
        return imgs

    @property
    def unwrapped(self):
        return self._dummy_env.unwrapped
    def env_method(self, method_name, *method_args, **method_kwargs):
        for remote in self.remotes:
            remote.send(('env_method', (method_name, method_args, method_kwargs)))
        return [remote.recv() for remote in self.remotes]

    def env_is_wrapped(self, wrapper_class):
        for remote in self.remotes:
            remote.send(('env_is_wrapped', wrapper_class))
        return [remote.recv() for remote in self.remotes]

    def get_attr(self, attr_name):
        for remote in self.remotes:
            remote.send(('get_attr', attr_name))
        return [remote.recv() for remote in self.remotes]

    def set_attr(self, attr_name, value):
        for remote in self.remotes:
            remote.send(('set_attr', (attr_name, value)))
        return [remote.recv() for remote in self.remotes]

    def seed(self, seed=None):
        for remote in self.remotes:
            remote.send(('seed', seed))
        return [remote.recv() for remote in self.remotes]

In [7]:
# train_env = CustomSubprocVecEnv([make_env(config, rank=i) for i in range(n_envs)])


In [8]:
test_env = FootballGym({"env_name":scenario_name})
check_env(env=test_env, warn=True)

In [9]:
class ProgressBar(BaseCallback):
    
    def __init__(self, verbose=0):
        super(ProgressBar, self).__init__(verbose)
        self.pbar = None

    def _on_training_start(self):
        factor = np.ceil(self.locals['total_timesteps'] / self.model.n_steps)
        try:
            n = len(self.training_env.remotes)
        except AttributeError:
            n = len(self.training_env.envs)
        total = int(self.model.n_steps * factor / n)
        self.pbar = tqdm(total=total)

    def _on_rollout_start(self):
        self.pbar.refresh()

    def _on_step(self):
        self.pbar.update(1)
        return True

    def _on_rollout_end(self):
        self.pbar.refresh()

    def _on_training_end(self):
        self.pbar.close()
        self.pbar = None

In [10]:
def make_env(config=None, rank=0):
    def _init():
        env = FootballGym(config)
        log_file = os.path.join(".", str(rank))
        env = Monitor(env, log_file, allow_early_resets=True)
        return env
    return _init

In [11]:
n_envs = 12
n_steps = 1

In [12]:
config={"env_name":scenario_name}
# train_env = DummyVecEnv([make_env(config, rank=i) for i in range(n_envs)])
# train_env = SubprocVecEnv([make_env(config, rank=i) for i in range(n_envs)], start_method='spawn')
train_env = CustomSubprocVecEnv([make_env(config, rank=i) for i in range(n_envs)])

model = PPO(CnnPolicy, train_env, n_steps=n_steps, verbose=1)
# model = PPO.load("../input/gfootball-stable-baselines3/ppo_gfootball.zip", train_env)

progressbar = ProgressBar()

Using cpu device
Wrapping the env in a VecTransposeImage.


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1 and n_envs=12)


In [13]:
from tqdm import tqdm

In [None]:
total_timesteps = n_steps * n_envs * 500
model.learn(total_timesteps=total_timesteps, callback=progressbar)
model.save("ppo_gfootball")