In [None]:
# GFootball environment.
# Install the Kaggle environments package, then the SDL2 development libraries
# (presumably required by the football engine -- confirm against the gfootball docs).
!pip install kaggle_environments
!apt-get update -y
!apt-get install -y libsdl2-gfx-dev libsdl2-ttf-dev
# Check out the v2.3 branch of the football repository and drop the matching
# prebuilt engine shared object into the directory created below.
!git clone -b v2.3 https://github.com/google-research/football.git
!mkdir -p football/third_party/gfootball_engine/lib
!wget https://storage.googleapis.com/gfootball/prebuilt_gameplayfootball_v2.3.so -O football/third_party/gfootball_engine/lib/prebuilt_gameplayfootball.so
# Install the package from the clone with GFOOTBALL_USE_PREBUILT_SO=1 set
# (presumably so the downloaded .so is used instead of compiling the engine).
!cd football && GFOOTBALL_USE_PREBUILT_SO=1 pip3 install .

# Some helper code
# NOTE(review): only this last pip install is version-pinned; kaggle_environments above is not.
!git clone https://github.com/garethjns/kaggle-football.git
!pip install reinforcement_learning_keras==0.6.0

In [None]:
import collections
from typing import Union, Callable, List, Tuple, Iterable, Any, Dict
from dataclasses import dataclass
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
import tensorflow as tf
import seaborn as sns
import gym
import gfootball
import glob 
import imageio
import pathlib
import zlib
import pickle
import tempfile
import os
import sys
from IPython.display import Image, display
from gfootball.env import observation_preprocessing
# Apply seaborn's default plot styling.
sns.set()

# In TF 2.x, training Keras models in a loop with eager execution enabled causes
# memory leaks and very poor performance, so eager execution is switched off here.
tf.compat.v1.disable_eager_execution()

# Add the helper repository directory to the import path
# (presumably where the setup cell's git clone lands on Kaggle -- confirm).
sys.path.append("/kaggle/working/kaggle-football/")

In [None]:
from __future__ import division
from __future__ import print_function
import itertools as it
from random import sample, randint, random
from time import time, sleep
import numpy as np
import skimage.color, skimage.transform
import tensorflow as tf
from tqdm import trange
from argparse import ArgumentParser

In [None]:
class DataWrapper():
    """Fixed-length frame buffer that converts observations into frame deltas.

    Every frame pushed in is divided by 255.0 and stored in a deque holding
    ``data_length`` (2) frames. The value handed back to the caller is the
    frame in slot 1 of the deque minus the frame in slot 0.
    """

    def __init__(self, obs_shape=(72, 96, 4)):
        """
        :param obs_shape: Expected shape of a single observation.
        """
        self.data_length = 2
        self.frame_shape = obs_shape
        self.init_frame_buffer()

    @staticmethod
    def normalize_frame(frame):
        """Return ``frame`` divided by 255.0."""
        scale = 255.0
        return frame / scale

    def init_frame_buffer(self):
        """(Re)create the deque, pre-filled with all-zero frames of the expected shape."""
        blank_frames = (
            np.zeros(shape=self.frame_shape) for _ in range(self.data_length)
        )
        self.frame_buffer = collections.deque(blank_frames, maxlen=self.data_length)

    def build_buffered_frames(self):
        """
        Return a new array of shape ``frame_shape`` holding buffer slot 1 minus
        buffer slot 0, i.e. the change between the two buffered frames.
        """
        previous, latest = self.frame_buffer[0], self.frame_buffer[1]
        delta = np.empty(self.frame_shape)
        # One whole-array subtraction covers every channel of the last axis.
        delta[...] = latest - previous
        return delta

    def stack_buffer(self, frame):
        """Normalise ``frame``, push it into the buffer and return the frame delta."""
        self.frame_buffer.append(self.normalize_frame(frame))
        return self.build_buffered_frames()

    def reset(self, reset_frame):
        """Start from a zeroed buffer and push the initial observation.

        :return: Buffered observation
        """
        self.init_frame_buffer()
        return self.stack_buffer(reset_frame)

In [None]:
import copy
import pylab
import random
import numpy as np
from keras.layers import Dense , Conv2D , Flatten
from keras.optimizers import Adam
from keras.models import Sequential

# Number of training episodes.
EPISODES = 10


# Deep SARSA agent operating on (72, 96, 4) image-like observations.
# A convolutional neural network is used as the Q-function approximator.
class DeepSARSAgent:
    """Epsilon-greedy Deep SARSA agent backed by a small Keras conv net.

    The network maps one observation to one Q-value per discrete action.
    Training is fully online: every call to :meth:`train_model` performs a
    single-sample fit towards the SARSA target
    ``reward + discount_factor * Q(next_state, next_action)``.
    """

    def __init__(self, action_size=19, input_shape=(72, 96, 4)):
        """
        :param action_size: Number of discrete actions (defaults to 19, the
                            value previously hard-coded throughout the class).
        :param input_shape: Shape of a single observation fed to the network.
        """
        self.load_model = False
        # actions which agent can do
        self.action_space = list(range(action_size))
        # get size of state and action
        self.action_size = len(self.action_space)
        self.input_shape = tuple(input_shape)
        # Last observation axis (4 with the default shape); not read by the
        # methods below, kept so existing attribute access keeps working.
        self.state_size = self.input_shape[-1]
        self.discount_factor = 0.99
        self.learning_rate = 0.001

        self.epsilon = 1.  # exploration
        self.epsilon_decay = .999999
        self.epsilon_min = 0.01
        self.model = self.build_model()

        if self.load_model:
            self.epsilon = 0.05
            # NOTE(review): this path differs from the "deep_sarsa.h5" file the
            # training script writes -- confirm which checkpoint is intended.
            self.model.load_weights('./save_model/deep_sarsa_trained.h5')

    def build_model(self):
        """Build and compile the Q-network.

        The observation is the input and the output layer has one linear unit
        per action (its Q-value). The model is compiled with MSE loss and Adam.

        :return: Compiled Keras ``Sequential`` model.
        """
        model = Sequential()
        # NOTE(review): the three convolutions have no activation, so they
        # compose into a single linear map -- confirm this is intentional.
        model.add(Conv2D(32, kernel_size=3, strides=(2, 2),
                         input_shape=self.input_shape, padding="valid"))
        model.add(Conv2D(64, kernel_size=5, strides=(2, 2), padding="valid"))
        model.add(Conv2D(128, kernel_size=5, strides=(2, 2), padding="valid"))
        model.add(Flatten())
        model.add(Dense(256, activation="relu"))
        model.add(Dense(self.action_size))
        # `learning_rate` is the current argument name; `lr` is deprecated.
        model.compile(loss="mean_squared_error",
                      optimizer=Adam(learning_rate=self.learning_rate))
        model.summary()
        return model

    def get_action(self, state):
        """Pick an action with an epsilon-greedy policy.

        :param state: Single observation (no batch axis).
        :return: Action index as a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            # Explore: uniformly random action.
            return random.randrange(self.action_size)

        # Exploit: action with the highest predicted Q-value for this state.
        batch = np.expand_dims(np.float32(state), 0)
        q_values = self.model.predict(batch)
        return int(np.argmax(q_values[0]))

    def train_model(self, state, action, reward, next_state, next_action, done):
        """Run one SARSA update on a single transition.

        :param state: Observation the action was taken in.
        :param action: Index of the action taken in ``state``.
        :param reward: Reward received for that action.
        :param next_state: Observation reached after the action.
        :param next_action: Index of the action chosen for ``next_state``.
        :param done: True when ``next_state`` is terminal.
        """
        # Decay exploration once per training step, never below the floor.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

        state = np.expand_dims(np.float32(state), 0)
        next_state = np.expand_dims(np.float32(next_state), 0)
        target = self.model.predict(state)[0]
        # SARSA target: bootstrap from the Q-value of the action chosen for s'
        # (not the maximum over actions), using this same network.
        if done:
            target[action] = reward
        else:
            target[action] = (reward + self.discount_factor *
                              self.model.predict(next_state)[0][next_action])

        # Only the entry for `action` differs from the current prediction, so
        # the MSE loss only pushes that one output.
        target = np.reshape(target, [1, self.action_size])
        self.model.fit(state, target, epochs=1, verbose=0)


if __name__ == "__main__":
    # Online Deep SARSA training loop: one single-sample update per env step.

    football_env = gym.make("GFootball-11_vs_11_kaggle-SMM-v0")
    agent = DeepSARSAgent()
    data_processor = DataWrapper()

    global_step = 0
    scores, episodes = [], []

    for e in range(EPISODES):
        done = False
        score = 0
        # Fresh episode: reset the env and the frame buffer together.
        state = data_processor.reset(football_env.reset())

        # SARSA is on-policy: the action whose Q-value is used in the update
        # target must be the action that is actually executed next. So the
        # first action is chosen once here and afterwards `next_action` is
        # carried over instead of being re-sampled at the top of the loop.
        action = agent.get_action(state)

        while not done:
            global_step += 1

            # Go one step in the environment with the already chosen action.
            next_state, reward, done, _ = football_env.step(action)
            next_state = data_processor.stack_buffer(next_state)

            # Choose the follow-up action now so it can be used in the target.
            next_action = agent.get_action(next_state)

            # Every time step we do training.
            agent.train_model(state, action, reward, next_state, next_action, done)
            score += reward

            # stack_buffer returns a fresh array on every call, so no copy is
            # needed when moving on to the next step.
            state, action = next_state, next_action

            if done:
                scores.append(score)
                episodes.append(e)
                print("episode:", e, "  score:", score, "global_step",
                      global_step, "  epsilon:", agent.epsilon)

        # Checkpoint every second episode, and always after the last one so
        # the saved file reflects the final weights.
        if e % 2 == 0 or e == EPISODES - 1:
            agent.model.save("deep_sarsa.h5")

In [None]:
%%writefile main.py

#from kaggle_environments.envs.football.helpers import *

# @human_readable_agent wrapper modifies raw observations 
# provided by the environment:
# https://github.com/google-research/football/blob/master/gfootball/doc/observation.md#raw-observations
# into a form easier to work with by humans.
# Following modifications are applied:
# - Action, PlayerRole and GameMode enums are introduced.
# - 'sticky_actions' are turned into a set of active actions (Action enum)
#    see usage example below.
# - 'game_mode' is turned into GameMode enum.
# - 'designated' field is removed, as it always equals to 'active'
#    when a single player is controlled on the team.
# - 'left_team_roles'/'right_team_roles' are turned into PlayerRole enums.
# - Action enum is to be returned by the agent function.


import collections
import pickle
import zlib
from typing import Tuple, Dict, Any, Union, Callable, List

import gym
import numpy as np
import tensorflow as tf
from gfootball.env import observation_preprocessing
from tensorflow import keras

import random                # Handling random number generation
import time                  # Handling time calculation
from skimage import transform# Help us to preprocess the frames

from collections import deque# Ordered collection with ends
from collections import namedtuple
import numpy as np

# NOTE(review): not referenced by any code visible in this cell -- confirm whether it is still needed.
stacked_size = 3


from keras.models import load_model
import numpy as np
import keras.backend as K
import tensorflow as tf

#------------------------------------- Data Preprocessing ---------------------------------------------------------
class DataWrapper():
    """Fixed-length frame buffer that converts observations into frame deltas.

    Every frame pushed in is divided by 255.0 and stored in a deque holding
    ``data_length`` (2) frames. The value handed back to the caller is the
    frame in slot 1 of the deque minus the frame in slot 0.
    """

    def __init__(self, obs_shape=(72, 96, 4)):
        """
        :param obs_shape: Expected shape of a single observation.
        """
        self.data_length = 2
        self.frame_shape = obs_shape
        self.init_frame_buffer()

    @staticmethod
    def normalize_frame(frame):
        """Return ``frame`` divided by 255.0."""
        scale = 255.0
        return frame / scale

    def init_frame_buffer(self):
        """(Re)create the deque, pre-filled with all-zero frames of the expected shape."""
        blank_frames = (
            np.zeros(shape=self.frame_shape) for _ in range(self.data_length)
        )
        self.frame_buffer = collections.deque(blank_frames, maxlen=self.data_length)

    def build_buffered_frames(self):
        """
        Return a new array of shape ``frame_shape`` holding buffer slot 1 minus
        buffer slot 0, i.e. the change between the two buffered frames.
        """
        previous, latest = self.frame_buffer[0], self.frame_buffer[1]
        delta = np.empty(self.frame_shape)
        # One whole-array subtraction covers every channel of the last axis.
        delta[...] = latest - previous
        return delta

    def stack_buffer(self, frame):
        """Normalise ``frame``, push it into the buffer and return the frame delta."""
        self.frame_buffer.append(self.normalize_frame(frame))
        return self.build_buffered_frames()

    def reset(self, reset_frame):
        """Start from a zeroed buffer and push the initial observation.

        :return: Buffered observation
        """
        self.init_frame_buffer()
        return self.stack_buffer(reset_frame)

# Load the trained network once at import time. The absolute path is tried
# first; if that fails, fall back to a file in the current working directory.
try:
    model_sarsa = load_model('/kaggle_simulations/agent/deep_sarsa.h5')
except (OSError, ValueError):
    # OSError is caught rather than only FileNotFoundError: depending on the
    # loader backend a missing model file can surface as a plain
    # OSError/IOError, which the narrower clause would let escape and so the
    # fallback below would never run. FileNotFoundError is a subclass of
    # OSError, so everything previously caught is still caught.
    model_sarsa = load_model('deep_sarsa.h5')


# Module-level frame buffer shared by every call to the agent function.
data_preprocessor = DataWrapper()


def agent(obs):
    """Choose an action for one raw environment observation.

    :param obs: Observation mapping; the entry ``obs['players_raw'][0]`` is
                the raw observation that gets used.
    :return: One-element list holding the chosen action index as an ``int``.
    """
    # Take the first raw player observation returned by the environment.
    raw_player_obs = obs['players_raw'][0]

    # Turn it into an SMM frame (intended to match the SMM observations used
    # during training) and drop the singleton axes.
    smm_frame = observation_preprocessing.generate_smm([raw_player_obs]).squeeze()

    # Only the buffering/normalisation of the preprocessor is used here; no
    # environment stepping or Gym API is involved.
    diff_state = data_preprocessor.stack_buffer(smm_frame)

    # Run the network on a batch of one; the output holds one value per action.
    batch = np.expand_dims(diff_state, 0)
    action_values = model_sarsa.predict(batch, steps=1)

    # Greedy choice: index of the largest output.
    best_action = int(np.argmax(action_values))
    return [best_action]

In [None]:
from typing import Tuple, Dict, List, Any

from kaggle_environments import make

# Create a Kaggle "football" environment in debug mode, with video saving
# enabled and the 11_vs_11_kaggle scenario selected.
env = make("football", debug=True,configuration={"save_video": True,
                                      "scenario_name": "11_vs_11_kaggle"})

# Define players
left_player = "/kaggle/working/main.py"  # A custom agent, eg. random_agent.py or example_agent.py
right_player = "run_right"  # eg. A built in 'AI' agent or the agent again


# Run one match between the two players and keep whatever env.run returns.
# NOTE(review): the annotation below is the author's; confirm it matches the
# structure kaggle_environments actually returns from env.run.
output: List[Tuple[Dict[str, Any], Dict[str, Any]]] = env.run([left_player, right_player])

# Disabled score summary, kept for reference:
#print(f"Final score: {sum([r['reward'] for r in output[0]])} : {sum([r['reward'] for r in output[1]])}")
# Render the finished match in the notebook.
env.render(mode="human", width=800, height=600)

In [None]:
# Bundle the agent script and the saved model weights into a gzip-compressed tarball.
!tar -czvf submission.tar.gz ./main.py*  ./deep_sarsa.h5*