In [None]:
# GFootball environment.
# Installs kaggle_environments plus the SDL2 system libraries, then builds the
# v2.3 branch of google-research/football against a prebuilt engine binary.
!pip install kaggle_environments
!apt-get update -y
!apt-get install -y libsdl2-gfx-dev libsdl2-ttf-dev
!git clone -b v2.3 https://github.com/google-research/football.git
!mkdir -p football/third_party/gfootball_engine/lib
# Download the prebuilt engine into the directory created above; the install
# below is run with GFOOTBALL_USE_PREBUILT_SO=1 so it picks this file up.
!wget https://storage.googleapis.com/gfootball/prebuilt_gameplayfootball_v2.3.so -O football/third_party/gfootball_engine/lib/prebuilt_gameplayfootball.so
!cd football && GFOOTBALL_USE_PREBUILT_SO=1 pip3 install .

# Some helper code
# The cloned kaggle-football repo is added to sys.path in the imports cell;
# reinforcement_learning_keras is pinned to 0.6.0.
!git clone https://github.com/garethjns/kaggle-football.git
!pip install reinforcement_learning_keras==0.6.0

In [None]:
import collections
from typing import Union, Callable, List, Tuple, Iterable, Any, Dict
from dataclasses import dataclass
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
import tensorflow as tf
import seaborn as sns
import gym
import gfootball
import glob 
import imageio
import pathlib
import zlib
import pickle
import tempfile
import os
import sys
from IPython.display import Image, display
from gfootball.env import observation_preprocessing
# Apply seaborn's default plot styling.
sns.set()

# In TF >= 2, training Keras models in a loop with eager execution enabled
# causes memory leaks and terrible performance, so switch it off globally.
# This must run before any model is built.
tf.compat.v1.disable_eager_execution()

# Make the helper repository cloned in the setup cell importable.
sys.path.append("/kaggle/working/kaggle-football/")

In [None]:
from __future__ import division
from __future__ import print_function
import itertools as it
from random import sample, randint, random
from time import time, sleep
import numpy as np
import skimage.color, skimage.transform
import tensorflow as tf
from tqdm import trange
from argparse import ArgumentParser

In [None]:
# Training environment used by the policy-gradient loop below.
football = gym.make("GFootball-11_vs_11_kaggle-SMM-v0")
# Sanity-check the observation shape. The original called `env.reset()`, but
# `env` is only created in a later cell, so a fresh kernel raised NameError.
print(football.reset().shape)

In [None]:
import gym
import numpy as np

from keras.models import Model
from keras.layers import *
from keras import backend as K

from collections import deque

def one_hot(index, categories):
    """Return a 1-D float vector of length `categories` with a 1 at `index`.

    All other positions are 0.
    """
    encoded = np.zeros(shape=(categories,))
    encoded[index] = 1
    return encoded

def discount_rewards(r, gamma=0.99):
    """Compute the discounted return for every step of a reward sequence.

    Args:
        r: 1-D sequence (list or array) of per-step rewards.
        gamma: Discount factor applied to future rewards.

    Returns:
        A 1-D floating-point array the same length as `r`, where element `t`
        is `r[t] + gamma * r[t+1] + gamma**2 * r[t+2] + ...`.
    """
    rewards = np.asarray(r)
    # Integer rewards must be promoted to float: the original allocated the
    # output with `np.zeros_like(r)`, which inherits an integer dtype and
    # silently truncates every discounted value (0.99 -> 0).
    if not np.issubdtype(rewards.dtype, np.floating):
        rewards = rewards.astype(np.float64)

    discounted_r = np.zeros_like(rewards)
    running_add = 0.0
    # Walk backwards so each step reuses the already-discounted future return.
    for t in reversed(range(len(rewards))):
        running_add = running_add * gamma + rewards[t]
        discounted_r[t] = running_add
    return discounted_r

def pg_loss(advantage):
    """Build a Keras loss implementing the policy-gradient objective.

    Args:
        advantage: Tensor of per-sample advantages. In this notebook it is the
            `Input(shape=(1,))` tensor created in `pg`, i.e. one value per
            sample.

    Returns:
        A function `f(y_true, y_pred)` returning the scalar loss
        `-sum(A * log(p(action taken)))`, where `y_true` is the one-hot
        action taken and `y_pred` the predicted action probabilities.
    """
    # Collapse the advantage to one value per sample. Multiplying a
    # (batch, 1) tensor with the (batch,) log-probabilities below would
    # broadcast to (batch, batch) and sum every advantage against every
    # log-probability instead of pairing them per sample.
    flat_advantage = K.flatten(advantage)

    def f(y_true, y_pred):
        """
        Policy gradient loss
        """
        # L = \sum{A * log(p(y | x))}
        # Mask out probability of action taken
        responsible_outputs = K.sum(y_true * y_pred, axis=1)
        # Keep the probability away from 0 so the log stays finite.
        responsible_outputs = K.clip(responsible_outputs, K.epsilon(), 1.0)
        policy_loss = -K.sum(flat_advantage * K.log(responsible_outputs))
        return policy_loss
    return f

def create_model(input_shape=(72, 96, 4), n_actions=19):
    """Build the policy network mapping an observation to action probabilities.

    Architecture: three stride-2 3x3 convolutions (32, 64, 128 filters),
    a 256-unit dense layer, and a softmax over `n_actions` outputs.

    Args:
        input_shape: Shape of one observation, without the batch axis.
            Defaults to the (72, 96, 4) input used throughout this notebook.
        n_actions: Number of discrete actions (size of the softmax output).
            Defaults to the 19 used throughout this notebook.

    Returns:
        An uncompiled Keras `Model` from observation to action probabilities.
    """
    state = Input(shape=input_shape)
    # Each convolution needs a nonlinearity: without one, the three stacked
    # linear convolutions are equivalent to a single linear map.
    x = Conv2D(32, kernel_size=3, strides=(2, 2), padding="valid", activation="relu")(state)
    x = Conv2D(64, kernel_size=3, strides=(2, 2), padding="valid", activation="relu")(x)
    x = Conv2D(128, kernel_size=3, strides=(2, 2), padding="valid", activation="relu")(x)
    x = Flatten()(x)
    x = Dense(256, activation="relu")(x)
    x = Dense(n_actions)(x)
    x = Activation('softmax')(x)

    model = Model(state, x)
    return model

def pg(model):
    """Wrap a policy network in a trainable policy-gradient model.

    The wrapper takes two inputs, the observation and a per-sample advantage,
    and outputs the wrapped model's action probabilities. The advantage input
    is only consumed by the loss function.

    Args:
        model: Keras model mapping an observation to action probabilities.
            Its weights are shared with the returned model.

    Returns:
        A compiled Keras `Model` with inputs `[state, advantage]`, trained
        with the 'nadam' optimizer and the `pg_loss` objective.
    """
    # Take the observation shape from the wrapped model (dropping the batch
    # axis) rather than repeating a hard-coded shape that could drift.
    state = Input(shape=model.input_shape[1:])
    # Advantages for loss function
    adv_input = Input(shape=(1,))

    action_probs = model(state)

    pg_model = Model([state, adv_input], action_probs)
    pg_model.compile(
        optimizer='nadam',
        loss=pg_loss(adv_input)
    )

    return pg_model

# Policy network, its policy-gradient training wrapper (shared weights), and a
# rolling window of the most recent episode returns for progress reporting.
g_model = create_model()
pg_model = pg(g_model)
all_rewards = deque(maxlen=100)

for i_episode in range(100):
    observation = football.reset()

    # History of this episode
    state_history = []
    action_history = []
    reward_history = []

    for t in range(1000):
        # env.render()

        state_history.append(observation)

        action_prob = g_model.predict(np.expand_dims(observation, axis=0))[0]
        # The softmax output is float32 and may miss np.random.choice's
        # "probabilities sum to 1" tolerance; renormalise in float64.
        action_prob = np.asarray(action_prob, dtype=np.float64)
        action_prob = action_prob / action_prob.sum()
        action = np.random.choice(len(action_prob), p=action_prob)
        observation, reward, done, info = football.step(action)

        reward_history.append(reward)
        action_history.append(one_hot(action, len(action_prob)))

        if done:
            break

    # Train once per episode, whether it ended via `done` or by hitting the
    # step cap. The original only trained inside `if done:`, so an episode
    # longer than the cap was discarded without any update.
    # NOTE(review): presumably the full 11-vs-11 match runs longer than 1000
    # steps, in which case the original never trained at all - confirm.
    reward_sum = sum(reward_history)
    all_rewards.append(reward_sum)

    # One advantage per step, shaped (steps, 1) to match the wrapper's
    # Input(shape=(1,)).
    adv = np.reshape(discount_rewards(reward_history), (-1, 1))

    pg_model.train_on_batch(
        [np.array(state_history), adv],
        np.array(action_history),
    )

    print("Episode finished with reward {} {:.2f}".format(reward_sum, np.mean(all_rewards)))

In [None]:
# Persist the trained policy network (not the training wrapper); the agent
# written to main.py loads this same file name.
g_model.save("deep_policy_gradient.h5")

In [None]:
%%writefile main.py

import collections
import pickle
import zlib
from typing import Tuple, Dict, Any, Union, Callable, List

import gym
from gfootball.env import observation_preprocessing
from tensorflow import keras

import random                # Handling random number generation
import time                  # Handling time calculation
from skimage import transform# Help us to preprocess the frames

from collections import deque# Ordered collection with ends
from collections import namedtuple
import numpy as np

from keras.models import load_model
import numpy as np
import keras.backend as K
import tensorflow as tf


# Load the trained policy: first from the Kaggle simulation directory, then
# from the working directory when running locally. OSError is caught (it
# includes FileNotFoundError) because a missing HDF5 file can surface as a
# plain OSError, which the original handler did not cover.
try:
    model_policy_grad = load_model('/kaggle_simulations/agent/deep_policy_gradient.h5')
except (OSError, ValueError):
    model_policy_grad = load_model('deep_policy_gradient.h5')

def agent(obs):
    """Choose an action for the controlled player from a raw observation.

    Args:
        obs: Observation mapping supplied by the environment; the raw
            observation of the first controlled player is read from
            `obs['players_raw'][0]`.

    Returns:
        A one-element list holding the index (int) of the action with the
        highest predicted probability.
    """
    # Get the raw observations return by the environment
    player_obs = obs['players_raw'][0]
    # Convert these to the same output as the SMMWrapper we used in training
    smm = observation_preprocessing.generate_smm([player_obs]).squeeze()

    # Run inference on a batch of one. The original passed the undefined name
    # `state` here, which raised NameError on every call.
    action_probs = model_policy_grad.predict(np.expand_dims(smm, 0), steps=1)
    # Act greedily: pick the most probable action.
    action = np.argmax(action_probs)

    return [int(action)]

In [None]:
from typing import Tuple, Dict, List, Any

from kaggle_environments import make

# Create the Kaggle "football" environment with video saving enabled, on the
# 11_vs_11_kaggle scenario.
env_configuration = {
    "save_video": True,
    "scenario_name": "11_vs_11_kaggle",
}
env = make("football", debug=True, configuration=env_configuration)

# The two sides of the match.
left_player = "/kaggle/working/main.py"  # Custom agent file, e.g. random_agent.py or example_agent.py
right_player = "run_right"  # e.g. a built-in 'AI' agent, or the custom agent again

# Play the match to completion and keep the recorded steps.
players = [left_player, right_player]
output: List[Tuple[Dict[str, Any], Dict[str, Any]]] = env.run(players)

#print(f"Final score: {sum([r['reward'] for r in output[0]])} : {sum([r['reward'] for r in output[1]])}")
env.render(mode="human", width=800, height=600)

In [None]:
!tar -czvf submission.tar.gz ./main.py*  ./dqn_keras.h5*