Copyright 2020 Montvieux Ltd

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

In [None]:
import PIL.Image
from IPython.display import display,clear_output,HTML
from IPython.display import Image as DisplayImage
import base64
import json
from io import StringIO
import ipywidgets as widgets
import sys
import time
import datetime
import imageio
import numpy as np
import io
import os
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy

from plark_game import classes
from gym_plark.envs import plark_env,plark_env_guided_reward,plark_env_top_left


from stable_baselines3 import DQN, PPO, A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv


import helper 
import self_play

import tensorflow as tf
# Raise the TensorFlow (v1-compat) logging threshold to ERROR to keep notebook output quiet.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

%matplotlib inline
%load_ext autoreload
%autoreload 2

### Save model location

In [2]:

# Timestamp of the current local time (YYYYmmdd_HHMMSS), used to name this run.
basicdate = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Experiment location: <basepath>/test_<timestamp>
basepath = '/data/agents/models'
exp_name = f'test_{basicdate}'
exp_path = os.path.join(basepath, exp_name)

print(exp_path)

/data/agents/models/test_20210219_163926


# Use the self-play training loop - short-running example

In [None]:
# Build a fresh, timestamped experiment name and path for this run.
basicdate = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

basepath = '/data/agents/models'
exp_name = f'test_{basicdate}'
exp_path = os.path.join(basepath, exp_name)

print(exp_path)

# Short example: small testing intervals / step limits and 10 self-play iterations.
# run_self_play returns three values, kept for the video display cell.
video_path, basewidth, hsize = self_play.run_self_play(
    exp_name,
    exp_path,
    basicdate,
    pelican_testing_interval=100,
    pelican_max_initial_learning_steps=1000,
    panther_testing_interval=100,
    panther_max_initial_learning_steps=1000,
    self_play_testing_interval=100,
    self_play_max_learning_steps_per_agent=1000,
    self_play_iterations=10,
    model_type='dqn',
    log_to_tb=False,
    image_based=False,
)


In [None]:
# Optional overrides: uncomment and edit these to display a previously recorded
# video instead of the one referenced by the current video_path/basewidth/hsize.
# video_path = '/data/agents/models/test_20200325_184254/test_self_play.mp4'
# basewidth = 310
# hsize = 250

# Open read-only ('rb' rather than 'r+b', so no write permission is needed) and
# use a context manager so the file handle is closed once the bytes are loaded.
with open(video_path, 'rb') as video_file:
    video = video_file.read()

# Embed the video as a base64 data URI inside an HTML <video> element.
# The HTML object must stay the last expression of the cell so the notebook renders it.
encoded = base64.b64encode(video)
HTML(data='''<video alt="test" width="{width}" height="{height}" controls>
                <source src="data:video/mp4;base64,{data}" type="video/mp4" />
             </video>'''.format(width=basewidth, height=hsize, data=encoded.decode('ascii')))

# Use the self-play training loop - longer-running example

In [None]:

# Build a fresh, timestamped experiment name and path for this run.
basicdate = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

basepath = '/data/agents/models'
exp_name = f'test_{basicdate}'
exp_path = os.path.join(basepath, exp_name)

print(exp_path)

# Longer example: testing intervals / step limits are 100x the short run,
# with 1000 self-play iterations.
# run_self_play returns three values, kept for the video display cell.
video_path, basewidth, hsize = self_play.run_self_play(
    exp_name,
    exp_path,
    basicdate,
    pelican_testing_interval=10000,
    pelican_max_initial_learning_steps=100000,
    panther_testing_interval=10000,
    panther_max_initial_learning_steps=100000,
    self_play_testing_interval=10000,
    self_play_max_learning_steps_per_agent=100000,
    self_play_iterations=1000,
    model_type='dqn',
    log_to_tb=False,
    image_based=False,
)

In [None]:
# Optional overrides: uncomment and edit these to display a previously recorded
# video instead of the one referenced by the current video_path/basewidth/hsize.
# video_path = '/data/agents/models/test_20200325_184254/test_self_play.mp4'
# basewidth = 310
# hsize = 250

# Open read-only ('rb' rather than 'r+b', so no write permission is needed) and
# use a context manager so the file handle is closed once the bytes are loaded.
with open(video_path, 'rb') as video_file:
    video = video_file.read()

# Embed the video as a base64 data URI inside an HTML <video> element.
# The HTML object must stay the last expression of the cell so the notebook renders it.
encoded = base64.b64encode(video)
HTML(data='''<video alt="test" width="{width}" height="{height}" controls>
                <source src="data:video/mp4;base64,{data}" type="video/mp4" />
             </video>'''.format(width=basewidth, height=hsize, data=encoded.decode('ascii')))

# Make a video of previously trained agents

In [None]:
# Please update the paths below to match a trained agent.
image_based = False
panther_agent_filepath = '/data/agents/models/test_20210209_213638/ppo2_20210209_213638_panther/'
pelican_load_path = '/data/agents/models/test_20210209_213638/ppo2_20210209_213638_pelican/ppo2_20210209_213638_pelican.zip'

# Environment driven by the pelican; the panther agent is supplied via its saved-agent path.
pelican_env = plark_env.PlarkEnv(
    driving_agent='pelican',
    panther_agent_filepath=panther_agent_filepath,
    config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json',
    image_based=image_based,
)
pelican_model = PPO.load(pelican_load_path)


# Reference log lines (apparently from an earlier run) showing the matching model files:
#INFO:helper:model_path: /data/agents/models/test_20210209_213638/ppo2_20210209_213638_panther/ppo2_20210209_213638_panther.zip
#INFO:helper:model_path: /data/agents/models/test_20210209_213638/ppo2_20210209_213638_pelican/ppo2_20210209_213638_pelican.zip

In [None]:
# Output directory (created if missing) and file path for the generated video.
video_path = '/data/test_video/'
os.makedirs(video_path, exist_ok=True)
video_file_path = os.path.join(video_path, 'test_self_play.mp4')

# helper.make_video returns two values; presumably the display width and height
# of the video (TODO confirm against helper.make_video).
basewidth, hsize = helper.make_video(
    pelican_model,
    pelican_env,
    video_file_path,
    verbose=True,
    n_steps=100000,
)


In [None]:
# Open read-only ('rb' rather than 'r+b', so no write permission is needed) and
# use a context manager so the file handle is closed once the bytes are loaded.
with open(video_file_path, 'rb') as video_file:
    video = video_file.read()

# Embed the video as a base64 data URI inside an HTML <video> element.
# The HTML object must stay the last expression of the cell so the notebook renders it.
encoded = base64.b64encode(video)
HTML(data='''<video alt="test" width="{width}" height="{height}" controls>
                <source src="data:video/mp4;base64,{data}" type="video/mp4" />
             </video>'''.format(width=basewidth, height=hsize, data=encoded.decode('ascii')))