In [None]:
#pip install tensorflow==1.15
#Install stable-baselines as described in the documentation

import gym
import gym_pcgrl
from gym_pcgrl import wrappers

from stable_baselines.common.policies import MlpPolicy, CnnPolicy, FeedForwardPolicy
from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines.a2c.utils import conv, linear, conv_to_fc
from stable_baselines.results_plotter import load_results, ts2xy
from stable_baselines import PPO2

import tensorflow as tf
import numpy as np

import pdb

# Load Random Model

In [None]:
def Cnn(image, **kwargs):
    """Convolutional feature extractor: three 3x3 conv layers and one dense layer.

    Args:
        image: input tensor handed to the first convolution
            (presumably the observation tensor supplied by the policy).
        **kwargs: forwarded unchanged to every `conv` call (not to `linear`).

    Returns:
        The ReLU-activated output of a 512-unit `linear` layer named 'fc1'.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    # (variable scope, number of filters, stride); every layer uses filter_size=3.
    conv_specs = (('c1', 32, 2), ('c2', 64, 2), ('c3', 64, 1))

    features = image
    for scope, n_filters, stride in conv_specs:
        features = relu(conv(features, scope, n_filters=n_filters, filter_size=3,
                             stride=stride, init_scale=gain, **kwargs))

    # Flatten the last feature map before the fully connected layer.
    flattened = conv_to_fc(features)
    return relu(linear(flattened, 'fc1', n_hidden=512, init_scale=gain))

class CustomPolicy(FeedForwardPolicy):
    """Feed-forward policy that always uses `Cnn` as its CNN feature extractor."""

    def __init__(self, *args, **kwargs):
        # Pass the caller's arguments through and pin the extractor settings;
        # supplying either keyword again from the caller raises TypeError.
        super().__init__(*args, cnn_extractor=Cnn, feature_extraction="cnn", **kwargs)

# PPO2 needs an environment at construction time, but `env` was previously only
# created further down in the "Run Model" section, so this cell raised NameError
# on a fresh kernel (Restart & Run All). Build the environment here with the same
# settings the "Run Model" section uses.
env = DummyVecEnv([
    lambda: wrappers.CroppedImagePCGRLWrapper("sokoban-narrow-v0", 12, random_tile=False)
])

# Untrained ("random") agent using the custom CNN policy; TensorBoard logs go to ./runs.
model = PPO2(CustomPolicy, env, verbose=1, tensorboard_log="./runs")

# Load Saved Model

In [None]:
# Load a previously saved PPO2 model; this rebinds `model`, replacing the randomly
# initialised one created in the "Load Random Model" section.
# NOTE(review): the file name suggests a model for the "binary" problem, while the
# "Run Model" section builds a "sokoban-narrow-v0" environment — confirm that the
# two are meant to be used together.
model = PPO2.load("binary_narrow_limited_centered")

# Run Model

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython import display

def show_state(env, l, c, r, step=0, name="", info=""):
    """Redraw the live dashboard: the rendered level plus three metric histories.

    Args:
        env: environment whose `render(mode='rgb_array')` result fills the first panel.
        l: history shown in the second panel; its last value is printed as "Path".
        c: history shown in the third panel; its last value is printed as "Changes".
        r: history shown in the fourth panel; its last value is printed as "Regions".
        step: step index printed in the title.
        name: label printed at the start of the title.
        info: accepted for interface compatibility; not used by this function.
    """
    # A fixed figure number makes every call redraw the same figure.
    figure = plt.figure(10)
    plt.clf()

    # NOTE(review): the title is set before any subplot is added, so pyplot attaches
    # it to whatever axes is current at that point — confirm this is the intended look.
    headline = "{} | Step: {} Path: {} Changes: {} Regions: {}".format(
        name, step, l[-1], c[-1], r[-1])
    plt.title(headline)

    # Panel 1: the rendered level, without axis decorations.
    level_axes = figure.add_subplot(1, 4, 1)
    level_axes.imshow(env.render(mode='rgb_array'))
    level_axes.axis('off')

    # Panels 2-4: one line plot per metric history.
    for panel, history in enumerate((l, c, r), start=2):
        figure.add_subplot(1, 4, panel).plot(history)

    figure.set_figwidth(15)
    plt.tight_layout()

    # Replace the previous cell output instead of stacking a new figure below it.
    display.clear_output(wait=True)
    display.display(plt.gcf())

def get_env():
    """Create the environment used by the "Run Model" section.

    Returns:
        A `wrappers.CroppedImagePCGRLWrapper` for "sokoban-narrow-v0", built with
        12 as its second positional argument and `random_tile=False`.
    """
    return wrappers.CroppedImagePCGRLWrapper("sokoban-narrow-v0", 12, random_tile=False)

def get_action(env, model, action_type=True, obs=None):
    """Choose the next action for an observation.

    Args:
        env: environment; only used for `env.action_space.sample()` in the
            random branch.
        model: object exposing `predict(obs)` and `action_probability(obs)`.
        action_type: selects the strategy.
            0 -> the action returned by `model.predict(obs)`;
            1 -> one action sampled from `model.action_probability(obs)[0]`;
            anything else -> a random action from `env.action_space`.
            The default `True` compares equal to 1, so the default is sampling.
        obs: observation to act on. When omitted, the notebook-level variable
            `obs` is used, which is what this function always did before the
            parameter existed. Passing it explicitly avoids depending on
            hidden notebook state.

    Returns:
        For action_type 0, the first element of the `model.predict` result;
        otherwise a one-element numpy array holding the chosen action.

    Raises:
        NameError: if an observation is required, none was passed, and no
            notebook-level `obs` exists.
    """
    needs_obs = action_type == 0 or action_type == 1
    if needs_obs and obs is None:
        # Backward compatibility for callers that rely on the global `obs`.
        try:
            obs = globals()["obs"]
        except KeyError:
            raise NameError(
                "name 'obs' is not defined; pass obs=... to get_action") from None

    if action_type == 0:
        action, _ = model.predict(obs)
        return action
    if action_type == 1:
        action_prob = model.action_probability(obs)[0]
        # An integer `a` makes choice() draw from range(len(action_prob)).
        return np.random.choice(len(action_prob), size=1, p=action_prob)
    return np.array([env.action_space.sample()])

# Wrap the single-environment factory in a vectorised environment.
env = DummyVecEnv([get_env])

# Watch the agent: per-step histories of the metrics reported in `info`.
path_length, changes, regions = [], [], []

obs = env.reset()
for i in range(1000):
    # action_type 1: sample the action from the model's action probabilities.
    action = get_action(env, model, 1)
    obs, rewards, dones, info = env.step(action)

    # Only one environment is wrapped, so its info dict is at index 0.
    step_info = info[0]
    path_length.append(step_info['path-length'])
    changes.append(step_info['changes'])
    regions.append(step_info['regions'])

    # Stop as soon as the episode is reported finished; that last step is not drawn.
    if dones:
        break
    show_state(env, path_length, changes, regions, i)

# Testing Binary Models

In [None]:
import gym
import gym_pcgrl
from gym_pcgrl import wrappers

from stable_baselines.common.policies import MlpPolicy, CnnPolicy, FeedForwardPolicy
from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines.a2c.utils import conv, linear, conv_to_fc
from stable_baselines.results_plotter import load_results, ts2xy
from stable_baselines import PPO2

import tensorflow as tf
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt
from IPython import display

# --- Evaluation settings -------------------------------------------------------
# Problem name; combined with a representation into the gym id
# "<problem>-<representation>-v0" and into the saved-model file name.
p_name = "binary"

# Representations and experiment suffixes, paired by position: entry k of
# `rep_names` belongs to entry k of `exp_names`.
rep_names = [
    "narrow",
    "turtle",
]
exp_names = [
    "noaction_heatmap_changes_100M",
    "heatmap_changes_100M",
]

# Target path length handed to the environment factory.
t_path = 48
# Second positional argument of the cropped-image wrapper.
c_size = 28
# Number of episodes run for every change percentage.
sample_size = 50

def get_env_func(p_name, c_size, r_name, ch_perc=0.2, t_path=48):
    """Return a zero-argument factory that builds a configured PCGRL environment.

    Args:
        p_name: problem name, first part of the gym id "<p_name>-<r_name>-v0".
        c_size: second positional argument of `CroppedImagePCGRLWrapper`
            (not used for the "wide" representation).
        r_name: representation name; "wide" selects `ActionMapImagePCGRLWrapper`,
            anything else selects `CroppedImagePCGRLWrapper`.
        ch_perc: value passed as `change_percentage`.
        t_path: value passed as `target_path`.

    Returns:
        A callable with no arguments that creates the environment; suitable as
        an element of the list given to `DummyVecEnv`.
    """
    def get_env():
        game = "{}-{}-v0".format(p_name, r_name)
        # The keyword must be spelled `target_path`. It used to be misspelled
        # `targer_path`, so `t_path` never reached the environment under the
        # name the environment parameters are read from.
        params = dict(random_tile=False, target_path=t_path, change_percentage=ch_perc)
        if r_name != "wide":
            return wrappers.CroppedImagePCGRLWrapper(game, c_size, **params)
        return wrappers.ActionMapImagePCGRLWrapper(game, **params)
    return get_env

# Evaluate every (representation, experiment) model over a sweep of change
# percentages. Each dict maps the model name to a list holding one averaged
# value per change percentage, in sweep order.
pathlength_perc = {}  # fraction of episodes whose final path length is >= t_path
pathlength = {}       # mean final path length
regions_perc = {}     # fraction of episodes ending with exactly one region
regions = {}          # mean final number of regions
both_perc = {}        # fraction of episodes meeting both criteria above

# rep_names and exp_names are paired by position.
for r_name, e_name in zip(rep_names, exp_names):
    m_name = "{}_{}_{}".format(p_name, r_name, e_name)
    pathlength_perc[m_name] = []
    pathlength[m_name] = []
    regions_perc[m_name] = []
    regions[m_name] = []
    both_perc[m_name] = []
    model = PPO2.load(m_name)
    for ch_perc in np.arange(0.05, 1.01, 0.05):
        print("Testing {} at change percentage of {}".format(m_name,ch_perc))
        temp_path = []
        temp_length = []
        temp_regions = []
        temp_number = []
        temp_both = []
        # The environment settings do not change between episodes, so build the
        # environment once per change percentage (instead of once per episode)
        # and make sure it is closed afterwards.
        env = DummyVecEnv([get_env_func(p_name, c_size, r_name, ch_perc, t_path)])
        try:
            for episode in range(sample_size):
                obs = env.reset()
                done = False
                while not done:
                    action, _ = model.predict(obs)
                    obs, rewards, dones, info = env.step(action)
                    # Single wrapped environment: its done flag is at index 0.
                    done = bool(dones[0])
                final_info = info[0]
                # Use the configured target instead of a hard-coded 48 so the
                # metric stays in sync with `t_path` (and with the plot label).
                reached_path = int(final_info['path-length'] >= t_path)
                single_region = int(final_info['regions'] == 1)
                temp_path.append(reached_path)
                temp_length.append(final_info['path-length'])
                temp_regions.append(single_region)
                temp_number.append(final_info['regions'])
                temp_both.append(int(reached_path == 1 and single_region == 1))
        finally:
            env.close()
        pathlength_perc[m_name].append(np.mean(temp_path))
        pathlength[m_name].append(np.mean(temp_length))
        regions_perc[m_name].append(np.mean(temp_regions))
        regions[m_name].append(np.mean(temp_number))
        both_perc[m_name].append(np.mean(temp_both))

def plt_dict(p_dict, y_title, file_name, x_values=None):
    """Plot one line per entry of `p_dict` against the change percentage and save it.

    Args:
        p_dict: mapping from series name to a sequence of y-values, one value
            per x-coordinate; the names become the legend entries.
        y_title: label of the y-axis.
        file_name: path handed to `plt.savefig`.
        x_values: x-coordinates shared by all series. Defaults to
            `np.arange(0.05, 1.01, 0.05)` with x-limits (0.05, 1.0), the grid
            this function previously hard-coded.
    """
    if x_values is None:
        x_values = np.arange(0.05, 1.01, 0.05)
        x_limits = (0.05, 1.0)
    else:
        x_values = np.asarray(x_values)
        x_limits = (x_values.min(), x_values.max())

    plt.figure()
    for series in p_dict.values():
        plt.plot(x_values, series)
    # Dict iteration order matches the plotting order above.
    plt.legend(list(p_dict))
    plt.xlim(*x_limits)
    plt.xticks(x_values, rotation=90)
    plt.xlabel('change percentage')
    plt.ylabel(y_title)
    plt.savefig(file_name)
        
# Save one PDF per collected metric; each entry is (data, y-axis label, output file).
# NOTE(review): `both_perc` is filled by the evaluation loop but is not plotted
# here — confirm whether that is intentional.
for metric, y_label, pdf_name in (
    (regions_perc, 'Percentage of levels with regions == 1', 'const_regions.pdf'),
    (pathlength_perc, 'Percentage of levels with path length >= {}'.format(t_path), 'const_pathlength.pdf'),
    (regions, 'Number of Regions', 'regions.pdf'),
    (pathlength, 'Path Length', 'pathlength.pdf'),
):
    plt_dict(metric, y_label, pdf_name)