In [None]:
#pip install tensorflow==1.15
#Install stable-baselines as described in the documentation

import gym
import gym_pcgrl
from gym_pcgrl import wrappers

from helper import get_exp_name, max_exp_idx, load_model, make_env

from stable_baselines.common.vec_env import DummyVecEnv

import tensorflow as tf
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt
from IPython import display

# Run Model

In [None]:
def show_state(env, step=0, changes=0, total_reward=0, name=""):
    """Draw the environment's current frame inline, replacing the previous output.

    Args:
        env: environment to draw; must support ``render(mode='rgb_array')``.
            The returned frame is handed straight to ``plt.imshow``
            (presumably an image or image-like array; confirm for the env used).
        step: step counter shown in the title.
        changes: number of changes shown in the title.
        total_reward: accumulated reward shown in the title.
        name: label placed at the start of the title.
    """
    # Reuse figure number 10 so each call redraws the same figure
    # instead of opening a new one.
    plt.figure(10)
    plt.clf()
    plt.title("{} | Step: {} Changes: {} Total Reward: {}".format(name, step, changes, total_reward))
    plt.axis('off')
    # Without this call the figure only ever contains the title.
    plt.imshow(env.render(mode='rgb_array'))
    # wait=True delays clearing until the new output is ready.
    display.clear_output(wait=True)
    display.display(plt.gcf())

def get_action(env, model, action_type=0, obs=None):
    """Choose the next action for an observation.

    Args:
        env: environment; only ``env.action_space.sample()`` is used, and only
            for random actions.
        model: policy object providing ``predict(obs)`` and
            ``action_probability(obs)``.
        action_type: selection strategy.
            0 -> the action returned by ``model.predict``;
            1 -> an index sampled from ``model.action_probability(obs)[0]``;
            anything else -> a random sample from the action space.
        obs: observation to act on. When omitted, the notebook-level ``obs``
            variable is used, which keeps existing three-argument calls working.

    Returns:
        The selected action. Strategies 1 and the random fallback return a
        one-element numpy array; strategy 0 returns whatever
        ``model.predict`` gives.

    Raises:
        NameError: if ``obs`` is needed, not passed, and no global ``obs`` exists.
    """
    # Random actions do not need an observation at all.
    if action_type not in (0, 1):
        return np.array([env.action_space.sample()])

    if obs is None:
        # Legacy behaviour: read the observation from notebook state.
        try:
            obs = globals()["obs"]
        except KeyError:
            raise NameError("name 'obs' is not defined; pass obs=... explicitly") from None

    if action_type == 0:
        action, _ = model.predict(obs)
        return action

    action_prob = model.action_probability(obs)[0]
    # An integer `a` makes np.random.choice draw from range(a).
    return np.random.choice(a=len(action_prob), size=1, p=action_prob)

def get_model(game, representation, experiment, **kwargs):
    """Load the saved model belonging to an experiment.

    The experiment name comes from ``get_exp_name`` and the run index from
    ``max_exp_idx`` (presumably the highest saved run number; confirm in
    ``helper``). An index of 0 is treated as "nothing saved".

    Args:
        game: game/problem name.
        representation: representation name.
        experiment: experiment label.
        **kwargs: forwarded unchanged to ``get_exp_name``.

    Returns:
        Whatever ``load_model`` returns for ``runs/<exp_name>_<index>_log``.

    Raises:
        Exception: if ``max_exp_idx`` reports 0 for the experiment.
    """
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    run_idx = max_exp_idx(exp_name)
    if run_idx == 0:
        raise Exception('Did not find ranked saved model of experiment: {}'.format(exp_name))
    return load_model('runs/{}_{}_{}'.format(exp_name, run_idx, 'log'))

# Experiment to load and run.
game = "binary"
representation = "narrow"
experiment = "100M"
env_name = '{}-{}-v0'.format(game, representation)
# Options passed to make_env for this inference run.
inf_kwargs = {
    'change_percentage': 0.4
}
env =  DummyVecEnv([make_env(env_name, representation, 0, None, **inf_kwargs)])
# Use the underscore spelling, matching the analysis cell further down;
# the hyphenated key matched no other use in this notebook.
kwargs = {
    'cropped_size': 28
}
model = get_model(game, representation, experiment, **kwargs)
obs = env.reset()
done = False
total_reward = 0
while not done:
    action = get_action(env, model, 0)
    obs, rewards, done, info = env.step(action)
    total_reward += rewards
    # Stop before drawing once the episode has ended.
    if done:
        break
    # The vectorized env returns one info dict per sub-environment;
    # there is a single sub-environment here, so read index 0.
    show_state(env, info[0]['iterations'], info[0]['changes'], total_reward)

# Prepare Models for Analysis

In [None]:
def _raw(key):
    """Build a metric that returns ``info[key]`` unchanged."""
    return lambda info: info[key]


def _ratio(count_key, limit_key):
    """Build a metric that divides ``info[count_key]`` by ``info[limit_key]`` as a float."""
    return lambda info: info[count_key] / (1.0 * info[limit_key])


def _flag(condition):
    """Build a metric that is 1 when ``condition(info)`` holds and 0 otherwise."""
    return lambda info: [0, 1][condition(info)]


# Each table maps a metric name to a function of the final `info` dict.
# Names ending in `_const` are 0/1 indicators for a threshold or exact value.
binary_lambdas = {
    'pathlength': _raw('path-length'),
    'regions': _raw('regions'),
    'iterations': _ratio('iterations', 'max_iterations'),
    'changes': _ratio('changes', 'max_changes'),
    'pathlength_const': _flag(lambda info: info['path-length'] >= 48),
    'regions_const': _flag(lambda info: info['regions'] == 1),
}

zelda_lambdas = {
    'player': _raw('player'),
    'key': _raw('key'),
    'door': _raw('door'),
    'regions': _raw('regions'),
    'nearestenemy': _raw('nearest-enemy'),
    'pathlength': _raw('path-length'),
    'iterations': _ratio('iterations', 'max_iterations'),
    'changes': _ratio('changes', 'max_changes'),
    'player_const': _flag(lambda info: info['player'] == 1),
    'key_const': _flag(lambda info: info['key'] == 1),
    'door_const': _flag(lambda info: info['door'] == 1),
    'regions_const': _flag(lambda info: info['regions'] == 1),
    'nearestenemy_const': _flag(lambda info: info['nearest-enemy'] >= 4),
    'pathlength_const': _flag(lambda info: info['path-length'] >= 16),
}

sokoban_lambdas = {
    'player': _raw('player'),
    'crate': _raw('crate'),
    'target': _raw('target'),
    'regions': _raw('regions'),
    'sollength': _raw('sol-length'),
    'iterations': _ratio('iterations', 'max_iterations'),
    'changes': _ratio('changes', 'max_changes'),
    'player_const': _flag(lambda info: info['player'] == 1),
    # Crates and targets must match in number, and there must be at least one crate.
    'ratio_const': _flag(lambda info: info['crate'] == info['target'] and info['crate'] > 0),
    'sollength_const': _flag(lambda info: info['sol-length'] >= 18),
}

# Metric tables keyed by problem name.
lambdas = {
    'binary': binary_lambdas,
    'zelda': zelda_lambdas,
    'sokoban': sokoban_lambdas
}

def get_hamming_diversity(lvls):
    """Score each level by its average normalized difference from all the others.

    For every pair of distinct levels, the number of positions whose values
    differ by at least 1 is counted (absolute differences are clipped to
    [0, 1] before summing). A level's score is the mean of those counts over
    all other levels, divided by ``shape[0] * shape[1]`` of the first level.

    Args:
        lvls: sequence of numpy arrays with at least two dimensions
            (assumed to share one shape; confirm against the caller).

    Returns:
        A list with one score per level, in input order. An empty input
        yields an empty list.
    """
    scores = []
    for idx, current in enumerate(lvls):
        # Differences to every other level; the level itself is skipped.
        distances = [
            np.clip(abs(current - other), 0, 1).sum()
            for other_idx, other in enumerate(lvls)
            if other_idx != idx
        ]
        tile_count = lvls[0].shape[0] * lvls[0].shape[1]
        scores.append(np.mean(distances) / tile_count)
    return scores

def sample_data(sample_size, env, lambdas, model=None):
    """Run complete episodes and collect per-episode metrics and level diversity.

    Args:
        sample_size: number of episodes to run.
        env: vectorized environment with a single sub-environment; it must
            provide ``reset()``, ``step(action)`` and ``get_attr('pcgrl_env')``.
        lambdas: mapping of metric name -> function taking the final ``info``
            dict of an episode.
        model: policy providing ``predict(obs)``. When omitted, the
            notebook-level ``model`` variable is used, which keeps existing
            three-argument calls working.

    Returns:
        Dict mapping each metric name to a list with one value per episode,
        plus ``'diversity'`` holding the result of ``get_hamming_diversity``
        over the collected maps.

    Raises:
        NameError: if episodes must be run, ``model`` is not passed, and no
            global ``model`` exists.
    """
    if model is None and sample_size > 0:
        # Legacy behaviour: read the policy from notebook state.
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError("name 'model' is not defined; pass model=... explicitly") from None

    sample_info = {name: [] for name in lambdas}
    lvls = []
    for _ in range(sample_size):
        done = False
        obs = env.reset()
        while not done:
            action, _state = model.predict(obs)
            obs, rewards, done, info = env.step(action)
        # NOTE(review): stable-baselines VecEnvs reset a sub-environment as soon
        # as it reports done, so this may read the map of the next episode
        # rather than the finished one. Confirm.
        lvls.append(env.get_attr('pcgrl_env')[0]._rep.get_observation()['map'])
        # info is a list with one dict per sub-environment; use the first.
        for name, metric in lambdas.items():
            sample_info[name].append(metric(info[0]))
    sample_info['diversity'] = get_hamming_diversity(lvls)
    return sample_info

# NOTE: this repeats the `get_model` definition from the "Run Model" cell,
# presumably so the analysis cells can run without it.
def get_model(game, representation, experiment, **kwargs):
    """Load the saved model belonging to an experiment.

    The experiment name comes from ``get_exp_name`` and the run index from
    ``max_exp_idx`` (presumably the highest saved run number; confirm in
    ``helper``). An index of 0 is treated as "nothing saved".

    Args:
        game: game/problem name.
        representation: representation name.
        experiment: experiment label.
        **kwargs: forwarded unchanged to ``get_exp_name``.

    Returns:
        Whatever ``load_model`` returns for ``runs/<exp_name>_<index>_log``.

    Raises:
        Exception: if ``max_exp_idx`` reports 0 for the experiment.
    """
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    run_idx = max_exp_idx(exp_name)
    if run_idx == 0:
        raise Exception('Did not find ranked saved model of experiment: {}'.format(exp_name))
    return load_model('runs/{}_{}_{}'.format(exp_name, run_idx, 'log'))

# Collect Models' Results for Analysis

In [None]:
# Problem to analyse and number of episodes sampled per setting.
p_name = "binary"
sample_size = 100
# rep_names[i] is evaluated with exp_names[i].
rep_names = ["narrow", "turtle", "wide"]
# NOTE(review): these labels carry file-like suffixes (.zip / .pkl) and are
# passed to get_exp_name as experiment names. Confirm this matches the
# saved run directories.
exp_names = ["noaction_heatmap_changes_100M.zip", "heatmap_changes_100M.zip", "100M.pkl"]
kwargs={
    'cropped_size': 28
}

# result[model name][metric name] -> list of mean values, one per change percentage.
result = {}
for r_name, e_name in zip(rep_names, exp_names):
    # Name each entry after the configuration being evaluated in this
    # iteration; using the globals from the "Run Model" cell would give every
    # configuration the same key, so they would overwrite one another.
    m_name = get_exp_name(p_name, r_name, e_name)
    env_name = "{}-{}-v0".format(p_name, r_name)
    # sample_data reads the notebook-level `model`, so the name must stay `model`.
    model = get_model(p_name, r_name, e_name)
    result[m_name] = {}
    for ch_perc in np.arange(0, 1.01, 0.1):
        print("Testing {} at change percentage of {}".format(m_name, ch_perc))
        kwargs['change_percentage'] = ch_perc
        env = DummyVecEnv([make_env(env_name, r_name, 0, None, **kwargs)])
        temp_result = sample_data(sample_size, env, lambdas[p_name])
        for name in temp_result:
            result[m_name].setdefault(name, []).append(np.mean(temp_result[name]))

# Render Models' Results Graphs

In [None]:
def get_data(results, name):
    """Extract one metric from every entry of a results mapping.

    Args:
        results: mapping of entry name -> mapping of metric name -> values.
        name: metric to extract.

    Returns:
        Dict mapping each entry name to ``results[entry][name]``, in the
        iteration order of ``results``.
    """
    return {entry: results[entry][name] for entry in results}

def plt_dict(p_dict, y_title, file_name):
    """Plot every series in ``p_dict`` against change percentage and save a PDF.

    The x axis is fixed to ``np.arange(0.0, 1.01, 0.1)``, so each series is
    expected to have one value per step of that range.

    Args:
        p_dict: mapping of legend label -> sequence of y values.
        y_title: label for the y axis.
        file_name: output path without extension; ``".pdf"`` is appended.
    """
    change_axis = np.array(np.arange(0.0,1.01,0.1))
    plt.figure()
    for label in p_dict:
        plt.plot(change_axis, p_dict[label])
    # Legend entries follow the order in which the series were plotted.
    plt.legend(list(p_dict))
    plt.xlim(0.0,1.0)
    plt.xticks(change_axis, rotation=90)
    plt.xlabel('change percentage')
    plt.ylabel(y_title)
    plt.savefig(file_name + ".pdf")

# One figure (and PDF) per collected metric of the selected problem;
# the metric name doubles as y-axis label and output file name.
for n in lambdas[p_name]:
    plt_dict(p_dict=get_data(results=result, name=n), y_title=n, file_name=n)
# Diversity is added by sample_data rather than listed in the metric tables,
# so it is plotted separately.
plt_dict(p_dict=get_data(results=result, name='diversity'), y_title='diversity', file_name='diversity')