In [1]:
import os
from pathlib import Path
project_root = os.path.join(str(Path.home()) + '/Documents', 'PPGADev')
os.chdir(project_root)
%pwd # should be PPGA root dir

'/home/icaros/Documents/PPGADev'

In [2]:
import pickle

import numpy as np
from attrdict import AttrDict
from RL.ppo import *
from utils.utilities import log
from envs.brax_custom.brax_env import make_vec_env_brax
from models.actor_critic import Actor, PGAMEActor
from pandas import DataFrame

from IPython.display import HTML, Image
from IPython.display import display
from brax.io import html, image

In [3]:
# params to config
device = torch.device('cuda')
env_name = 'humanoid'
# env_name = 'walker2d'
seed = 1111
normalize_obs = True
normalize_rewards = True
# non-configurable params
obs_shapes = {
    'humanoid': (227,),
    'ant': (87,),
    'halfcheetah': (18,),
    'walker2d': (17,)
}
action_shapes = {
    'humanoid': (17,),
    'ant': (8,),
    'halfcheetah': (6,),
    'walker2d': (6,)
}

# define the final config objects
actor_cfg = AttrDict({
        'obs_shape': obs_shapes[env_name],
        'action_shape': action_shapes[env_name],
        'normalize_obs': normalize_obs,
        'normalize_rewards': normalize_rewards,
})
env_cfg = AttrDict({
        'env_name': env_name,
        'env_batch_size': None,
        'num_dims': 2 if not 'ant' in env_name else 4,
        'envs_per_model': 1,
        'seed': seed,
        'num_envs': 1,
        'clip_obs_rew': False,
        'is_energy_measures': True
})

if env_cfg.is_energy_measures:
    env_cfg.num_dims += 1


In [4]:
# now lets load in a saved archive dataframe and scheduler
# change this to be your own checkpoint path
# archive_path = 'experiments/paper_ppga_ant/1111/checkpoints/cp_00001390/archive_df_00001390.pkl'
# scheduler_path = 'experiments/paper_ppga_ant/1111/checkpoints/cp_00001390/scheduler_00001390.pkl'

# archive_path = 'experiments/paper_ppga_walker2d/1111/checkpoints/cp_00001700/archive_df_00001700.pkl'
# scheduler_path = 'experiments/paper_ppga_walker2d/1111/checkpoints/cp_00001700/scheduler_00001700.pkl'

archive_path = 'experiments/paper_ppga_humanoid/1111/checkpoints/cp_00002000/archive_df_00002000.pkl'
scheduler_path = 'experiments/paper_ppga_humanoid/1111/checkpoints/cp_00002000/scheduler_00002000.pkl'

with open(archive_path, 'rb') as f:
    archive_df = pickle.load(f)
# with open(scheduler_path, 'rb') as f:
#     scheduler = pickle.load(f)

In [5]:
from ribs.archives import GridArchive
all_solutions = archive_df.solution_batch()
all_objectives = archive_df.objective_batch()
all_measures = archive_df.measures_batch()
all_metadata = archive_df.metadata_batch()
print(all_solutions.shape)

archive = GridArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    dims=[20, 20, 20],  # 50 cells along each dimension.
    ranges=[(0.0, 1.0), (0.0, 1.0), (0.0, 16.0)],  # (-1, 1) for x-pos and (-3, 0) for y-vel.
    qd_offset=0.0,  # See the note below.
)

# ant offset = 3.24, 5 dims - 4 (0, 1.0), 1 (0, 8.0), grid 10x10x10....
# walker offset = 1.413, 3 dims - 2 (0, 1.0), 1 (0, 6.0), grid 50x50x50
# humanoid offset = 0.0, 3 dims - 2 (0, 1.0), 1 (0, 16.0) grid 20x20x20

# archive.add(all_solutions, all_objectives, all_measures)

(277, 47906)


In [7]:
archive.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000], all_metadata[0:1000])

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [8]:
i = 1001
end = min(i + 1000, all_solutions.shape[0])
while i < all_solutions.shape[0]:
    end = min(i + 1000, all_solutions.shape[0])
    archive.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end], all_metadata[i:end])
    print(i, " to ", end)
    i += 1000
    

In [9]:
# create the environment
env = make_vec_env_brax(env_cfg)

feet contact, is_energy_measures =  True
env step fn
<function humanoid_step at 0x7f41c7416280>
<brax.envs.humanoid.Humanoid object at 0x7f40dd4fd550>
feet contact and energy


In [10]:
def get_best_elite():
    best_elite = archive.best_elite
    print(f'Loading agent with reward {best_elite.objective} and measures {best_elite.measures}')
    agent = Actor(obs_shape=actor_cfg.obs_shape[0], action_shape=actor_cfg.action_shape, normalize_obs=normalize_obs, normalize_returns=normalize_rewards).deserialize(best_elite.solution).to(device)
#     print("elite metadata")
#     print(best_elite.metadata)
    if actor_cfg.normalize_obs:
        norm = best_elite.metadata['obs_normalizer']
        if isinstance(norm, dict):
            agent.obs_normalizer.load_state_dict(norm)
        else:
            agent.obs_normalizer = norm
    return agent

In [11]:
def get_random_elite():
#     elite = scheduler.archive.sample_elites(1)
    elite = archive.sample_elites(1)
    print(f'Loading agent with reward {elite.objective[0]} and measures {elite.measures[0]}')
    agent = Actor(obs_shape=actor_cfg.obs_shape[0], action_shape=actor_cfg.action_shape, normalize_obs=normalize_obs, normalize_returns=normalize_rewards).deserialize(elite.solution_batch.flatten()).to(device)
    if actor_cfg.normalize_obs:
        norm = elite.metadata['obs_normalizer']
        if isinstance(norm, dict):
            agent.obs_normalizer.load_state_dict(norm)
        else:
            agent.obs_normalizer = norm
    return agent

In [12]:
def get_elite(measures):
#     elite = scheduler.archive.elites_with_measures_single(measures)
    elite = archive.elites_with_measures_single(measures)
    print(f'Loading agent with reward {elite.objective} and measures {elite.measures}')
    agent = Actor(obs_shape=actor_cfg.obs_shape[0], action_shape=actor_cfg.action_shape, normalize_obs=normalize_obs, normalize_returns=normalize_rewards).deserialize(elite.solution.flatten()).to(device)
    print("elite")
    print(elite)
#     print("elite metadata")
#     print(elite.metadata)
    if actor_cfg.normalize_obs:
        norm = elite.metadata['obs_normalizer']
        if isinstance(norm, dict):
            agent.obs_normalizer.load_state_dict(norm)
        else:
            agent.obs_normalizer = norm
    return agent

In [13]:
def enjoy_brax(agent, render=True, deterministic=True):
    if actor_cfg.normalize_obs:
        obs_mean, obs_var = agent.obs_normalizer.obs_rms.mean, agent.obs_normalizer.obs_rms.var
        print(f'{obs_mean=}, {obs_var=}')

    obs = env.reset()
    rollout = [env.unwrapped._state]
    total_reward = 0
    measures = torch.zeros(env_cfg.num_dims).to(device)
    done = False
    while not done:
        with torch.no_grad():
            obs = obs.unsqueeze(dim=0).to(device)
            if actor_cfg.normalize_obs:
                obs = (obs - obs_mean) / torch.sqrt(obs_var + 1e-8)

            if deterministic:
                act = agent.actor_mean(obs)
            else:
                act, _, _ = agent.get_action(obs)
            act = act.squeeze()
            obs, rew, done, info = env.step(act.cpu())
            measures += info['measures']
            rollout.append(env.unwrapped._state)
            total_reward += rew
    if render:
        i = HTML(html.render(env.unwrapped._env.sys, [s.qp for s in rollout]))
        display(i)
        print(f'{total_reward=}')
        print(f' Rollout length: {len(rollout)}')
        measures /= len(rollout)
        print(f'Measures: {measures.cpu().numpy()}')
    return total_reward.detach().cpu().numpy()

In [None]:
# agent = get_random_elite()
print("line")
print("line")
agent = get_best_elite()
enjoy_brax(agent, render=True, deterministic=True)

In [22]:
agent2 = get_elite([0.4, 0.4, 14])
enjoy_brax(agent2, render=True, deterministic=True)

Loading agent with reward 3320.581738444655 and measures [ 0.40820003  0.4034     14.22305298]
elite
Elite(solution=array([-0.02203386, -0.02323742, -0.02643778, ...,  0.32423127,
        0.15388647,  0.25561556]), objective=3320.581738444655, measures=array([ 0.40820003,  0.4034    , 14.22305298]), index=3377, metadata={'traj_length': 1000.0, 'obs_normalizer': OrderedDict([('obs_rms.mean', tensor([-4.1932e-01,  1.0600e-01, -9.7430e-03,  5.6179e-01,  1.1457e-02,
         1.0744e-01, -1.3792e+00, -1.2618e+00,  2.3554e-02, -1.5397e-02,
        -5.8306e-02, -4.6104e-01, -6.9872e-01, -5.0502e-01,  1.1058e+00,
         3.7013e-01, -1.0754e-03,  2.5579e-01, -2.5780e-02,  1.0723e-02,
         2.9321e-01,  1.6384e-02,  1.4275e+00,  5.3125e-02, -1.9302e-01,
        -1.2252e-01,  2.6880e-01, -1.6672e-02,  1.2362e-03,  1.1383e-01,
         5.1930e-02, -4.5674e-02, -3.4552e-02,  4.8184e-02, -7.0700e-02,
         8.7777e-02,  2.7833e-02, -9.9599e-02, -3.4424e-04, -3.7973e-02,
         1.5402e-02, -

total_reward=tensor(620.0247, device='cuda:0')
 Rollout length: 1001
Measures: [ 0.48251748  0.32567433 10.531135  ]


array(620.02466, dtype=float32)

In [None]:
import matplotlib.pyplot as plt
from ribs.visualize import parallel_axes_plot
plt.figure(figsize=(8, 6))
parallel_axes_plot(archive)
plt.show

In [None]:
# visualizing archive - ANT

In [None]:
# ant
from ribs.archives import CVTArchive
cvt_archive1 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 8.0)]
)

cvt_archive2 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 8.0)]
)

cvt_archive3 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 8.0)]
)

cvt_archive4 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 8.0)]
)

cvt_archive1.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [0, 4]], all_metadata[0:1000])
cvt_archive2.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [1, 4]], all_metadata[0:1000])
cvt_archive3.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [2, 4]], all_metadata[0:1000])
cvt_archive4.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [3, 4]], all_metadata[0:1000])


In [None]:
i = 1001
end = min(i + 1000, all_solutions.shape[0])
while i < all_solutions.shape[0]:
    end = min(i + 1000, all_solutions.shape[0])
#     archive.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end], all_metadata[i:end])

    cvt_archive1.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [0, 4]], all_metadata[i:end])
    cvt_archive2.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [1, 4]], all_metadata[i:end])
    cvt_archive3.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [2, 4]], all_metadata[i:end])
    cvt_archive4.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [3, 4]], all_metadata[i:end])
    print(i, " to ", end)
    i += 1000


In [None]:
import matplotlib.pyplot as plt
from ribs.visualize import cvt_archive_heatmap
cvt_archive_heatmap(cvt_archive1)
plt.title("Measure 1 vs Energy")
plt.show()
cvt_archive_heatmap(cvt_archive2)
plt.title("Measure 2 vs Energy")
plt.show()
cvt_archive_heatmap(cvt_archive3)
plt.title("Measure 3 vs Energy")
plt.show()
cvt_archive_heatmap(cvt_archive4)
plt.title("Measure 4 vs Energy")
plt.show()

In [None]:
# ant
# from ribs.archives import CVTArchive
cvt_archive12 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 1.0)]
)

cvt_archive13 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 1.0)]
)

cvt_archive14 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 1.0)]
)

cvt_archive23 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 1.0)]
)


cvt_archive24 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 1.0)]
)


cvt_archive34 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 1.0)]
)


cvt_archive12.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [0, 1]], all_metadata[0:1000])
cvt_archive13.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [0, 2]], all_metadata[0:1000])
cvt_archive14.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [0, 3]], all_metadata[0:1000])
cvt_archive23.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [1, 2]], all_metadata[0:1000])
cvt_archive24.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [1, 3]], all_metadata[0:1000])
cvt_archive34.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [2, 3]], all_metadata[0:1000])


In [None]:
i = 1001
end = min(i + 1000, all_solutions.shape[0])
while i < all_solutions.shape[0]:
    end = min(i + 1000, all_solutions.shape[0])
#     archive.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end], all_metadata[i:end])

    cvt_archive12.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [0, 1]], all_metadata[i:end])
    cvt_archive13.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [0, 2]], all_metadata[i:end])
    cvt_archive14.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [0, 3]], all_metadata[i:end])
    cvt_archive23.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [1, 2]], all_metadata[i:end])
    cvt_archive24.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [1, 3]], all_metadata[i:end])
    cvt_archive34.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [2, 3]], all_metadata[i:end])
    print(i, " to ", end)
    i += 1000


In [None]:

cvt_archive_heatmap(cvt_archive12)
plt.title("Measure 1 vs Measure 2")
plt.show()
cvt_archive_heatmap(cvt_archive13)
plt.title("Measure 1 vs Measure 3")
plt.show()
cvt_archive_heatmap(cvt_archive14)
plt.title("Measure 1 vs Measure 4")
plt.show()
cvt_archive_heatmap(cvt_archive23)
plt.title("Measure 2 vs Measure 3")
plt.show()
cvt_archive_heatmap(cvt_archive24)
plt.title("Measure 2 vs Measure 4")
plt.show()
cvt_archive_heatmap(cvt_archive34)
plt.title("Measure 3 vs Measure 4")
plt.show()

In [None]:
# visualizing archive - WALKER2D

In [None]:
# walker2d
from ribs.archives import CVTArchive
cvt_archive1 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 6.0)]
)

cvt_archive2 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 6.0)]
)

cvt_archive12 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 1.0)]
)



cvt_archive1.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [0, 2]], all_metadata[0:1000])
cvt_archive2.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [1, 2]], all_metadata[0:1000])
cvt_archive12.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [0, 1]], all_metadata[0:1000])

In [None]:
i = 1001
end = min(i + 1000, all_solutions.shape[0])
while i < all_solutions.shape[0]:
    end = min(i + 1000, all_solutions.shape[0])
#     archive.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end], all_metadata[i:end])

    cvt_archive1.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [0, 2]], all_metadata[i:end])
    cvt_archive2.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [1, 2]], all_metadata[i:end])
    cvt_archive12.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [0, 1]], all_metadata[i:end])
    print(i, " to ", end)
    i += 1000


In [None]:
import matplotlib.pyplot as plt
from ribs.visualize import cvt_archive_heatmap
cvt_archive_heatmap(cvt_archive1)
plt.title("Measure 1 vs Energy")
plt.show()
cvt_archive_heatmap(cvt_archive2)
plt.title("Measure 2 vs Energy")
plt.show()
cvt_archive_heatmap(cvt_archive12)
plt.title("Measure 1 vs Measure 2")
plt.show()

In [None]:
# visualizing archive - HUMANOID

In [None]:
# humanoid
from ribs.archives import CVTArchive
cvt_archive1 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 16.0)]
)

cvt_archive2 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 16.0)]
)

cvt_archive12 = CVTArchive(
    solution_dim=all_solutions.shape[1],  # Dimensionality of solutions in the archive.
    cells=1000,
    ranges=[(0.0, 1.0), (0.0, 1.0)]
)

cvt_archive1.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [0, 2]], all_metadata[0:1000])
cvt_archive2.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [1, 2]], all_metadata[0:1000])
cvt_archive12.add(all_solutions[0:1000], all_objectives[0:1000], all_measures[0:1000][:, [0, 1]], all_metadata[0:1000])

In [None]:
i = 1001
end = min(i + 1000, all_solutions.shape[0])
while i < all_solutions.shape[0]:
    end = min(i + 1000, all_solutions.shape[0])
#     archive.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end], all_metadata[i:end])

    cvt_archive1.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [0, 2]], all_metadata[i:end])
    cvt_archive2.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [1, 2]], all_metadata[i:end])
    cvt_archive12.add(all_solutions[i:end], all_objectives[i:end], all_measures[i:end][:, [0, 1]], all_metadata[i:end])
    print(i, " to ", end)
    i += 1000


In [None]:
import matplotlib.pyplot as plt
from ribs.visualize import cvt_archive_heatmap
cvt_archive_heatmap(cvt_archive1)
plt.title("Measure 1 vs Energy")
plt.show()
cvt_archive_heatmap(cvt_archive2)
plt.title("Measure 2 vs Energy")
plt.show()
cvt_archive_heatmap(cvt_archive12)
plt.title("Measure 1 vs Measure 2")
plt.show()