###### [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/adaptive-intelligent-robotics/QDax/blob/main/notebooks/cmamega_example.ipynb)

# Optimizing with CMA-ME in Jax

This notebook shows how to use QDax to find diverse and high-performing parameters on the Rastrigin problem with [CMA-ME](https://arxiv.org/pdf/1912.02400.pdf). It can be run locally or on Google Colab. We recommend using a GPU. This notebook will show:

- how to define the problem
- how to create a CMA-ME emitter
- how to create a MAP-Elites instance
- which functions must be defined before training
- how to launch a certain number of training steps
- how to visualise the optimization process

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import math

import jax 
import jax.numpy as jnp

try:
    import flax
except:
    !pip install --no-deps git+https://github.com/google/flax.git@v0.4.1 |tail -n 1
    import flax

try:
    import chex
except:
    !pip install --no-deps git+https://github.com/deepmind/chex.git@v0.1.3 |tail -n 1
    import chex
    
try:
    import qdax
except:
    !pip install --no-deps git+https://github.com/adaptive-intelligent-robotics/QDax@main |tail -n 1
    import qdax

from qdax.core.map_elites import MAPElites
from qdax.core.emitters.cma_emitter import CMAEmitter
from qdax.core.emitters.cma_opt_emitter import CMAOptimizingEmitter
from qdax.core.emitters.cma_rnd_emitter import CMARndEmitter
from qdax.core.emitters.cma_multi_emitter import CMAPoolEmitter
from qdax.core.emitters.standard_emitters import MixingEmitter
from qdax.core.containers.mapelites_repertoire import compute_euclidean_centroids, MapElitesRepertoire
# from qdax.utils.plotting import plot_map_elites_results
from qdax.utils.plotting import plot_multidimensional_map_elites_grid

from typing import Dict

In [None]:
# from jax.config import config

# config.update('jax_disable_jit', True)

## Set the hyperparameters

Most hyperparameters are similar to those introduced in the [Differentiable Quality Diversity paper](https://arxiv.org/pdf/2106.03894.pdf).

In [None]:
#@title QD Training Definitions Fields
#@markdown ---
# Number of iterations of the training loop (one MAP-Elites update each).
num_iterations = 300 #70000 #10000
# Dimension of the search space (passed as `genotype_dim` to the emitter).
num_dimensions = 2 #1000 #@param {type:"integer"} # try 20 and 100
# num_centroids = 10000 #@param {type:"integer"}
# Shape of the descriptor grid; its product gives the number of centroids.
grid_shape = (100, 100) # (500, 500) 
# Size of the initial population and batch size given to the emitters.
batch_size = 500 #36 #@param {type:"integer"}
# Passed as `sigma_g` to the CMA emitter.
sigma_g = 1. #@param {type:"number"}
# Bounds used by `clip` for the descriptors; `minval` also sets the shift
# (`minval * 0.4`) applied inside the scoring functions.
minval = -5.12 #@param {type:"number"}
maxval = 5.12 #@param {type:"number"}
# Descriptor bounds: each descriptor sums half of the (clipped) coordinates.
# NOTE(review): these use the literal 5.12 rather than minval/maxval, so they
# do not follow the form fields above if those are changed.
min_bd = -5.12 * 0.5 * num_dimensions #@param {type:"number"}
max_bd = 5.12 * 0.5 * num_dimensions #@param {type:"number"}
# Number of emitter states kept in the CMAPoolEmitter (`num_states`).
num_emitters = 3
#@markdown ---

## Define the scoring function: Rastrigin or sphere

In [None]:
def rastrigin_scoring(x: jnp.ndarray):
    """Return the negated, shifted Rastrigin value of ``x``.

    The input is shifted by ``minval * 0.4`` before evaluation and the result
    is negated, so larger values are better. The reduction runs over the last
    axis only (like ``sphere_scoring``), so an input with leading batch
    dimensions yields one score per genotype instead of a single scalar.

    Args:
        x: genotype(s); the last axis holds the search dimensions.

    Returns:
        The fitness, shaped like ``x`` without its last axis.
    """
    shifted = x + minval * 0.4
    first_term = 10 * x.shape[-1]
    second_term = jnp.sum(
        shifted ** 2 - 10 * jnp.cos(2 * jnp.pi * shifted), axis=-1
    )
    return -(first_term + second_term)

def sphere_scoring(x: jnp.ndarray):
    """Return the negated squared norm of ``x`` shifted by ``minval * 0.4``.

    The sum runs over the last axis, so larger (closer to zero) is better.
    """
    shifted = x + minval * 0.4
    return -jnp.sum(jnp.square(shifted), axis=-1)

# Fitness function used by the rest of the notebook. To optimise the sphere
# function instead, select the commented assignment below.
# fitness_scoring = sphere_scoring
fitness_scoring = rastrigin_scoring

def clip(x: jnp.ndarray):
    """Bound each coordinate of ``x`` for the descriptor computation.

    Coordinates inside ``[minval, maxval]`` are returned unchanged; any other
    coordinate ``v`` is replaced by ``maxval / v``.

    Args:
        x: array of coordinates.

    Returns:
        An array of the same shape as ``x``.
    """
    in_bound = (x <= maxval) & (x >= minval)
    # Arguments are passed positionally: recent JAX releases declare them as
    # positional-only, so `condition=` / `x=` / `y=` keywords raise a TypeError.
    return jnp.where(in_bound, x, maxval / x)

def _behavior_descriptor_1(x: jnp.ndarray):
    """Sum of the clipped first half of the search dimensions.

    The slice and the sum both act on the last axis, so the result is the same
    as before for a single 1-D genotype and stays per-genotype when ``x`` has
    leading batch dimensions.
    """
    half = x.shape[-1] // 2
    return jnp.sum(clip(x[..., :half]), axis=-1)

def _behavior_descriptor_2(x: jnp.ndarray):
    """Sum of the clipped second half of the search dimensions.

    The slice and the sum both act on the last axis, so the result is the same
    as before for a single 1-D genotype and stays per-genotype when ``x`` has
    leading batch dimensions.
    """
    half = x.shape[-1] // 2
    return jnp.sum(clip(x[..., half:]), axis=-1)

def _behavior_descriptors(x: jnp.ndarray):
    """Return the two behavior descriptors of ``x`` along a trailing axis.

    For a single 1-D genotype this is an array of shape ``(2,)``, as before.
    Stacking along the last axis keeps the descriptor pair last whenever the
    two descriptor functions return one value per genotype.
    """
    return jnp.stack(
        [_behavior_descriptor_1(x), _behavior_descriptor_2(x)], axis=-1
    )

In [None]:
def scoring_function(x):
    """Score one genotype.

    Returns:
        A ``(fitness, descriptors, extra_scores)`` tuple; ``extra_scores`` is
        always an empty dict.
    """
    fitness = fitness_scoring(x)
    descriptor = _behavior_descriptors(x)
    extra_scores = {}
    return fitness, descriptor, extra_scores

def scoring_fn(x, random_key):
    """Score a batch of genotypes by mapping ``scoring_function`` over axis 0.

    ``random_key`` is not consumed; it is returned unchanged as the fourth
    element of the result.
    """
    batched_scoring = jax.vmap(scoring_function)
    fitnesses, descriptors, extra_scores = batched_scoring(x)
    return fitnesses, descriptors, extra_scores, random_key

## Define the metrics that will be used

In [None]:
# Reference fitness values used by `metrics_fn` to rescale fitnesses.
# "Worst" reference: the fitness with every coordinate set to -5.12.
worst_objective = fitness_scoring(-jnp.ones(num_dimensions) * 5.12)
# worst_objective = rastrigin_scoring(jnp.zeros(num_dimensions))
# "Best" reference: every coordinate at 5.12 * 0.4, which cancels the
# `minval * 0.4` shift applied in the scoring functions when minval is -5.12.
best_objective = fitness_scoring(jnp.ones(num_dimensions) * 5.12 * 0.4)

# Product of the grid dimensions, i.e. the number of grid cells.
num_centroids = math.prod(grid_shape)

def metrics_fn(repertoire: MapElitesRepertoire) -> Dict[str, jnp.ndarray]:
    """Compute the QD score, maximum fitness and coverage of a repertoire.

    Fitnesses are rescaled so that ``worst_objective`` maps to 0 and
    ``best_objective`` maps to 100. Cells whose fitness is ``-inf`` are treated
    as empty: they are left out of the QD score and lower the coverage.

    Args:
        repertoire: the repertoire whose ``fitnesses`` are summarised.

    Returns:
        A dict with the keys ``"qd_score"``, ``"max_fitness"`` and
        ``"coverage"`` (coverage is a percentage).
    """
    fitnesses = repertoire.fitnesses
    is_empty = fitnesses == -jnp.inf

    objective_range = best_objective - worst_objective
    normalized = (fitnesses - worst_objective) * 100 / objective_range

    return {
        "qd_score": jnp.sum(normalized, where=~is_empty),  # / num_centroids
        "max_fitness": jnp.max(normalized),
        "coverage": 100 * jnp.mean(1.0 - is_empty),
    }

## Define the initial population, the emitter and the MAP Elites instance

The emitter is defined using the `CMAEmitter` class and wrapped in a `CMAPoolEmitter`. This emitter is given to a MAP-Elites instance to create an instance of the CMA-ME algorithm.

In [None]:
# Split the key before sampling: `random_key` is handed to `map_elites.init`
# afterwards, and a JAX key must not be consumed twice, so the population is
# drawn from its own subkey.
random_key = jax.random.PRNGKey(0)
random_key, population_key = jax.random.split(random_key)

# Initial genotypes, drawn with jax.random.uniform's default range.
initial_population = jax.random.uniform(
    population_key, shape=(batch_size, num_dimensions)
)

# Grid centroids built from the grid shape and the descriptor bounds.
centroids = compute_euclidean_centroids(
    grid_shape=grid_shape,
    minval=min_bd,
    maxval=max_bd,
)

# Define emitter
# Baseline emitter built on isoline variation only. It is not used by default;
# it can be given to MAPElites below through the commented
# `emitter=mixing_emitter` line.
import functools
from qdax.core.emitters.mutation_operators import isoline_variation
variation_fn = functools.partial(
    isoline_variation, iso_sigma=0.5, line_sigma=0.
)
mixing_emitter = MixingEmitter(
    mutation_fn=None, 
    variation_fn=variation_fn, 
    variation_percentage=1.0, 
    batch_size=batch_size
)

# CMA-ME emitter. The commented class names just above and below the active
# line are alternatives that can be swapped in (presumably with the same
# arguments — confirm against the QDax API).
# emitter = CMAOptimizingEmitter(
emitter = CMAEmitter(
# emitter = CMARndEmitter(
    batch_size=batch_size,
    genotype_dim=num_dimensions,
    centroids=centroids,
    sigma_g=sigma_g,
    min_count=1,
    max_count=20, #20, # None
)

# Wrap the emitter in a pool holding `num_emitters` emitter states; note that
# the name `emitter` is rebound to the pool here.
emitter = CMAPoolEmitter(
    num_states=num_emitters,
    emitter=emitter
)

# MAP-Elites instance driven by the pooled CMA emitter.
map_elites = MAPElites(
    scoring_function=scoring_fn,
    emitter=emitter,
#     emitter=mixing_emitter,
    metrics_function=metrics_fn
)

In [None]:
# Build the initial repertoire and emitter state from the initial population;
# `init` also returns the random key to use from here on.
repertoire, emitter_state, random_key = map_elites.init(initial_population, centroids, random_key)

In [None]:
%%time

# Per-iteration history of the CMA-ES search distributions and of the
# repertoire, starting with the state right after initialisation.
means = [emitter_state.emitter_states.cmaes_state.mean]
covs = [emitter_state.emitter_states.cmaes_state.cov_matrix]
repertoires = [repertoire]

# Metrics returned by each one-step scan. Without this list, `metrics` would
# be overwritten at every iteration and only hold the last step at the end.
metrics_history = []

for i in range(num_iterations):

    # Run exactly one update per scan call so that the intermediate emitter
    # states and repertoires can be recorded after every iteration.
    (repertoire, emitter_state, random_key,), step_metrics = jax.lax.scan(
        map_elites.scan_update,
        (repertoire, emitter_state, random_key),
        (),
        length=1,
    )
    metrics_history.append(step_metrics)

    print("Emitter state current index: ", emitter_state.current_index)
    print("Emitter state counts: ", emitter_state.emitter_states.emit_count)

    means.append(emitter_state.emitter_states.cmaes_state.mean)
    covs.append(emitter_state.emitter_states.cmaes_state.cov_matrix)
    repertoires.append(repertoire)

# Each scan output has a leading axis of length 1; concatenating them gives
# one entry per iteration for every metric.
metrics = jax.tree_util.tree_map(
    lambda *leaves: jnp.concatenate(leaves, axis=0), *metrics_history
)

In [None]:
# Report the last recorded value of every metric, labelled with the total
# number of evaluations (iterations times batch size).
total_evaluations = num_iterations * batch_size
for metric_name, metric_values in metrics.items():
    print(f"{metric_name} after {total_evaluations}: {metric_values[-1]}")

In [None]:
metrics

In [None]:
# #@title Visualization

# # create the x-axis array
# env_steps = jnp.arange(num_iterations) * batch_size

# # create the plots and the grid
# fig, axes = plot_map_elites_results(
#     env_steps=env_steps, metrics=metrics, repertoire=repertoire, min_bd=min_bd, max_bd=max_bd
# )

In [None]:
emitter_state.current_index

In [None]:
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt

def plot_map_elites_results(
    env_steps: jnp.ndarray,
    metrics: Dict,
    repertoire: MapElitesRepertoire,
    min_bd: jnp.ndarray,
    max_bd: jnp.ndarray,
):
    """Plot coverage, maximum fitness and QD score against environment steps.

    A figure with four side-by-side axes is created and the three curves are
    drawn in the first three. The fourth axis stays empty because the
    repertoire plot is currently commented out; as a consequence
    ``repertoire``, ``min_bd`` and ``max_bd`` are accepted but not used.

    Note that this function updates the global ``mpl.rcParams`` (font sizes,
    title padding, no TeX rendering).

    Args:
        env_steps: the array containing the number of steps done in the
            environment, used as the x-axis of every curve.
        metrics: a dictionary containing metrics from the optimization
            process; the keys "coverage", "max_fitness" and "qd_score" are read.
        repertoire: the final repertoire obtained.
        min_bd: the minimal possible values for the bd.
        max_bd: the maximal possible values for the bd.

    Returns:
        The figure and the array of its four axes.
    """
    # Customize matplotlib params
    label_size = 16
    mpl.rcParams.update(
        {
            "axes.labelsize": label_size,
            "axes.titlesize": label_size,
            "legend.fontsize": label_size,
            "xtick.labelsize": label_size,
            "ytick.labelsize": label_size,
            "text.usetex": False,
            "axes.titlepad": 10,
        }
    )

    # Visualize the training evolution
    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(40, 10))

    # env_steps = jnp.arange(num_iterations) * episode_length * batch_size

    # (metric key, y-axis label, title) for the first three axes, in order.
    curves = (
        ("coverage", "Coverage in %", "Coverage evolution during training"),
        (
            "max_fitness",
            "Maximum fitness",
            "Maximum fitness evolution during training",
        ),
        ("qd_score", "QD Score", "QD Score evolution during training"),
    )
    for axis, (metric_key, y_label, title) in zip(axes, curves):
        axis.plot(env_steps, metrics[metric_key])
        axis.set_xlabel("Environment steps")
        axis.set_ylabel(y_label)
        axis.set_title(title)
        # Keep every panel at the same on-screen aspect whatever its data range.
        axis.set_aspect(0.95 / axis.get_data_ratio(), adjustable="box")

    # Disabled: plot of the final repertoire on the fourth axis.
#     _, axes = plot_2d_map_elites_repertoire(
#         centroids=repertoire.centroids,
#         repertoire_fitnesses=repertoire.fitnesses,
#         minval=min_bd,
#         maxval=max_bd,
#         repertoire_descriptors=repertoire.descriptors,
#         ax=axes[3],
#     )

    return fig, axes

In [None]:
# env_steps = jnp.arange(num_iterations) * batch_size

# fig, axes = plot_map_elites_results(
#     env_steps=env_steps, metrics=metrics, repertoire=repertoire, min_bd=min_bd, max_bd=max_bd
# )

In [None]:
# fig, axes = plot_multidimensional_map_elites_grid(
#     repertoire=repertoire,
#     minval=jnp.array([min_bd, min_bd]),
#     maxval=jnp.array([max_bd, max_bd]),
#     grid_shape=grid_shape,
# )

In [None]:
means, covs

## Trying dynamic visualisation tool

In [None]:
from matplotlib.patches import Ellipse

# Import animation package
from matplotlib.animation import FuncAnimation

In [None]:
# Change matplotlib backend
%matplotlib notebook

In [None]:
from matplotlib.colors import Normalize
from mpl_toolkits.axes_grid1 import make_axes_locatable


In [None]:
# Figure and axes shared by the animation callbacks defined below.
# fig, ax = plt.subplots(figsize=(12, 6))
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111)

# Sample random points in [-5, 5]^2 to show the fitness landscape.
# NOTE(review): the sample shape is hard-coded to 2 columns, so this cell
# assumes num_dimensions == 2.
key = jax.random.PRNGKey(0)
x = jax.random.uniform(key, minval=-5, maxval=5, shape=(200000, 2))

f_x = jax.vmap(fitness_scoring)(x)


# plot fitness landscape
plot1 = ax.scatter(x[:, 0], x[:, 1], c=f_x, s=5, alpha=0.01)
# fig.colorbar(plot1)

# Two tiny points placed at 100x the bounds and coloured -100 and 0; they are
# only used as the mappable of the colorbar (presumably because the landscape
# scatter above is nearly transparent — confirm).
fake_plot = ax.scatter(
    jnp.array([maxval * 100, 100 * minval]), jnp.array([maxval * 100, minval * 100]), c=[-100, 0], s=0.01
)
fig.colorbar(fake_plot)


# One colour and one dashed ellipse per emitter. The list has three entries,
# so `colors[j]` raises an IndexError here if num_emitters is larger than 3.
colors = ['r', 'tab:orange', 'tab:brown']
ellipses = []
for j in range(num_emitters):
    # Initial search distribution of emitter j.
    mean = means[0][j]
    cov = covs[0][j]
    # Width and height are the diagonal covariance entries; off-diagonal terms
    # are ignored and no angle is passed.
    # NOTE(review): these are variances, not standard deviations — confirm
    # that this is the intended scale.
    ellipse = Ellipse((mean[0], mean[1]), cov[0, 0], cov[1, 1], fill=False, color=colors[j], ls='--')
    
    ellipses.append(ellipse)
    ax.add_patch(ellipse)
    

def single_frame_plot(i, j):
    """Update emitter ``j``'s ellipse to its state at step ``i`` and mark its mean.

    The ellipse is re-centred on the recorded mean and its width/height are set
    to the diagonal entries of the recorded covariance matrix. An 'x' marker is
    also added at the mean, so markers accumulate over frames.

    The per-frame debug prints were removed (they produced two lines per
    emitter for every frame), and the colour is picked modulo the number of
    available colours so more emitters than colours no longer raises.

    Args:
        i: index into the recorded ``means`` / ``covs`` histories.
        j: index of the emitter.
    """
    mean = means[i][j]
    cov = covs[i][j]
    color = colors[j % len(colors)]

    ellipse = ellipses[j]
    ellipse.set_center((mean[0], mean[1]))
    ellipse.width = cov[0, 0]
    ellipse.height = cov[1, 1]

    ax.plot(mean[0], mean[1], color=color, marker='x')
    
def frame_plot(i):
    """Animation callback: draw the state recorded at step ``i``.

    Updates the ellipse and mean marker of every emitter, then scatters the
    genotypes of the repertoire recorded at step ``i`` (first two coordinates),
    coloured by fitness.
    """
    for emitter_index in range(num_emitters):
        single_frame_plot(i, emitter_index)

    snapshot = repertoires[i]
    genotypes = snapshot.genotypes
    fitnesses = snapshot.fitnesses

    ax.scatter(genotypes[:, 0], genotypes[:, 1], c=fitnesses, s=1.)


# plot cma-es trajectory
# One frame per recorded state. `means` holds the initial state plus one entry
# per training iteration, so the frame range is bounded by its length; a
# hard-coded upper bound larger than that makes `frame_plot` index past the
# end of the recorded histories.
traj_min = 0
traj_max = len(means)

my_frames = range(traj_min, traj_max)

dynamic_plot = FuncAnimation(fig=fig, func=frame_plot, frames=my_frames, interval=1000, repeat=True)

# Report the configured number of emitters instead of a hard-coded count.
ax.set_title(f"CMA-ME using {num_emitters} improvement emitters on a Rastrigin function")
# Restrict the view to the sampled landscape; this also hides the off-screen
# points that only exist to feed the colorbar.
ax.set_xlim([-5, 5])
ax.set_ylim([-5, 5])

ax.set_xlabel("Search Dimension 1", fontsize=14)
ax.set_ylabel("Search Dimension 2", fontsize=14)

fig.tight_layout()

plt.show()

In [None]:
# Save the animation as a GIF at 10 frames per second.
# NOTE(review): writer='imagemagick' presumably requires ImageMagick to be
# installed on the machine running the notebook — confirm.
dynamic_plot.save('cma_me_ragistrin.gif', writer='imagemagick', fps=10)

In [None]:
import numpy as np

# Square evaluation grid: N points per axis over [-4, 4].
N = 100
x = np.linspace(-4.0, 4.0, N)
y = x.copy()
X, Y = np.meshgrid(x, y)

# Test surface: a low, wide hump (Z1) with a narrow spike (Z2) on top of it.
# Both features are only visible together when the colour axis is on a log
# scale; a linear scale shows the spike only.
Z1 = np.exp(-(X**2 + Y**2))
Z2 = np.exp(-((10 * X)**2 + (10 * Y)**2))
z = 50 * Z2 + Z1

In [None]:
X

In [None]:
z

In [None]:
# Filled contour plot of z on the (X, Y) grid; no explicit levels or norm are
# passed to contourf.
# NOTE(review): this rebinds the module-level `fig` and `ax` that the
# animation callbacks above read — confirm that this is intended.
fig, ax = plt.subplots()
cs = ax.contourf(X, Y, z)

# Alternatively, you can manually set the levels
# and the norm:
# lev_exp = np.arange(np.floor(np.log10(z.min())-1),
#                    np.ceil(np.log10(z.max())+1))
# levs = np.power(10, lev_exp)
# cs = ax.contourf(X, Y, z, levs, norm=colors.LogNorm())
# NOTE(review): the snippet above expects `colors` to be the matplotlib.colors
# module; in this notebook `colors` is a list of colour strings, so it would
# need its own import before being enabled.

cbar = fig.colorbar(cs)

plt.show()