# Random Goal Exploration with Learned Reps

In [73]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets
%matplotlib notebook
plt.style.use('seaborn-darkgrid')
import pickle
import os
import json
import pprint
import warnings
warnings.filterwarnings('ignore')
from itertools import *
import scipy.stats
from tqdm import tqdm_notebook as tqdm
import skl_groups
from skl_groups.divergences import KNNDivergenceEstimator
from skl_groups.features import Features
import matplotlib.lines

## Config

In [74]:
path = "Rge-Rep Vae Armball 2017-12-20 15:59:49.019636/"
with open(os.path.join(path, "config.json")) as f:
    config = json.load(f)
pprint.pprint(config)

{u'deformation': 0.0,
 u'distractor': False,
 u'embedding': u'vae',
 u'environment': u'armball',
 u'explo_ratio': 0.05,
 u'name': u'Rge-Rep Vae Armball 2017-12-20 15:59:49.019636',
 u'nb_bins_exploration_ratio': 10,
 u'nb_exploration_iterations': 5000,
 u'nb_period_callback': 10,
 u'nb_samples': 10000,
 u'nb_samples_divergence': 1000,
 u'nb_samples_manifold': 1000,
 u'nb_samples_mse': 100,
 u'nlatents': 10,
 u'noise': 0.0,
 u'outliers': 0.0,
 u'path': u'results/Rge-Rep Vae Armball 2017-12-20 15:59:49.019636',
 u'sampling': u'normal',
 u'test': False,
 u'verbose': False}


# Representation Learning

### Training images

In [3]:
X = np.load(os.path.join(path, 'training_images.npy'))
fig, ax = plt.subplots(3,3, figsize=(9,9))
for i in range(9):
    ax[i%3, i/3].imshow(X[i])
    ax[i%3, i/3].axis("off")

<IPython.core.display.Javascript object>

### Training States

In [4]:
states = np.load(os.path.join(path, 'samples_states.npy'))
n_states=states.shape[-1]
fig, ax = plt.subplots(n_states,n_states, figsize=(9,9))
for i in range(n_states**2):
    ax[i%n_states, i/n_states].scatter(states[:,i%n_states], states[:,i/n_states], s=5., alpha=.6)
    ax[i%n_states, i/n_states].axis('off')
fig.suptitle("Joint Plots of sampled states for image generation");

<IPython.core.display.Javascript object>

### Training Geodesics

In [5]:
geodesics = np.load(os.path.join(path, 'samples_geodesics.npy'))
n_geodesics=geodesics.shape[-1]
fig, ax = plt.subplots(n_geodesics,n_geodesics, figsize=(9,9))
for i in range(n_geodesics**2):
    ax[i%n_geodesics, i/n_geodesics].scatter(geodesics[:,i%n_geodesics], geodesics[:,i/n_geodesics], s=5., alpha=.6)
    ax[i%n_geodesics, i/n_geodesics].axis('off')

<IPython.core.display.Javascript object>

### Latent projection of training set after training

In [7]:
latents = np.load(os.path.join(path, 'training_latents.npy'))
plt.figure()
plt.scatter(latents[:,0], latents[:,1], s=1., cmap='jet', c = range(latents.shape[0]))

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7f715fe456d0>

In [654]:
latents = np.load(os.path.join(path, 'training_latents.npy'))
n_latents=latents.shape[-1]
fig, ax = plt.subplots(n_latents,n_latents, figsize=(9,9))
for i in range(n_latents**2):
    ax[i%n_latents, i/n_latents].scatter(latents[:,i%n_latents], 
                                         latents[:,i/n_latents], 
                                         s=5., 
                                         c=range(latents.shape[0]),
                                         cmap='jet',
                                         marker='.',
                                         alpha=.6)
    
    ax[i%n_latents, i/n_latents].axis('off')

<IPython.core.display.Javascript object>

## Exploration Dynamics

### Location Explored

In [8]:
with open(os.path.join(path, "explored_states_history.pkl"), 'rb') as f:
    explored_states_history = pickle.load(f)
arm = scipy.misc.imread('test.png')

In [9]:
fig = plt.figure(figsize=(9,9))
ax = fig.add_subplot(1, 1, 1)
scatt = ax.scatter(explored_states_history[498][:,0], 
                   explored_states_history[498][:,1],
                   cmap='jet', s=10., alpha=.6)
ax.imshow(arm, extent=[-1,1, -1, 1], alpha=.3)
def update(epoch):
    scatt.set_offsets(explored_states_history[epoch*10][:,0:2])
    fig.canvas.draw()
ipywidgets.interact(update, epoch=(0, 48));

<IPython.core.display.Javascript object>

A Jupyter Widget

### Evolution of KL-Div with attainable points.

In [15]:
def divergence_estimation(X_s, X_l, k=10):

    div = KNNDivergenceEstimator(div_funcs=['kl'], Ks=[k], n_jobs=4, clamp=True, do_sym = 1)
    X_s = X_s.astype(np.float32)
    X_l = X_l.astype(np.float32)
    f_s = Features(X_s, n_pts=[X_s.shape[0]])
    f_l = Features(X_l, n_pts=[X_l.shape[0]])
    div.fit(X=f_s)
    kls = div.transform(X=f_l).squeeze()

    return kls

def discretized_kl_div(X_s, n_bins=30):
    
    n_samples, n_dim = X_s.shape
    
    histp , _ = np.histogramdd(X_s, bins=n_bins, range=np.array([[-1]*n_dim, [1]* n_dim]).T)
    
    histp = histp.ravel() / histp.sum()
    
    histq = np.ones(histp.shape)
    
    histq = histq / histq.sum()
    
    
    return scipy.stats.entropy(histp, histq)

def sample_in_attainable(nb_points, env):
    
    if env == 'armball':
        i = 0
        X = np.ndarray((nb_points, 2))
        while not i == nb_points:
            sample = np.random.uniform(-1, 1, 2)
            if np.linalg.norm(sample, ord=2) >1.:
                continue
            X[i] = sample
            i += 1
        return X
        
    if env == 'armarrow':
        i = 0
        X = np.ndarray((nb_points, 3))
        while not i == nb_points:
            sample = np.random.uniform(-1, 1, 3)
            if np.linalg.norm(sample[0:2], ord=2) > 1.:
                continue
            X[i] = sample
            i += 1
        return X
    

In [16]:
with open(os.path.join(path, "explored_states_history.pkl"), 'rb') as f:
    explored_states_history = pickle.load(f)

In [91]:
kls = np.zeros((49))
expl = np.zeros((49))
X_real = sample_in_attainable(explored_states_history[498].shape[0], 'armball')
for i in tqdm(range(49)):   
    #explored = explored_states_history[i*10][np.where(explored_states_history[i*10]!=[.6, .6])[1]]
    explored = explored_states_history[i*10]
    kls[i] = discretized_kl_div(explored)
    expl[i] = np.sum(np.linalg.norm(explored_states_history[i*10] - np.array([0.6, 0.6]), axis=1, ord=2) > 1e-3)
    expl[i] /= explored_states_history[i*10]

A Jupyter Widget




ValueError: setting an array element with a sequence.

In [89]:
arm = scipy.misc.imread('test.png')
cmap='Blues'
plt.style.use('seaborn-darkgrid')

In [90]:
fig = plt.figure(figsize=(9.5,3))
plt.title("Discretized $\mathbb{D}_{KL}[e\|a]$ for RGE-ISOMAP in Armball environment.")

ax1 = fig.add_subplot(1, 1, 1)
ax1.plot(kls,linewidth=1.)
ax1_2 = ax1.twinx()
ax1_2.plot(expl, linewidth=1., c='magenta', label="Points Explored")
ax1.set_ylim(3.5, 10.)
ax1.set_xlim(0., 50.)
ax1.set_xlabel("Exploration epochs (x100)")
ax1.set_ylabel("$\mathbb{D}_{KL}[e\|a]$")
ax1.add_line(matplotlib.lines.Line2D([5,5],   [kls[5]+.05,7], linewidth=1))
ax1.add_line(matplotlib.lines.Line2D([15,15], [kls[15]+.05,7], linewidth=1))
ax1.add_line(matplotlib.lines.Line2D([25,25], [kls[25]+.05,7], linewidth=1))
ax1.add_line(matplotlib.lines.Line2D([35,35], [kls[35]+.05,7], linewidth=1))
ax1.add_line(matplotlib.lines.Line2D([45,45], [kls[45]+.05,7], linewidth=1))
points = [5,15,25,35,45]
ax1.scatter(points, kls[points])

ax1_2.spines['right'].set_color('magenta')
ax1_2.tick_params(axis='x', colors='red')


ax = fig.add_axes([0.01, 0.55, .3, .3])
ax.imshow(arm, extent=[-1,1, -1, 1], alpha=.85)
ax.scatter(explored_states_history[50][:,0], explored_states_history[50][:,1], s=.5, alpha=.2)
ax.axis("off")

ax = fig.add_axes([0.19, 0.55, .3, .3])
ax.imshow(arm, extent=[-1,1, -1, 1], alpha=.85)
ax.scatter(explored_states_history[150][:,0], explored_states_history[150][:,1], s=.5, alpha=.2)
ax.axis("off")

ax = fig.add_axes([0.375, 0.55, .3, .3])
ax.imshow(arm, extent=[-1,1, -1, 1], alpha=.85)
ax.scatter(explored_states_history[250][:,0], explored_states_history[250][:,1], s=.5, alpha=.2)
ax.axis("off")

ax = fig.add_axes([0.555, 0.55, .3, .3])
ax.imshow(arm, extent=[-1,1, -1, 1], alpha=.85)
ax.scatter(explored_states_history[350][:,0], explored_states_history[350][:,1], s=.5, alpha=.2)
ax.axis("off")

ax = fig.add_axes([0.735, 0.55, .3, .3])
ax.imshow(arm, extent=[-1,1, -1, 1], alpha=.85)
ax.scatter(explored_states_history[450][:,0], explored_states_history[450][:,1], s=.5, alpha=.2)
ax.axis("off");

plt.tight_layout()
fig.savefig("Figures/exploration_plot_rge_isomap_armball.pdf")

<IPython.core.display.Javascript object>

## Evolution of closer points in latent space

In [671]:
with open(os.path.join(path, "explored_latents_history.pkl"), 'rb') as f:
    explored_latents_history = pickle.load(f)
with open(os.path.join(path, "explored_states_history.pkl"), 'rb') as f:
    explored_states_history = pickle.load(f)
training_latents = np.load(os.path.join(path, 'training_latents.npy')).astype(np.float64)

In [672]:
from sklearn.neighbors import KernelDensity, NearestNeighbors
kde = KernelDensity(bandwidth=.1)
kde.fit(training_latents)

KernelDensity(algorithm='auto', atol=0, bandwidth=0.1, breadth_first=True,
       kernel='gaussian', leaf_size=40, metric='euclidean',
       metric_params=None, rtol=0)

In [673]:
points = np.zeros((499,1000,2))
latents = np.zeros((499,1000,2))
for i in range(499):
    neig = NearestNeighbors(n_neighbors=1)
    neig.fit(explored_latents_history[i])
    neigs = neig.kneighbors(kde.sample(1000), n_neighbors=1)[1].squeeze().astype(np.int)
    points[i] = explored_states_history[i][neigs]
    latents[i] = explored_latents_history[i][neigs,0:2]

In [674]:
fig, ax = plt.subplots(1,2,figsize=(9,4.7))
fig.suptitle("Latent Goals and States Outcome for RGE-RFVAE with 2 latents, on Armball")
ax1 = ax[0]
ax2 = ax[1]
#ax1.set_xlim(-20,20)
#ax1.set_ylim(-20,20)
ax1.scatter(training_latents[:,0], training_latents[:,1], marker='.', alpha=.01, c='purple')
scatt = ax1.scatter(latents[48,:,0], latents[48,:,1], marker='.', alpha=.1)
leg = ax1.legend(['Training Set Projection', 'Goals'])
leg.legendHandles[0].set_alpha(1.)
leg.legendHandles[1].set_alpha(1.)
ax1.tick_params(labelcolor="white")
ax2.imshow(arm, extent=[-1,1, -1, 1], alpha=.3)
ax2.tick_params(labelcolor="white")
expl = ax2.scatter(explored_states_history[498][:,0], explored_states_history[498][:,1], marker='.', alpha=.1 )
leg = ax2.legend(['Achieved States'])
leg.legendHandles[0].set_alpha(1.)
plt.tight_layout()
plt.subplots_adjust(top=.9)
def update(epoch):
    scatt.set_offsets(latents[epoch,:,0:2])
    expl.set_offsets(explored_states_history[epoch])
    fig.canvas.draw()
ipywidgets.interact(update, epoch=(0, 498))


<IPython.core.display.Javascript object>

A Jupyter Widget

<function __main__.update>

In [522]:
fig.savefig("Figures/latent_exploration_rge_vae.pdf")