In [1]:
import pandas as pd
import os
import numpy as np
import plotly.express as px
import ast
from sklearn.decomposition import PCA
import seaborn as sn
import matplotlib.pyplot as plt
from pylab import rcParams
import pickle

# Display names for the two task stages. `plott` maps each label's 'stage'
# field to one of these and uses the result as the marker-symbol category.
TRAIN = "Train"
EVAL = "Eval"

In [32]:
def embeddings_pca(embeddings_df, n_components=3):
    """Project embeddings onto their leading principal components.

    A PCA model is fitted on ``embeddings_df`` and the same data is then
    transformed with it.

    Args:
        embeddings_df: 2-D array-like accepted by ``PCA.fit``
            (presumably samples x embedding dimension -- confirm with caller).
        n_components: number of principal components to keep.

    Returns:
        The transformed data as returned by ``PCA.transform``.
    """
    # fit() returns the estimator itself, so the fitted model can be bound
    # directly. fit and transform are intentionally kept as separate calls.
    reducer = PCA(n_components=n_components).fit(embeddings_df)
    return reducer.transform(embeddings_df)

In [40]:
def plott(embeddings, labels, title='', task_param='velocity', show_labels=False, n_components=3, xrange=None, yrange=None, output_dir="images", reference_point=(0, 0.85, 0.175)):
    """Scatter-plot projected embeddings and save the figure as a PNG.

    Points are coloured by the distance of each label's ``task_param`` value
    from ``reference_point`` (formatted to two decimals) and given a marker
    symbol according to whether the label's stage is train or eval.

    Args:
        embeddings: projected embeddings; columns 0, 1 (and 2 when
            ``n_components == 3``) are used as the plot axes.
        labels: iterable of dicts, one per point, each providing a ``'stage'``
            key and a ``task_param`` key.
        title: figure title; also used as the output file name (without
            extension), so it must be a valid file name.
        task_param: key in each label holding the task parameter.
        show_labels: currently unused; kept for interface compatibility.
        n_components: 3 produces a 3-D scatter, any other value a 2-D scatter.
        xrange: optional ``[min, max]`` forwarded as the layout's x-axis range.
        yrange: optional ``[min, max]`` forwarded as the layout's y-axis range.
        output_dir: directory the PNG is written to; created if missing.
        reference_point: point the task parameter is measured against. The
            default is the value that was previously hard-coded.

    Returns:
        None. The figure is written to ``<output_dir>/<title>.png``.
    """
    stages = np.array([
        TRAIN if label['stage'] == 'train' else EVAL
        for label in labels
    ])

    reference = np.array(reference_point)
    # The two-decimal formatting keeps the colour scale values short.
    tasks = np.array([
        float("{0:.2f}".format(np.linalg.norm(label[task_param] - reference)))
        for label in labels
    ])

    if n_components == 3:
        fig = px.scatter_3d(embeddings, x=0, y=1, z=2, color=tasks, symbol=stages)
    else:
        fig = px.scatter(embeddings, x=0, y=1, color=tasks, symbol=stages)

    fig.update_layout(
        title=title,
        xaxis_range=xrange,
        yaxis_range=yrange,
        legend=dict(x=-.1, y=-.1)
    )

    # write_image does not create missing directories, so make sure the
    # target directory exists before exporting.
    os.makedirs(output_dir, exist_ok=True)
    fig.write_image(os.path.join(output_dir, "{}.png".format(title)))

In [4]:
# Collect every directory below 'output' whose final path component contains
# 'embeddings'. os.path.basename is used instead of splitting on '/' so the
# check also works with non-POSIX path separators.
experiments = sorted(
    dirname
    for dirname, _subdirs, _files in os.walk('output')
    if 'embeddings' in os.path.basename(dirname)
)

# Print an index next to each experiment directory for reference.
for i, experiment in enumerate(experiments):
    print(i, experiment)

0 output/cheetah-vel/2020_02_16_00_49_55/embeddings
1 output/cheetah-vel/2020_02_17_18_09_55/embeddings
2 output/cheetah-vel/2020_03_03_16_20_26/embeddings
3 output/cheetah-vel/2020_03_04_17_23_41/embeddings
4 output/cheetah-vel/2020_03_05_23_41_01/embeddings
5 output/cheetah-vel/2020_03_06_13_23_33/embeddings
6 output/cheetah-vel/2020_03_07_23_12_39/embeddings
7 output/cheetah-vel/2020_03_17_00_33_59/embeddings
8 output/cheetah-vel/2020_03_26_04_30_37/embeddings
9 output/cheetah-vel/2020_03_26_04_34_42/embeddings
10 output/cheetah-vel/2020_03_27_00_51_58/embeddings
11 output/reach-ml1/2020_04_09_01_49_41-baseline-3adapt/embeddings
12 output/reach-ml1/2020_04_15_22_29_42-baseline/embeddings
13 output/reach-ml1/2020_04_18_23_23_57-baseline-ood/embeddings
14 output/reach-ml1/2020_04_18_23_25_34-rwscale/embeddings
15 output/reach-ml1/2020_04_21_15_12_49-l2-ood/embeddings
16 output/reach-ml1/2020_04_21_15_13_40-intobs-ood/embeddings
17 output/reach-ml1/2020_04_23_11_14_21-intctx-ood/embedd

In [43]:
# Experiments to plot. The processing loop unpacks every entry as a 4-tuple:
#   (experiment_path, experiment_name, task_param, adapt_index)
# where experiment_path is a directory that is listed for 'embeddings_*' files,
# experiment_name becomes the plot title prefix, and task_param is the label
# key used for colouring. adapt_index is unpacked by the loop but not used.
# NOTE(review): the commented-out entries below are kept as history; those with
# three elements or an integer first element (presumably an index into the
# `experiments` listing -- confirm) do not match this shape and would need
# updating before being re-enabled.
results = [
    #(0, 'Int. Obs 1', 'velocity'),
    #(1, 'Int. Ctx 1', 'velocity'),
    #(2, 'PEARL 1', 'velocity'),
    #(3, 'PEARL 2', 'velocity'),
    #(4, 'Int. Ctx 2', 'velocity'),
    #(5, 'Ctx Only Grads', 'velocity'),
    #(6, 'Int. Ctx 3', 'velocity'),
    #(7, 'Int. Ctx 4', 'velocity'),
    #(8, 'L2 0.1', 'velocity'),
    #(9, 'Dropout', 'velocity'),    
    #(10, 'L2 0.01 1', 'velocity', '00002'),
    #(11, 'L2 0.01 2', 'velocity'),
    #(12, 'PEARL 3 (Small)', 'velocity'),
    #(3, 'Reach Normalized', 'goal')
    #('output/reach-ml1/2020_04_15_22_29_42-baseline/embeddings', 'PEARL', 'goal_pos', '00010'),
    #(13, 'PEARL OOD', 'goal_pos', '00010'),
    #(14, 'Scale', 'goal_pos', '00010'),
    #(15, 'L2 0.01', 'goal_pos', '00010'),
    #(16, 'Int Obs OOD', 'goal_pos', '00010'),
    #(17, 'Int Ctx OOD', 'goal_pos', '00010'), # curiosity (ctx) out of dist on reach
    #('output/reach-ml1/2020_04_23_13_48_31-intctx', 'Int Ctx', 'goal_pos', '00010'),
    #('output/reach-ml1/2020_04_25_13_48_34-l2', 'L2 0.01', 'goal_pos', '00010'),
    #$(20, 'L2 0.01 OOD', 'goal_pos', '00010'), # L2 reg on encoder out of dist on reach,
    #('output/reach-ml1/2020_05_15_12_07_44-baseline/', 'Baseline', 'goal_pos', '00010'),
    ('output/reach-ml1/2020_05_15_12_08_37-l2', 'L2', 'goal_pos', '00010'),
    ('output/reach-ml1/2020_05_15_12_09_16-latent15', 'Latent 15', 'goal_pos', '00010'),
]

In [44]:
# For every selected experiment: load each saved embedding snapshot, project
# it to 2-D with PCA, and write one scatter plot per snapshot.
# adapt_index is unpacked to match the 4-tuples in `results` but is not used.
for experiment_path, experiment_name, task_param, adapt_index in results:
    # Paths of all snapshot files in the experiment directory.
    embedding_paths = [
        os.path.join(experiment_path, filename)
        for filename in os.listdir(experiment_path)
        if 'embeddings_' in filename
    ]
    # Order snapshots numerically by the number that follows the last
    # underscore in the path (before the first dot after it).
    embedding_paths.sort(key=lambda x: float(x.split('_')[-1].split('.')[0]))

    frames = []
    # Running [min, max] of the projected coordinates over all snapshots.
    # They start at +/-inf so the first snapshot always replaces them.
    xrange = [np.inf, -np.inf]
    yrange = [np.inf, -np.inf]
    for embedding in embedding_paths:
        # The context manager closes the file handle even if unpickling fails.
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(embedding, 'rb') as file:
            data = pickle.load(file) # {'embeddings', 'labels', 'tasks', 'num_train_tasks', 'epoch'}

        # [train + eval tasks, evals, rollouts, embeddings] -> [tasks+evals+rollouts, embedding]
        # Move the rollout axis to the front, then flatten the task and eval
        # axes so each rollout index holds one 2-D (points x dimension) array.
        raw = np.transpose(data['embeddings'], (2, 0, 1, 3))
        raw = np.reshape(raw, (
            raw.shape[0], # rollouts
            -1, # stack all eval embeddings, task over task
            raw.shape[3], # embedding dimension
        ))

        # Only the last rollout index is projected and plotted.
        embeddings = embeddings_pca(raw[-1], n_components=2)
        data['embeddings'] = embeddings
        frames.append(data)

        xrange = [
            min(xrange[0], np.min(embeddings[:, 0])),
            max(xrange[1], np.max(embeddings[:, 0])),
        ]
        yrange = [
            min(yrange[0], np.min(embeddings[:, 1])),
            max(yrange[1], np.max(embeddings[:, 1])),
        ]

    for frame in frames:
        plott(
            frame['embeddings'],
            frame['labels'],
            title=experiment_name + ' epoch {}'.format(frame['epoch']),
            task_param=task_param,
            show_labels=False,
            n_components=2,
            # Uncomment to share one axis range across all snapshots:
            #xrange=xrange,
            #yrange=yrange
        )