## Analysis of principal actions (panel C)

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
import pickle
from definitions import ROOT_DIR
from sklearn.decomposition import PCA
from helpers import make_parallel_envs, set_config
from stable_baselines3.common.vec_env import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.environment_factory import EnvironmentFactory
from sklearn.preprocessing import MinMaxScaler
from main_eval import load_vecnormalize, load_model
from matplotlib.cm import get_cmap
from itertools import combinations

In [None]:
# Load the small variation datasets.
# File name (inside data/datasets) of the dataset of each task
df_name_dict = {
    "baoding": "baoding_sv_no_activity.h5",
    "early_baoding": "baoding_step_12_no_activity.h5",
    "hand_pose": "hand_pose.h5",
    "hand_reach": "hand_reach.h5",
    "pen": "pen.h5",
    "reorient": "reorient.h5",
}
# One DataFrame per task, keyed by the task name
df_dict = {
    task: pd.read_hdf(os.path.join(ROOT_DIR, "data", "datasets", file_name))
    for task, file_name in df_name_dict.items()
}
# baoding_df = pd.read_hdf(os.path.join(ROOT_DIR, "data", "datasets", "baoding_sv_no_activity.h5"))
# control_df = pd.read_hdf(os.path.join(ROOT_DIR, "data", "datasets", "hand_pose.h5"))


# cw_path = os.path.join(ROOT_DIR, "data", "rollouts", "final_model_500_episodes_activations_info_small_variations_cw", "data.hdf")
# rollouts_cw = pd.read_hdf(cw_path)
# ccw_path = os.path.join(ROOT_DIR, "data", "rollouts", "final_model_500_episodes_activations_info_small_variations_ccw", "data.hdf")
# rollouts_ccw = pd.read_hdf(ccw_path)
# rollouts_df = pd.concat((rollouts_cw, rollouts_ccw)).reset_index()
# # rollouts_df = rollouts_ccw

# cw_path = os.path.join(ROOT_DIR, "data", "rollouts", "step_12_500_episodes_activations_info_cw", "data.hdf")
# early_baoding_rollouts_cw = pd.read_hdf(cw_path)
# ccw_path = os.path.join(ROOT_DIR, "data", "rollouts", "step_12_500_episodes_activations_info_ccw", "data.hdf")
# early_baoding_rollouts_ccw = pd.read_hdf(ccw_path)
# early_baoding_rollouts_df = pd.concat((early_baoding_rollouts_cw, early_baoding_rollouts_ccw)).reset_index()
# # early_baoding_rollouts_df = early_baoding_rollouts_ccw

# data_dir = os.path.join(ROOT_DIR, "data", "rollouts")
# control_tasks_dict = {
#     "hand_pose": "hand_pose_1000_episodes_lattice.h5",
#     "hand_reach": "hand_reach_1000_episodes_lattice.h5",
#     "reorient": "reorient_1000_episodes_lattice.h5",
#     "pen": "pen_1000_episodes_lattice.h5",
# }
# df_dict = {
#     key: pd.read_hdf(os.path.join(data_dir, "control", value))
#     for key, value in control_tasks_dict.items()
# }

In [None]:
# Optional: evaluate the model while projecting its actions onto a subset of
# the principal components (PCs) of the action space.
# "best_to_worst" removes the low-variance components first, "worst_to_best"
# removes the high-variance components first.
n_comp = 39
num_ep = 100
target_task_list = ["early_baoding"]  # , "hand_pose", "hand_reach", "pen", "reorient"]
sorting = "best_to_worst"  # "best_to_worst", "worst_to_best"
env_config = {
    "env_name": "MyoBaodingBallsP1",
    # Only the "solved" term contributes to the reward
    "weighted_reward_keys": {
        "pos_dist_1": 0,
        "pos_dist_2": 0,
        "act_reg": 0,
        "solved": 5,
        "done": 0,
        "sparse": 0,
    },
    # # "goal_time_period": [4, 6],  # phase 2: (4, 6)
    # # "goal_xrange": (0.020, 0.030),  # phase 2: (0.020, 0.030)
    # # "goal_yrange": (0.022, 0.032),  # phase 2: (0.022, 0.032)
    # # # Randomization in physical properties of the baoding balls
    "obj_size_range": (
        0.020,
        0.022,
    ),  # (0.018, 0.024)   # Object size range. Nominal 0.022
    "obj_mass_range": (
        0.14,
        0.16,
        # # "obj_friction_change": (0.2, 0.001, 0.00002),  # (0.2, 0.001, 0.00002)
    ),
    "task_choice": "fixed",
    "seed": 42,
}
env_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl",
)
model_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/model.zip",
)

for target_task in target_task_list:
    # First choose the PCA used to project the actions.
    # NOTE(review): `rollouts_df` and `early_baoding_rollouts_df` are only
    # created by the commented-out loading code of the dataset cell - confirm
    # they are in scope before running this cell.
    if target_task == "baoding":
        actions = np.vstack(rollouts_df.action)
        pca = PCA(n_components=n_comp).fit(actions)
    elif target_task == "early_baoding":
        actions = np.vstack(early_baoding_rollouts_df.action)
        pca = PCA(n_components=n_comp).fit(actions)
    else:
        pca = joblib.load(os.path.join(ROOT_DIR, "data", "pca", f"pca_muscle_act_{target_task}.joblib"))

    env = EnvironmentFactory.create(**env_config)
    vecnormalize = load_vecnormalize(env_path, env)
    # Evaluation only: freeze the normalization statistics, keep raw rewards
    vecnormalize.training = False
    vecnormalize.norm_reward = False
    model = load_model(model_path)

    # Entry k of `performance` is measured with n_comp - k retained components
    performance = []
    for k in range(n_comp):
        print("Environment: ", target_task, "component ", k)
        if sorting == "best_to_worst":
            components = pca.components_[:n_comp - k]
        elif sorting == "worst_to_best":
            components = pca.components_[k:]
        else:
            raise ValueError(f"Unknown sorting: {sorting}")

        performance_ep = []
        for n in range(num_ep):
            cum_reward = 0
            lstm_states = None
            obs = env.reset()
            episode_starts = np.ones((1,), dtype=bool)
            done = False
            while not done:
                action, lstm_states = model.predict(
                    vecnormalize.normalize_obs(obs),
                    state=lstm_states,
                    episode_start=episode_starts,
                    deterministic=True,
                )

                # Project the action onto the retained components, then map it
                # back to the 39-dimensional action space before stepping
                action_proj = np.dot(action.reshape(-1, 39) - pca.mean_, components.T)
                action_backproj = np.dot(action_proj, components) + pca.mean_
                obs, rewards, done, info = env.step(action_backproj.reshape(39,))
                episode_starts = done
                cum_reward += rewards
            performance_ep.append(cum_reward)
            print(f"Episode {n}, reward: {cum_reward}")
        performance_ep = np.array(performance_ep) / 1000  # Transform the reward into the solved fraction
        data_point = {
            'components': components,
            'solved_frac_mean': np.mean(performance_ep),
            'solved_frac_sem': np.std(performance_ep) / np.sqrt(len(performance_ep)),
        }
        performance.append(data_point)
        print(data_point)

    fp = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_action_components_{target_task}_{sorting}.pkl")
    # Make sure the long-running evaluation is not lost to a missing directory
    os.makedirs(os.path.dirname(fp), exist_ok=True)
    with open(fp, 'wb') as fp_acts_pcs:
        pickle.dump(performance, fp_acts_pcs)

4. Load the performances vs. number of principal actions

In [None]:
# Load, for every task and both component orderings, the performance curves
# saved by the evaluation cell.
sorting_modes = ["best_to_worst", "worst_to_best"]
target_task_list = ["baoding", "early_baoding", "hand_pose", "hand_reach", "pen", "reorient"]
# Human-readable task names used as plot labels
task_to_print_dict = {
    "baoding": "Baoding",
    "early_baoding": "Baoding step 12",
    "hand_pose": "Hand Pose",
    "hand_reach": "Hand Reach",
    "pen": "Pen",
    "reorient": "Reorient"
}
# task_perf_dict[task][mode] is the unpickled content of the result file
task_perf_dict = {}
for target_task in target_task_list:
    task_dict = {}
    for mode in sorting_modes:
        perf_path = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_action_components_{target_task}_{mode}.pkl")
        # Context manager so the file handle is closed right after loading
        with open(perf_path, 'rb') as perf_file:
            task_dict[mode] = pickle.load(perf_file)
    task_perf_dict[target_task] = task_dict
n_comp = 39

5. Plot the performance vs. number of dimensions removed in the action space

In [None]:
# Load the performance curves of the Baoding task for both component
# orderings and plot the solved fraction against the number of retained PCs.
sorting_modes = ["best_to_worst", "worst_to_best"]

perf_dict = {}
for mode in sorting_modes:
    perf_path = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_action_components_baoding_{mode}.pkl")
    # Context manager so the file handle is closed right after loading
    with open(perf_path, 'rb') as perf_file:
        perf_dict[mode] = pickle.load(perf_file)

n_comp = 39

label_dict = {
    "best_to_worst": "High to low EV",
    "worst_to_best": "Low to high EV",
}
color_list = ["dodgerblue", "red"]
plt.figure()
for (mode, performance_components), color in zip(perf_dict.items(), color_list):
    # The evaluation stores the results from n_comp retained components down
    # to a single one; reversing orders them by increasing number of PCs
    perfs_mean = np.array([d['solved_frac_mean'] for d in performance_components[::-1]])
    perfs_sem = np.array([d['solved_frac_sem'] for d in performance_components[::-1]])

    # The reversed curves correspond to 1..n_comp retained components
    plt.errorbar(np.arange(1, n_comp + 1), perfs_mean, yerr=perfs_sem, fmt='-', label=label_dict[mode], color=color)
# Axis styling is shared by both curves, so it is applied once
plt.xlabel('Number of PCs',fontsize=16,labelpad=5)
plt.ylabel('Solved fraction',fontsize=16,labelpad=5)
plt.yticks(fontsize=14)
plt.xticks(fontsize=14)
plt.subplots_adjust(left=0.2,bottom=0.23)
plt.legend(bbox_to_anchor=(1.02, 1.22), fontsize=14, ncol=2)
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", "pca_performance_contrib.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# Plot, for every task, the solved fraction against the number of retained
# PCs (components removed from low to high explained variance).
# TODO: should we also add the cumulative explained variance?
label_dict = {
    "best_to_worst": "High to low EV",
    "worst_to_best": "Low to high EV",
}
task_colors = {
    "baoding": "dodgerblue",
    "early_baoding": "blue",
    "pen": "blueviolet",
    "reorient": "violet",
    "hand_reach": "orange",
    "hand_pose": "red",
}

mode = "best_to_worst"
plt.figure()
# `task_perfs` (not `perf_dict`) so that the dict of the two-ordering plot
# is not overwritten by the loop variable
for task, task_perfs in task_perf_dict.items():
    performance_components = task_perfs[mode]
    color = task_colors[task]
    # Results are stored from n_comp retained components down to one;
    # reversing orders them by increasing number of PCs
    perfs_mean = np.array([d['solved_frac_mean'] for d in performance_components[::-1]])
    perfs_sem = np.array([d['solved_frac_sem'] for d in performance_components[::-1]])

    # The reversed curves correspond to 1..n_comp retained components
    plt.errorbar(np.arange(1, n_comp + 1), perfs_mean, yerr=perfs_sem, fmt='-', label=task_to_print_dict[task], color=color)
plt.xlabel('Number of PCs (muscles)',fontsize=19,labelpad=5)
plt.ylabel('Solved fraction',fontsize=19,labelpad=5)
plt.yticks(fontsize=19)
plt.xticks(np.arange(0, 41, 10), fontsize=19)
plt.xlim((-1, 41))
plt.subplots_adjust(left=0.2,bottom=0.23)
# plt.legend(bbox_to_anchor=(1, 0.9), fontsize=14)
# NOTE(review): this is the same output file as the two-ordering Baoding plot,
# so whichever cell runs last overwrites the other figure - confirm intended.
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", "pca_performance_contrib.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()

### Evaluate step 12 on all the other PCs

In [None]:
# Optional: evaluate the step-12 model while projecting its actions onto a
# subset of the principal components (PCs) of the action space.
# "best_to_worst" removes the low-variance components first, "worst_to_best"
# removes the high-variance components first.
n_comp = 39
num_ep = 10
target_task_list = ["baoding", "early_baoding"]  #, "early_baoding", "hand_pose", "hand_reach", "pen", "reorient"]
sorting = "best_to_worst"  # "best_to_worst", "worst_to_best"
env_config = {
    "env_name": "CustomMyoBaodingBallsP1",
    # Only the "solved" term contributes to the reward
    "weighted_reward_keys": {
        "pos_dist_1": 0,
        "pos_dist_2": 0,
        "act_reg": 0,
        "solved": 5,
        "done": 0,
        "sparse": 0,
    },
    "task_choice": "fixed",
    "goal_time_period": (5, 5),
    "seed": 42,
}
env_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/12_period_5/env.pkl",
)
model_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/12_period_5/model.zip",
)


for target_task in target_task_list:
    # First choose the PCA used to project the actions.
    # NOTE(review): `rollouts_df` and `early_baoding_rollouts_df` are only
    # created by the commented-out loading code of the dataset cell - confirm
    # they are in scope before running this cell.
    if target_task == "baoding":
        actions = np.vstack(rollouts_df.action)
        pca = PCA(n_components=n_comp).fit(actions)
    elif target_task == "early_baoding":
        actions = np.vstack(early_baoding_rollouts_df.action)
        pca = PCA(n_components=n_comp).fit(actions)
    else:
        pca = joblib.load(os.path.join(ROOT_DIR, "data", "pca", f"pca_muscle_act_{target_task}.joblib"))

    env = EnvironmentFactory.create(**env_config)
    vecnormalize = load_vecnormalize(env_path, env)
    # Evaluation only: freeze the normalization statistics, keep raw rewards
    vecnormalize.training = False
    vecnormalize.norm_reward = False
    model = load_model(model_path)

    # Entry k of `performance` is measured with n_comp - k retained components
    performance = []
    for k in range(n_comp):
        print("Environment: ", target_task, "component ", k)
        if sorting == "best_to_worst":
            components = pca.components_[:n_comp - k]
        elif sorting == "worst_to_best":
            components = pca.components_[k:]
        else:
            raise ValueError(f"Unknown sorting: {sorting}")

        performance_ep = []
        for n in range(num_ep):
            cum_reward = 0
            lstm_states = None
            obs = env.reset(random_phase=0)
            episode_starts = np.ones((1,), dtype=bool)
            done = False
            while not done:
                action, lstm_states = model.predict(
                    vecnormalize.normalize_obs(obs),
                    state=lstm_states,
                    episode_start=episode_starts,
                    deterministic=True,
                )

                # Project the action onto the retained components, then map it
                # back to the 39-dimensional action space before stepping
                action_proj = np.dot(action.reshape(-1, 39) - pca.mean_, components.T)
                action_backproj = np.dot(action_proj, components) + pca.mean_
                obs, rewards, done, info = env.step(action_backproj.reshape(39,))
                episode_starts = done
                cum_reward += rewards
            performance_ep.append(cum_reward)
            print(f"Episode {n}, reward: {cum_reward}")
        performance_ep = np.array(performance_ep) / 1000  # Transform the reward into the solved fraction
        data_point = {
            'components': components,
            'solved_frac_mean': np.mean(performance_ep),
            'solved_frac_sem': np.std(performance_ep) / np.sqrt(len(performance_ep)),
        }
        performance.append(data_point)
        print(data_point)

    fp = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_step_12_{target_task}_{sorting}.pkl")
    # Make sure the long-running evaluation is not lost to a missing directory
    os.makedirs(os.path.dirname(fp), exist_ok=True)
    with open(fp, 'wb') as fp_acts_pcs:
        pickle.dump(performance, fp_acts_pcs)

In [None]:
# Load, for every task, the performance curves of the step-12 model saved by
# the step-12 evaluation cell.
sorting_modes = ["best_to_worst"]
target_task_list = ["baoding", "early_baoding", "hand_pose", "hand_reach", "pen", "reorient"]
# Human-readable task names used as plot labels
task_to_print_dict = {
    "baoding": "Baoding",
    "early_baoding": "Baoding step 12",
    "hand_pose": "Hand Pose",
    "hand_reach": "Hand Reach",
    "pen": "Pen",
    "reorient": "Reorient"
}
# task_perf_dict[task][mode] is the unpickled content of the result file
task_perf_dict = {}
for target_task in target_task_list:
    task_dict = {}
    for mode in sorting_modes:
        perf_path = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_step_12_{target_task}_{mode}.pkl")
        # Context manager so the file handle is closed right after loading
        with open(perf_path, 'rb') as perf_file:
            task_dict[mode] = pickle.load(perf_file)
    task_perf_dict[target_task] = task_dict
n_comp = 39

In [None]:
# Plot, for every task, the solved fraction of the step-12 model against the
# number of retained PCs (components removed from low to high explained
# variance).
label_dict = {
    "best_to_worst": "High to low EV",
    "worst_to_best": "Low to high EV",
}

task_colors = {
    "baoding": "dodgerblue",
    "early_baoding": "blue",
    "pen": "blueviolet",
    "reorient": "violet",
    "hand_reach": "orange",
    "hand_pose": "red",
}

mode = "best_to_worst"
plt.figure()
# `task_perfs` (not `perf_dict`) so that the dict of the two-ordering plot
# is not overwritten by the loop variable
for task, task_perfs in task_perf_dict.items():
    performance_components = task_perfs[mode]
    color = task_colors[task]
    # Results are stored from n_comp retained components down to one;
    # reversing orders them by increasing number of PCs
    perfs_mean = np.array([d['solved_frac_mean'] for d in performance_components[::-1]])
    perfs_sem = np.array([d['solved_frac_sem'] for d in performance_components[::-1]])

    # The reversed curves correspond to 1..n_comp retained components
    plt.errorbar(np.arange(1, n_comp + 1), perfs_mean, yerr=perfs_sem, fmt='-', label=task_to_print_dict[task], color=color)
plt.xlabel('Number of PCs (muscles)',fontsize=19,labelpad=5)
plt.ylabel('Solved fraction',fontsize=19,labelpad=5)
plt.yticks(fontsize=19)
plt.xticks(np.arange(0, 41, 10), fontsize=19)
plt.xlim((-1, 41))
plt.subplots_adjust(left=0.2,bottom=0.23)
# plt.legend(bbox_to_anchor=(1, 0.9), fontsize=14)
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", "pca_performance_contrib_step_12.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# Plot the performance curve of the step-12 model projected on the PCs of one task
sorting = "best_to_worst"
task_to_print_dict = {
    "baoding": "Baoding",
    "early_baoding": "Baoding step 12",
    "hand_pose": "Hand Pose",
    "hand_reach": "Hand Reach",
    "pen": "Pen",
    "reorient": "Reorient"
}
# Pin the task explicitly instead of relying on whatever value the last
# executed loop left in `target_task`.
# NOTE(review): "baoding" is taken from the title and the output file name of
# this cell - confirm this is the intended source of the PCs.
target_task = "baoding"
fp = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_step_12_{target_task}_{sorting}.pkl")
# Context manager so the file handle is closed right after loading
with open(fp, "rb") as perf_file:
    task_perf = pickle.load(perf_file)
# Results are stored from n_comp retained components down to one; reversing
# orders them by increasing number of PCs
perfs_mean = np.array([d['solved_frac_mean'] for d in task_perf[::-1]])
perfs_sem = np.array([d['solved_frac_sem'] for d in task_perf[::-1]])

# The reversed curve corresponds to 1..n_comp retained components
plt.errorbar(np.arange(1, n_comp + 1), perfs_mean, yerr=perfs_sem, fmt='-')
plt.xlabel('Number of PCs',fontsize=16,labelpad=5)
plt.ylabel('Solved fraction',fontsize=16,labelpad=5)
plt.yticks(fontsize=14)
plt.xticks(fontsize=14)
plt.subplots_adjust(left=0.2,bottom=0.23)
plt.title("Baoding step 12 of final Baoding")
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", "perf_vs_comp_baoding_step_12_on_baoding.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()

## Question: does the space spanned by early_baoding work if there are no perturbations?

In [None]:
# Optional: evaluate the model, in an environment with fixed goal and object
# parameters, while projecting its actions onto a subset of the principal
# components (PCs) of the action space.
# "best_to_worst" removes the low-variance components first, "worst_to_best"
# removes the high-variance components first.
n_comp = 39
num_ep = 5
target_task_list = ["early_baoding"]  # , "hand_pose", "hand_reach", "pen", "reorient"]
sorting = "worst_to_best"  # "best_to_worst", "worst_to_best"
env_config = {
    "env_name": "MyoBaodingBallsP1",
    # Only the "solved" term contributes to the reward
    "weighted_reward_keys": {
        "pos_dist_1": 0,
        "pos_dist_2": 0,
        "act_reg": 0,
        "solved": 5,
        "done": 0,
        "sparse": 0,
    },
    # Every range below is degenerate (min == max), i.e. no randomization
    "goal_time_period": [5, 5],  # phase 2: (4, 6)
    "goal_xrange": (0.025, 0.025),  # phase 2: (0.020, 0.030)
    "goal_yrange": (0.028, 0.028),  # phase 2: (0.022, 0.032)
    # # # Randomization in physical properties of the baoding balls
    "obj_size_range": (
        0.022,
        0.022,
    ),  # (0.018, 0.024)   # Object size range. Nominal 0.022
    "obj_mass_range": (
        0.043,
        0.043,
        # # "obj_friction_change": (0.2, 0.001, 0.00002),  # (0.2, 0.001, 0.00002)
    ),
    "task_choice": "fixed",
    "seed": 42,
}
env_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl",
)
model_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/model.zip",
)

for target_task in target_task_list:
    # First choose the PCA used to project the actions.
    # NOTE(review): `rollouts_df` and `early_baoding_rollouts_df` are only
    # created by the commented-out loading code of the dataset cell - confirm
    # they are in scope before running this cell.
    if target_task == "baoding":
        actions = np.vstack(rollouts_df.action)
        pca = PCA(n_components=n_comp).fit(actions)
    elif target_task == "early_baoding":
        actions = np.vstack(early_baoding_rollouts_df.action)
        pca = PCA(n_components=n_comp).fit(actions)
    else:
        pca = joblib.load(os.path.join(ROOT_DIR, "data", "pca", f"pca_muscle_act_{target_task}.joblib"))

    env = EnvironmentFactory.create(**env_config)
    vecnormalize = load_vecnormalize(env_path, env)
    # Evaluation only: freeze the normalization statistics, keep raw rewards
    vecnormalize.training = False
    vecnormalize.norm_reward = False
    model = load_model(model_path)

    # Entry k of `performance` is measured with n_comp - k retained components
    performance = []
    for k in range(n_comp):
        print("Environment: ", target_task, "component ", k)
        if sorting == "best_to_worst":
            components = pca.components_[:n_comp - k]
        elif sorting == "worst_to_best":
            components = pca.components_[k:]
        else:
            raise ValueError(f"Unknown sorting: {sorting}")

        performance_ep = []
        for n in range(num_ep):
            cum_reward = 0
            lstm_states = None
            obs = env.reset()
            episode_starts = np.ones((1,), dtype=bool)
            done = False
            while not done:
                action, lstm_states = model.predict(
                    vecnormalize.normalize_obs(obs),
                    state=lstm_states,
                    episode_start=episode_starts,
                    deterministic=True,
                )

                # Project the action onto the retained components, then map it
                # back to the 39-dimensional action space before stepping
                action_proj = np.dot(action.reshape(-1, 39) - pca.mean_, components.T)
                action_backproj = np.dot(action_proj, components) + pca.mean_
                obs, rewards, done, info = env.step(action_backproj.reshape(39,))
                episode_starts = done
                cum_reward += rewards
            performance_ep.append(cum_reward)
            print(f"Episode {n}, reward: {cum_reward}")
        performance_ep = np.array(performance_ep) / 1000  # Transform the reward into the solved fraction
        data_point = {
            'components': components,
            'solved_frac_mean': np.mean(performance_ep),
            'solved_frac_sem': np.std(performance_ep) / np.sqrt(len(performance_ep)),
        }
        performance.append(data_point)
        print(data_point)

    fp = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_phase_1_{target_task}_{sorting}.pkl")
    # Make sure the long-running evaluation is not lost to a missing directory
    os.makedirs(os.path.dirname(fp), exist_ok=True)
    with open(fp, 'wb') as fp_acts_pcs:
        pickle.dump(performance, fp_acts_pcs)

## Compute the performance when projecting the actions on any possible subset of tasks

In [None]:
# Optional: for every combination of two or more tasks, fit a PCA on the
# pooled actions of those tasks and evaluate the model while projecting its
# actions onto a subset of the resulting principal components (PCs).
# "best_to_worst" removes the low-variance components first, "worst_to_best"
# removes the high-variance components first.
n_comp = 39
num_ep = 100
target_task_list = ["hand_pose", "hand_reach", "pen", "reorient"]  # "early_baoding",
sorting = "best_to_worst"  # "best_to_worst", "worst_to_best"

env_config = {
    "env_name": "MyoBaodingBallsP1",
    # Only the "solved" term contributes to the reward
    "weighted_reward_keys": {
        "pos_dist_1": 0,
        "pos_dist_2": 0,
        "act_reg": 0,
        "solved": 5,
        "done": 0,
        "sparse": 0,
    },
    # # "goal_time_period": [4, 6],  # phase 2: (4, 6)
    # # "goal_xrange": (0.020, 0.030),  # phase 2: (0.020, 0.030)
    # # "goal_yrange": (0.022, 0.032),  # phase 2: (0.022, 0.032)
    # # # Randomization in physical properties of the baoding balls
    "obj_size_range": (
        0.020,
        0.022,
    ),  # (0.018, 0.024)   # Object size range. Nominal 0.022
    "obj_mass_range": (
        0.14,
        0.16,
        # # "obj_friction_change": (0.2, 0.001, 0.00002),  # (0.2, 0.001, 0.00002)
    ),
    "task_choice": "fixed",
    "seed": 42,
}
env_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl",
)
model_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/model.zip",
)

for i in range(2, len(target_task_list) + 1):
    target_task_combinations = list(combinations(target_task_list, i))
    for target_task_sublist in target_task_combinations:
        print(f"Loading data for tasks {target_task_sublist}...")
        actions_list = []
        for target_task in target_task_sublist:
            # Collect the actions recorded for each task of the combination.
            # NOTE(review): `rollouts_df` and `early_baoding_rollouts_df` are
            # only created by commented-out loading code - confirm they are in
            # scope before adding the Baoding tasks to `target_task_list`.
            if target_task == "baoding":
                actions = np.vstack(rollouts_df.action)
            elif target_task == "early_baoding":
                actions = np.vstack(early_baoding_rollouts_df.action)
            else:
                actions = np.vstack(df_dict[target_task].action)
            actions_list.append(actions)
        actions = np.vstack(actions_list)

        print(f"Computing pca for tasks {target_task_sublist}...")
        pca = PCA(n_components=n_comp).fit(actions)
        out_path = os.path.join(ROOT_DIR, "data", "pca", f"pca_muscle_act_{'_'.join(target_task_sublist)}.joblib")
        with open(out_path, "wb") as f:
            joblib.dump(pca, f)
        env = EnvironmentFactory.create(**env_config)
        vecnormalize = load_vecnormalize(env_path, env)
        # Evaluation only: freeze the normalization statistics, keep raw rewards
        vecnormalize.training = False
        vecnormalize.norm_reward = False
        model = load_model(model_path)

        print(f"Running episodes for tasks {target_task_sublist}...")
        # Entry k of `performance` is measured with n_comp - k retained components
        performance = []
        for k in range(n_comp):
            print("Environment: ", target_task_sublist, "component ", k)
            if sorting == "best_to_worst":
                components = pca.components_[:n_comp - k]
            elif sorting == "worst_to_best":
                components = pca.components_[k:]
            else:
                raise ValueError(f"Unknown sorting: {sorting}")

            performance_ep = []
            for n in range(num_ep):
                cum_reward = 0
                lstm_states = None
                obs = env.reset()
                episode_starts = np.ones((1,), dtype=bool)
                done = False
                while not done:
                    action, lstm_states = model.predict(
                        vecnormalize.normalize_obs(obs),
                        state=lstm_states,
                        episode_start=episode_starts,
                        deterministic=True,
                    )

                    # Project the action onto the retained components, then map
                    # it back to the 39-dimensional action space before stepping
                    action_proj = np.dot(action.reshape(-1, 39) - pca.mean_, components.T)
                    action_backproj = np.dot(action_proj, components) + pca.mean_
                    obs, rewards, done, info = env.step(action_backproj.reshape(39,))
                    episode_starts = done
                    cum_reward += rewards
                performance_ep.append(cum_reward)
                print(f"Episode {n}, reward: {cum_reward}")
            performance_ep = np.array(performance_ep) / 1000  # Transform the reward into the solved fraction
            data_point = {
                'components': components,
                'solved_frac_mean': np.mean(performance_ep),
                'solved_frac_sem': np.std(performance_ep) / np.sqrt(len(performance_ep)),
            }
            performance.append(data_point)
            print(data_point)

        target_task_str = "_".join(target_task_sublist)
        fp = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_{target_task_str}_{sorting}.pkl")
        # Make sure the long-running evaluation is not lost to a missing directory
        os.makedirs(os.path.dirname(fp), exist_ok=True)
        with open(fp, 'wb') as fp_acts_pcs:
            pickle.dump(performance, fp_acts_pcs)

In [None]:
# Plot the performance curve for the different combinations of tasks
def plot_explained_variance_ratio(exp_var, label, color, ax=None, fig=None):
    """Draw a step plot of the explained-variance values in `exp_var`.

    The i-th value of `exp_var` is drawn at x = i + 1.

    Args:
        exp_var: sequence of explained-variance values to plot.
        label: legend label of the curve.
        color: color of the curve.
        ax: axes to draw on. A new figure and axes are created when it is None.
        fig: figure owning `ax`. When `ax` is given without `fig`, the figure
            is taken from `ax` (previously a supplied `ax` was silently
            discarded in that case).

    Returns:
        The `(fig, ax)` pair that was drawn on.
    """
    if ax is None:
        # Nothing to draw on: start a fresh figure
        fig, ax = plt.subplots()
    elif fig is None:
        fig = ax.get_figure()
    ax.step(range(1, len(exp_var) + 1), exp_var, where='mid', linewidth=3, color=color, label=label)
    ax.set_ylabel('Cum. explained variance',fontsize=16)
    ax.set_ylim([0, 1.05])
    # plt.legend(fontsize=14,loc='best')
    ax.tick_params(axis='both', labelsize=14)
    # ax.axhline(y=0.95, color='black', linestyle='--', alpha=0.5)
    # ax.axhline(y=0.85, color='black', linestyle='--', alpha=0.5)
    # ax.text(0, 0.96, '95%', color = 'black', fontsize=18)
    # ax.text(0, 0.86, '85%', color = 'black', fontsize=18)
    return fig, ax

def ev(X, X_approx, model_mean):
    """Return 1 minus the ratio between the squared error of `X_approx` with
    respect to `X` and the squared deviation of `X` from `model_mean`."""
    residual_ss = np.sum((X - X_approx) ** 2)
    total_ss = np.sum((X - model_mean) ** 2)
    return 1 - residual_ss / total_ss

# For every combination of two or more tasks, plot the variance of the muscle
# activations explained by the PCs of that combination together with the
# solved fraction, both against the number of PCs.
n_comp = 39
sorting = "best_to_worst"
target_task_list = ["hand_pose", "hand_reach", "pen", "reorient"]
task_to_print_dict = {
    "baoding": "Baoding",
    "early_baoding": "Baoding step 12",
    "hand_pose": "Hand Pose",
    "hand_reach": "Hand Reach",
    "pen": "Pen",
    "reorient": "Reorient"
}
# NOTE(review): `rollouts_df` is only created by the commented-out loading
# code of the dataset cell - confirm it is in scope before running this cell.
muscle_act = np.vstack(rollouts_df.muscle_act)
for i in range(2, len(target_task_list) + 1):
    target_task_combinations = list(combinations(target_task_list, i))
    for target_task_sublist in target_task_combinations:
        target_task_str = "_".join(target_task_sublist)
        perf_path = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_{target_task_str}_{sorting}.pkl")
        # Context manager so the file handle is closed right after loading
        with open(perf_path, "rb") as perf_file:
            task_perf = pickle.load(perf_file)
        # Results are stored from n_comp retained components down to one;
        # reversing orders them by increasing number of PCs
        perfs_mean = np.array([d['solved_frac_mean'] for d in task_perf[::-1]])
        perfs_sem = np.array([d['solved_frac_sem'] for d in task_perf[::-1]])

        pca = joblib.load(os.path.join(ROOT_DIR, "data", "pca", f"pca_muscle_act_{target_task_str}.joblib"))

        # Explained variance with all components first, then with the last
        # 1, 2, ..., n_comp - 1 components zeroed out
        muscle_act_projected = pca.transform(muscle_act)
        muscle_act_approx = pca.inverse_transform(muscle_act_projected)
        exp_var = ev(muscle_act, muscle_act_approx, pca.mean_)
        exp_var_ratio_list = [exp_var]
        # Dedicated loop variable: the original reused `i`, shadowing the
        # combination-size variable of the outer loop
        for n_dropped in range(1, n_comp):
            muscle_act_projected[:, -n_dropped:] = 0
            muscle_act_approx = pca.inverse_transform(muscle_act_projected)
            exp_var = ev(muscle_act, muscle_act_approx, pca.mean_)
            exp_var_ratio_list.append(exp_var)
        # Order by increasing number of retained components
        exp_var_ratio_list.reverse()

        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()
        plot_explained_variance_ratio(exp_var_ratio_list, "Explained variance", 'black', ax=ax1, fig=fig)
        ax2.errorbar(np.arange(1, n_comp + 1), perfs_mean, yerr=perfs_sem, fmt='-', color="dodgerblue")
        ax1.set_xlabel('Number of PCs', fontsize=16, labelpad=5)
        ax2.set_ylabel('Solved fraction', fontsize=16, labelpad=5, color="dodgerblue")
        ax2.tick_params(axis='y', labelsize=14, colors="dodgerblue")
        ax1.set_title(", ".join([task_to_print_dict[task] for task in target_task_sublist]), fontsize=16)
        plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"perf_vs_comp_{target_task_str}.png"), format="png", dpi=600, bbox_inches="tight")
        plt.show()

In [None]:
# Toy data: a quadratic and a linear curve over the same x values
x = range(10)
y1 = [value**2 for value in x]
y2 = [value*10 for value in x]

# Left y-axis: first curve, drawn and labelled in blue
fig, ax1 = plt.subplots()
ax1.plot(x, y1, color='blue')
ax1.set_ylabel('Y1', color='blue')

# Right y-axis (twin axes sharing the x-axis): second curve, drawn and
# labelled in red
ax2 = ax1.twinx()
ax2.plot(x, y2, color='red')
ax2.set_ylabel('Y2', color='red')

plt.show()

### B. Visualize the first principal actions
1. Load the principal actions

In [None]:
# Load the file from Basecamp : 'performance_actions_components_t'
# NOTE: unpickling can execute arbitrary code - only load this file when it
# comes from a trusted source.
basecamp_perf_path = os.path.join(ROOT_DIR, "data", "basecamp", "performance_action_components_t.pkl")
# Context manager so the file handle is closed right after loading
with open(basecamp_perf_path, 'rb') as perf_file:
    performance_components = pickle.load(perf_file)
# The principal actions are the components stored in the first entry
principal_actions = performance_components[0]['components']

2. Save frames of each action

In [None]:
# For each of the first five principal actions, apply it repeatedly from the
# initial state and show one frame every `plot_every` steps.
plot_every = 15
for action_idx in range(5):
    env_name = "CustomMyoBaodingBallsP2"

    config = set_config(period=5,rot_dir="cw")

    eval_env = EnvironmentFactory.create(env_name, **config)
    frames = []

    # Just one episode
    eval_env.reset()
    qpos = eval_env.init_qpos.copy()
    qvel = eval_env.init_qvel.copy()
    # Presumably moves the balls and the target sites far away so that only
    # the hand is visible - TODO confirm what qpos indices 25 and 32 are
    qpos[25] = 10
    qpos[32] = 10
    eval_env.sim.model.site_pos[eval_env.target1_sid, 2] = 10
    eval_env.sim.model.site_pos[eval_env.target2_sid, 2] = 10

    eval_env.set_state(qpos, qvel)
    timestep = 0
    while timestep < 16:
        curr_frame = eval_env.render_camera_offscreen(['hand_top', 'hand_bottom', 'hand_side_inter', 'hand_side_exter', 'plam_lookat'])
        frames.append(curr_frame)
        timestep += 1
        # The same principal action is applied at every step
        eval_env.step(principal_actions[action_idx])

    # Keep the second rendered view of every `plot_every`-th step (presumably
    # 'hand_bottom', the second requested camera - TODO confirm)
    cam_frames = [camera_views[1] for camera_views in frames[::plot_every]]
    num_frames = len(cam_frames)

    print("Plotting component ", action_idx)
    # Create a figure with a single row and the number of columns equal to the number of frames
    fig, axes = plt.subplots(1, num_frames, figsize=(num_frames * 4, 4))
    # plt.subplots returns a bare Axes (not an array) for a single column
    axes = np.atleast_1d(axes)

    # Remove axes for all subplots
    for ax in axes:
        ax.axis('off')

    # Display each frame, cropped, in its respective subplot
    for i, frame in enumerate(cam_frames):
        axes[i].imshow(frame[120:, 160:480])

    plt.tight_layout()  # Adjust spacing between subplots
    # plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", f"principal_component_{action_idx}_frames.png"), format="png", dpi=600, bbox_inches="tight")
    plt.show()


In [None]:
# Principal components of the pose (rotation CCW)

def plot_hand_pos(hand_pos, fig=None, ax=None):
    """Render a static hand pose with the 'hand_bottom' camera and draw it on an axis.

    A fresh "CustomMyoBaodingBallsP2" environment is created on every call, its
    state is set to the requested pose with zero velocities, and the cropped
    camera image is shown on ``ax``.

    Args:
        hand_pos: 1-D array with the leading entries of ``qpos``. It is padded
            with 14 zeros to form the full ``qpos`` (presumably 23 joint angles,
            giving 37 entries — TODO confirm against the model).
        fig: Figure that owns ``ax``. If omitted, it is taken from ``ax`` or a
            new 8x8 figure is created.
        ax: Axis to draw on. If omitted, the current axis of ``fig`` is used, or
            a new figure/axis pair is created when ``fig`` is omitted as well.

    Returns:
        The ``(fig, ax)`` pair that was drawn on.
    """
    env_name = "CustomMyoBaodingBallsP2"
    env = EnvironmentFactory.create(env_name,)

    # NOTE(review): presumably moves the target sites and the two balls
    # (qpos 25 and 32) far away along z so they do not appear — TODO confirm.
    env.sim.model.site_pos[env.target1_sid, 2] = 10
    env.sim.model.site_pos[env.target2_sid, 2] = 10
    qpos = np.concatenate((hand_pos, np.zeros(14)))
    qpos[25] = 10
    qpos[32] = 10
    qvel = np.zeros(35)
    env.set_state(qpos, qvel)
    frame = env.render_camera_offscreen(['hand_bottom'])[0]

    # Resolve whichever of fig/ax the caller did not supply instead of failing
    # on a missing axis or silently ignoring a supplied one.
    if ax is None:
        if fig is None:
            fig, ax = plt.subplots(figsize=(8, 8))
        else:
            ax = fig.gca()
    elif fig is None:
        fig = ax.figure

    ax.axis('off')
    ax.imshow(frame[120:, 160:480])  # crop the rendered image
    return fig, ax


n_comp = 23
# One row per rollout step, holding the first 23 entries of its observation.
hand_pos_mat = np.stack([obs[:23] for obs in rollouts_ccw["observation"]])
pca = PCA(n_components=n_comp)
pca.fit(hand_pos_mat)

# Uncomment to persist the fitted PCA:
# out_path = os.path.join(ROOT_DIR, "data", "pca", "pca_pose_ccw.joblib")
# joblib.dump(pca, out_path)


In [None]:
# Reload the saved pose PCA and, for each of the first components, render the
# mean pose shifted by plus and minus one unit of that component side by side.
pca_path = os.path.join(ROOT_DIR, "data", "pca", "pca_pose_ccw.joblib")
pca = joblib.load(pca_path)

num_frames = 2
num_components = 5
for i in range(num_components):
    fig, axes = plt.subplots(1, num_frames, figsize=(num_frames * 6, 6))
    for ax in axes:
        ax.axis('off')
    # Left panel: mean + component, right panel: mean - component.
    for sign, pose_ax in zip((1, -1), axes):
        plot_hand_pos(pca.mean_ + sign * pca.components_[i], fig, pose_ax)
    out_file = os.path.join(
        ROOT_DIR, "data", "figures", "panel_3", f"pose_principal_component_{i}_frames.png"
    )
    fig.savefig(out_file, format="png", dpi=600, bbox_inches="tight")

    fig.show()

### C. Heatmap of principal actions

1. Load the principal actions (the same data as generated in point A.3 and loaded in point A.4)

In [None]:
# Load the file from Basecamp: 'performance_action_components_t.pkl'
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted source.
components_path = os.path.join(ROOT_DIR, "data", "basecamp", "performance_action_components_t.pkl")
with open(components_path, "rb") as components_file:  # ensure the handle is closed
    performance_components = pickle.load(components_file)
# Collect the 'components' entry of every element and keep the first one.
principal_actions = [d['components'] for d in performance_components][0]

2. Plot the heatmap

In [None]:
# Heatmap of the first 13 principal actions (rows) against their per-muscle
# weights (columns).
weights_df = pd.DataFrame(principal_actions[:13])
# Label rows 1..N so both axes use the same 1-based numbering.
weights_df.index = np.arange(1, len(weights_df) + 1)
fig = sns.heatmap(weights_df, cmap="coolwarm").get_figure()
plt.xlabel('Muscles', fontsize=21)
plt.ylabel('Principal actions', fontsize=21)
plt.yticks(rotation=0, fontsize=17)
# Heatmap cell k (0-based) spans [k, k + 1], so the centre of the cell for
# muscle m (1-based) is at m - 0.5; without the offset the ticks sit on the
# boundary between two cells.
muscle_labels = np.arange(1, weights_df.shape[1] + 1, 3)
plt.xticks(ticks=muscle_labels - 0.5, labels=muscle_labels, rotation=45, fontsize=17)
plt.subplots_adjust(left=0.15, bottom=0.2)
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", "principal_components_muscle_weights.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# Scratch cell: displays the values 0, 21, 42, ... below 199 (step 21).
np.arange(0,199,21)

### D. Principal action vs. phase of rotation (time step)

3. a. Compute the average principal actions \
b. Plot the principal action (PA) weights vs. time for the rotation phase

In [None]:
# PCA of the step-averaged actions of the CW rollouts; the projection of every
# time step on the leading components is shown as a heatmap
# (rows: components, columns: time steps).
num_muscles = 39
num_components_plot = 10
t_min = 1  # Remove the first step
t_max = 200
steps_per_second = 40  # NOTE(review): rate implied by the original tick labels — TODO confirm

pca = PCA(n_components=num_muscles)
mean_actions = np.vstack(rollouts_cw.groupby("step")["action"].mean())
mean_weights = pca.fit_transform(mean_actions)

# MinMaxScaler rescales every column independently, so one call on the
# (time, component) slice normalises each component to [-1, 1] on its own.
# Slicing with t_min:t_max avoids a hard-coded reshape, and the result stays
# 2-D even when a single component is plotted.
minmax = MinMaxScaler(feature_range=(-1, 1))
weights = minmax.fit_transform(mean_weights[t_min:t_max, :num_components_plot]).T

fig, ax = plt.subplots(figsize=(20, 2.8))
sns.heatmap(pd.DataFrame(weights), cmap="coolwarm", ax=ax)
# sns.heatmap(mean_weights.T,cmap="coolwarm", ax=ax)
# The -0.5 offset puts each 1-based label at the centre of its heatmap row.
component_labels = np.arange(1, num_components_plot + 1, 1)
plt.yticks(ticks=component_labels - 0.5, labels=component_labels, rotation=0, fontsize=16)
plt.xticks(
    rotation=45,
    ticks=np.linspace(t_min - 1, t_max - 1, 6),
    labels=np.linspace(t_min // steps_per_second, t_max // steps_per_second, 6),
    fontsize=16,
)
plt.xlabel('Time [s]', fontsize=21)
plt.ylabel('Principal\nactions', fontsize=21)
plt.subplots_adjust(left=0.15, bottom=0.2)
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", f"top_{num_components_plot}_principal_components_episode.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# PCA of the step-averaged poses (first `num_joints` observation entries) of
# the CW rollouts; the projection of every time step on the leading components
# is shown as a heatmap (rows: components, columns: time steps).
num_joints = 23
num_components_plot = 10
t_min = 1  # Remove the first step
t_max = 200
steps_per_second = 40  # NOTE(review): rate implied by the original tick labels — TODO confirm

pca = PCA(n_components=num_joints)
mean_poses = np.vstack(rollouts_cw.groupby("step")["observation"].mean())[:, :num_joints]
mean_weights = pca.fit_transform(mean_poses)

# MinMaxScaler rescales every column independently, so one call on the
# (time, component) slice normalises each component to [-1, 1] on its own.
# Slicing with t_min:t_max avoids a hard-coded reshape, and the result stays
# 2-D even when a single component is plotted.
minmax = MinMaxScaler(feature_range=(-1, 1))
weights = minmax.fit_transform(mean_weights[t_min:t_max, :num_components_plot]).T

fig, ax = plt.subplots(figsize=(20, 2.8))
sns.heatmap(pd.DataFrame(weights), cmap="coolwarm", ax=ax)
# sns.heatmap(mean_weights.T,cmap="coolwarm", ax=ax)
# The -0.5 offset puts each 1-based label at the centre of its heatmap row.
component_labels = np.arange(1, num_components_plot + 1, 1)
plt.yticks(ticks=component_labels - 0.5, labels=component_labels, rotation=0, fontsize=16)
plt.xticks(
    rotation=45,
    ticks=np.linspace(t_min - 1, t_max - 1, 6),
    labels=np.linspace(t_min // steps_per_second, t_max // steps_per_second, 6),
    fontsize=16,
)
plt.xlabel('Time [s]', fontsize=21)
plt.ylabel('Principal\nposes', fontsize=21)
plt.subplots_adjust(left=0.15, bottom=0.2)
# plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", f"top_{num_components_plot}_principal_poses_episode.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# Plot the x/y/z coordinates of both balls over one CW episode, expressed
# relative to the mean position of the corresponding target.
episode_num = 100
episode_mask = (rollouts_df.episode == episode_num) & (rollouts_df.task == "cw")
obs_list = rollouts_df[episode_mask].observation.to_list()
# Observation slices scaled by 100 (presumably metres -> centimetres, matching
# the axis label — TODO confirm the observation layout and units).
ball1_pos = np.vstack([o[23:26] for o in obs_list]) * 100
ball2_pos = np.vstack([o[29:32] for o in obs_list]) * 100

target1_pos = np.vstack([o[35:38] for o in obs_list]) * 100
target2_pos = np.vstack([o[38:41] for o in obs_list]) * 100

target1_center = np.mean(target1_pos, axis=0)
target2_center = np.mean(target2_pos, axis=0)

ball1_rel_pos = ball1_pos - target1_center
ball2_rel_pos = ball2_pos - target2_center

# Angle of each ball in the x-y plane around its target centre, in degrees
# (not used by the plot below).
ball1_angle = np.arctan2(ball1_rel_pos[:, 1], ball1_rel_pos[:, 0]) / np.pi * 180
ball2_angle = np.arctan2(ball2_rel_pos[:, 1], ball2_rel_pos[:, 0]) / np.pi * 180

fig, ax = plt.subplots(figsize=(10, 1.5))

# plt.get_cmap replaces matplotlib.cm.get_cmap, which is deprecated since
# Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap("coolwarm")

# Ball 1 uses the low end of the colormap, ball 2 the high end.
ball_curves = (
    (ball1_rel_pos, "ball 1", (0., 0.15, 0.3)),
    (ball2_rel_pos, "ball 2", (0.99, 0.85, 0.7)),
)
for rel_pos, ball_name, shades in ball_curves:
    for axis_idx, (axis_name, shade) in enumerate(zip("xyz", shades)):
        ax.plot(rel_pos[:, axis_idx], label=f"{axis_name} {ball_name}", color=cmap(shade))

ax.xaxis.set_ticks([])
ax.set_ylabel("Position [cm]")
ax.legend(bbox_to_anchor=(1.15, 1.11))
# plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", f"ball_coords_episode_{episode_num}.png"), format="png", dpi=1000, bbox_inches="tight")
fig.show()

In [None]:
# Plot the time series of the joint positions
# (first 23 observation entries of every step of one CW episode).
episode_num = 100
episode_mask = (rollouts_df.episode == episode_num) & (rollouts_df.task == "cw")
obs_list = rollouts_df[episode_mask].observation.to_list()
joint_pos = np.vstack([o[:23] for o in obs_list])

fig, ax = plt.subplots(figsize=(10, 1.5))

# plt.get_cmap replaces matplotlib.cm.get_cmap, which is deprecated since
# Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap("plasma")

# One line per joint; the +1 / +2 offsets keep the colours away from both
# extremes of the colormap.
num_joint_curves = joint_pos.shape[1]
for joint_idx, joint_pose in enumerate(joint_pos.T):
    color = cmap((joint_idx + 1) / (num_joint_curves + 2))
    ax.plot(joint_pose, color=color, alpha=0.5)

ax.xaxis.set_ticks([])
ax.set_ylabel("Angle [rad]")
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", f"joint_pos_episode_{episode_num}.png"), format="png", dpi=1000, bbox_inches="tight")
fig.show()

In [None]:
# Scratch cell: list the keys of rollouts_df (its column labels, if it is a DataFrame).
rollouts_df.keys()

In [None]:
# Plot the time series of the muscle activations
# (one line per column of the stacked `muscle_act` entries of one CW episode).
episode_num = 100
episode_mask = (rollouts_df.episode == episode_num) & (rollouts_df.task == "cw")
muscle_act = np.vstack(rollouts_df[episode_mask].muscle_act)

fig, ax = plt.subplots(figsize=(10, 1.5))

# plt.get_cmap replaces matplotlib.cm.get_cmap, which is deprecated since
# Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap("plasma")

# The +1 / +2 offsets keep the colours away from both extremes of the colormap.
num_act_curves = muscle_act.shape[1]
for muscle_idx, act in enumerate(muscle_act.T):
    color = cmap((muscle_idx + 1) / (num_act_curves + 2))
    ax.plot(act, color=color, alpha=0.3)

ax.xaxis.set_ticks([])
ax.set_ylabel("Muscle activation")  # fixed typo in the axis label ("Muslce")
ax.set_yticks([0, 0.5, 1])
ax.set_ylim([-0.1, 1.1])
# ax.legend(bbox_to_anchor=(1.15, 1.11))
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", f"muscle_act_episode_{episode_num}.png"), format="png", dpi=1000, bbox_inches="tight")
fig.show()

In [None]:
# Scratch cell: largest value over all stacked `muscle_act` entries
# (presumably a sanity check of the activation range).
np.max(np.vstack(rollouts_df.muscle_act))

### Save screenshot at different steps of the episode

In [None]:
# Run the trained recurrent policy for `num_ep` episodes on the CW task and
# store the offscreen camera renders of every step in `frames`.
num_ep = 1
n_comp = 39  # not read in this cell; kept so the notebook state is unchanged

MODEL_DIR = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume",
)
PATH_TO_NORMALIZED_ENV = os.path.join(MODEL_DIR, "env.pkl")
PATH_TO_PRETRAINED_NET = os.path.join(MODEL_DIR, "model.zip")

env_name = "CustomMyoBaodingBallsP2"
render = False  # not read in this cell

config = set_config(period=5, rot_dir="cw")
rollouts = []  # not read in this cell

# The vectorised env is only used to restore the VecNormalize statistics and
# to load the model; the episode itself runs on the plain `eval_env` below.
envs = make_parallel_envs(env_name, config, num_env=1)
envs = VecNormalize.load(PATH_TO_NORMALIZED_ENV, envs)
envs.training = False  # evaluation mode
envs.norm_reward = False
# Override the learning-rate / clip-range entries when loading; each maps any
# input to 0.
custom_objects = {
    "learning_rate": lambda _: 0,
    "lr_schedule": lambda _: 0,
    "clip_range": lambda _: 0,
}
model = RecurrentPPO.load(
    PATH_TO_PRETRAINED_NET, env=envs, device="cpu", custom_objects=custom_objects
)

eval_model = model
eval_env = EnvironmentFactory.create(env_name, **config)
camera_ids = ['hand_top', 'hand_bottom', 'hand_side_inter', 'hand_side_exter', 'plam_lookat']
frames = []
for n in range(num_ep):
    print(n)
    cum_reward = 0
    lstm_states = None  # fresh recurrent state at the start of each episode
    obs = eval_env.reset()
    episode_starts = np.ones((1,), dtype=bool)
    done = False
    while not done:
        # Render before stepping so the first frame shows the reset state.
        frames.append(eval_env.render_camera_offscreen(camera_ids))
        action, lstm_states = eval_model.predict(
            envs.normalize_obs(obs),  # the policy expects normalised observations
            state=lstm_states,
            episode_start=episode_starts,
            deterministic=True,
        )

        obs, rewards, done, info = eval_env.step(action)
        # Keep the (1,) boolean array form used for the first step instead of
        # passing the scalar `done` of the non-vectorised env.
        episode_starts = np.array([done], dtype=bool)
        cum_reward += rewards
    print(cum_reward)


In [None]:
# Starting at step 20, take every 30th stored render and keep entry 1 of it
# (the second requested camera, assuming results come back in request order).
cam_frames = [views[1] for views in frames[20::30]]
num_frames = len(cam_frames)
if num_frames == 0:
    # plt.subplots would fail with a less helpful message for zero columns.
    raise ValueError("No frames selected: the episode has fewer than 21 rendered steps.")

# Create a figure with a single row and the number of columns equal to the number of frames.
# squeeze=False keeps `axes` a 2-D array even for a single frame.
fig, axes = plt.subplots(1, num_frames, figsize=(num_frames * 8, 8), squeeze=False)
axes = axes.ravel()

# Hide the axes and display each frame in its respective subplot
for ax, frame in zip(axes, cam_frames):
    ax.axis('off')
    ax.imshow(frame)

plt.tight_layout()  # Adjust spacing between subplots
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_3", "episode_frames.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()