## Analysis of principal actions (panel C)

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import pickle
from definitions import ROOT_DIR, NUM_MUSCLES
from sklearn.decomposition import PCA
from envs.environment_factory import EnvironmentFactory
from main_eval import load_vecnormalize, load_model
from itertools import combinations

In [None]:
# Load the small-variation datasets.
# Each task name maps to the HDF5 file (inside <ROOT_DIR>/data/datasets) that
# stores its recorded rollouts.
df_name_dict = dict(
    baoding="baoding_sv.h5",
    early_baoding="baoding_step_12.h5",
    hand_pose="hand_pose.h5",
    hand_reach="hand_reach.h5",
    pen="pen.h5",
    reorient="reorient.h5",
)

# Read every file into a DataFrame, keyed by the same task name
dataset_dir = os.path.join(ROOT_DIR, "data", "datasets")
df_dict = {}
for task_name, file_name in df_name_dict.items():
    df_dict[task_name] = pd.read_hdf(os.path.join(dataset_dir, file_name))

In [None]:
# Optional: evaluate the policy loaded from `model_path` in the Baoding
# environment while restricting its actions to a subset of the principal
# components (PCs) fitted on the actions recorded for each target task.
# "best_to_worst" removes the low-variance components first, "worst_to_best"
# removes the high-variance components first.
n_comp = NUM_MUSCLES
num_ep = 10
save_results = False
target_task_list = ["pen"]  # , "hand_pose", "hand_reach", "pen", "reorient"]
sorting = "best_to_worst"  # "best_to_worst", "worst_to_best"
env_config = {
    "env_name": "MyoBaodingBallsP1",
    "weighted_reward_keys": {
        "pos_dist_1": 0,
        "pos_dist_2": 0,
        "act_reg": 0,
        "solved": 5,
        "done": 0,
        "sparse": 0,
    },
    # # "goal_time_period": [4, 6],  # phase 2: (4, 6)
    # # "goal_xrange": (0.020, 0.030),  # phase 2: (0.020, 0.030)
    # # "goal_yrange": (0.022, 0.032),  # phase 2: (0.022, 0.032)
    # # # Randomization in physical properties of the baoding balls
    "obj_size_range": (
        0.020,
        0.022,
    ),  # (0.018, 0.024)   # Object size range. Nominal 0.022
    "obj_mass_range": (
        0.14,
        0.16,
    ),
    # # "obj_friction_change": (0.2, 0.001, 0.00002),  # (0.2, 0.001, 0.00002)
    "task_choice": "fixed",
    "seed": 42,
}
env_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl",
)
model_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/model.zip",
)

for target_task in target_task_list:
    # Fit the PCA used for the projection on the target task's recorded actions
    actions = np.vstack(df_dict[target_task].action)
    pca = PCA(n_components=n_comp).fit(actions)
    # Take the action dimensionality from the data the PCA was fitted on
    # rather than hard-coding it.
    action_dim = actions.shape[1]

    # A fresh environment, observation normalizer and model for every task
    env = EnvironmentFactory.create(**env_config)
    vecnormalize = load_vecnormalize(env_path, env)
    vecnormalize.training = False  # do not update the running statistics
    vecnormalize.norm_reward = False  # keep raw rewards
    model = load_model(model_path)

    performance = []
    for k in range(n_comp):
        print("Environment: ", target_task, "component ", k)
        # Step k keeps n_comp - k components, dropped from the chosen end
        if sorting == "best_to_worst":
            components = pca.components_[:n_comp - k]
        elif sorting == "worst_to_best":
            components = pca.components_[k:]
        else:
            raise ValueError(f"Unknown sorting: {sorting}")

        performance_ep = []
        for n in range(num_ep):
            cum_reward = 0
            lstm_states = None
            obs = env.reset()
            episode_starts = np.ones((1,), dtype=bool)
            done = False
            while not done:
                action, lstm_states = model.predict(
                    vecnormalize.normalize_obs(obs),
                    state=lstm_states,
                    episode_start=episode_starts,
                    deterministic=True,
                )

                # Project the centered action onto the retained components,
                # then map it back to the full action space.
                action_proj = np.dot(action.reshape(-1, action_dim) - pca.mean_, components.T)
                action_backproj = np.dot(action_proj, components) + pca.mean_
                obs, rewards, done, info = env.step(action_backproj.reshape(action_dim))
                episode_starts = done
                cum_reward += rewards
            performance_ep.append(cum_reward)
            print(f"Episode {n}, reward: {cum_reward}")
        performance_ep = np.array(performance_ep) / 1000  # Transform the reward into the solved fraction
        data_point = {
            'components': components,
            'solved_frac_mean': np.mean(performance_ep),
            # Standard error of the mean (np.std default, i.e. ddof=0)
            'solved_frac_sem': np.std(performance_ep) / np.sqrt(len(performance_ep)),
        }
        performance.append(data_point)
    if save_results:
        fp = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_action_components_{target_task}_{sorting}.pkl")
        os.makedirs(os.path.dirname(fp), exist_ok=True)
        # The context manager closes the file even if pickling fails
        with open(fp, 'wb') as fp_acts_pcs:
            pickle.dump(performance, fp_acts_pcs)

### Evaluate step 12 on all the other PCs

In [None]:
# Optional: evaluate the curriculum step-12 policy (loaded from `model_path`)
# while restricting its actions to a subset of the principal components (PCs)
# fitted on the actions recorded for each target task.
# "best_to_worst" removes the low-variance components first, "worst_to_best"
# removes the high-variance components first.
n_comp = NUM_MUSCLES
num_ep = 10
save_results = False
target_task_list = ["baoding"]  #, "early_baoding", "hand_pose", "hand_reach", "pen", "reorient"]
sorting = "best_to_worst"  # "best_to_worst", "worst_to_best"
env_config = {
    "env_name": "CustomMyoBaodingBallsP1",
    "weighted_reward_keys": {
        "pos_dist_1": 0,
        "pos_dist_2": 0,
        "act_reg": 0,
        "solved": 5,
        "done": 0,
        "sparse": 0,
    },
    "task_choice": "fixed",
    "goal_time_period": (5, 5),
    "seed": 42,
}
env_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/12_period_5/env.pkl",
)
model_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/12_period_5/model.zip",
)


for target_task in target_task_list:
    # Fit the PCA used for the projection on the target task's recorded actions
    actions = np.vstack(df_dict[target_task].action)
    pca = PCA(n_components=n_comp).fit(actions)
    # Take the action dimensionality from the data the PCA was fitted on
    # rather than hard-coding it.
    action_dim = actions.shape[1]

    # A fresh environment, observation normalizer and model for every task
    env = EnvironmentFactory.create(**env_config)
    vecnormalize = load_vecnormalize(env_path, env)
    vecnormalize.training = False  # do not update the running statistics
    vecnormalize.norm_reward = False  # keep raw rewards
    model = load_model(model_path)

    performance = []
    for k in range(n_comp):
        print("Environment: ", target_task, "component ", k)
        # Step k keeps n_comp - k components, dropped from the chosen end
        if sorting == "best_to_worst":
            components = pca.components_[:n_comp - k]
        elif sorting == "worst_to_best":
            components = pca.components_[k:]
        else:
            raise ValueError(f"Unknown sorting: {sorting}")

        performance_ep = []
        for n in range(num_ep):
            cum_reward = 0
            lstm_states = None
            obs = env.reset(random_phase=0)
            episode_starts = np.ones((1,), dtype=bool)
            done = False
            while not done:
                action, lstm_states = model.predict(
                    vecnormalize.normalize_obs(obs),
                    state=lstm_states,
                    episode_start=episode_starts,
                    deterministic=True,
                )

                # Project the centered action onto the retained components,
                # then map it back to the full action space.
                action_proj = np.dot(action.reshape(-1, action_dim) - pca.mean_, components.T)
                action_backproj = np.dot(action_proj, components) + pca.mean_
                obs, rewards, done, info = env.step(action_backproj.reshape(action_dim))
                episode_starts = done
                cum_reward += rewards
            performance_ep.append(cum_reward)
            print(f"Episode {n}, reward: {cum_reward}")
        performance_ep = np.array(performance_ep) / 1000  # Transform the reward into the solved fraction
        data_point = {
            'components': components,
            'solved_frac_mean': np.mean(performance_ep),
            # Standard error of the mean (np.std default, i.e. ddof=0)
            'solved_frac_sem': np.std(performance_ep) / np.sqrt(len(performance_ep)),
        }
        performance.append(data_point)

    if save_results:
        fp = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_step_12_{target_task}_{sorting}.pkl")
        os.makedirs(os.path.dirname(fp), exist_ok=True)
        # The context manager closes the file even if pickling fails
        with open(fp, 'wb') as fp_acts_pcs:
            pickle.dump(performance, fp_acts_pcs)

## Compute the performance when projecting the actions on the PCs fitted on every combination of two or more tasks

In [None]:
# Optional: for every combination of two or more target tasks, fit a PCA on the
# pooled actions of those tasks and evaluate the policy loaded from
# `model_path` while restricting its actions to a subset of these PCs.
# "best_to_worst" removes the low-variance components first, "worst_to_best"
# removes the high-variance components first.
n_comp = NUM_MUSCLES
num_ep = 10
save_results = False
target_task_list = ["hand_pose", "hand_reach", "pen", "reorient"]  # "early_baoding",
sorting = "best_to_worst"  # "best_to_worst", "worst_to_best"

env_config = {
    "env_name": "MyoBaodingBallsP1",
    "weighted_reward_keys": {
        "pos_dist_1": 0,
        "pos_dist_2": 0,
        "act_reg": 0,
        "solved": 5,
        "done": 0,
        "sparse": 0,
    },
    # # "goal_time_period": [4, 6],  # phase 2: (4, 6)
    # # "goal_xrange": (0.020, 0.030),  # phase 2: (0.020, 0.030)
    # # "goal_yrange": (0.022, 0.032),  # phase 2: (0.022, 0.032)
    # # # Randomization in physical properties of the baoding balls
    "obj_size_range": (
        0.020,
        0.022,
    ),  # (0.018, 0.024)   # Object size range. Nominal 0.022
    "obj_mass_range": (
        0.14,
        0.16,
    ),
    # # "obj_friction_change": (0.2, 0.001, 0.00002),  # (0.2, 0.001, 0.00002)
    "task_choice": "fixed",
    "seed": 42,
}
env_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl",
)
model_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/model.zip",
)

for num_tasks in range(2, len(target_task_list) + 1):
    for target_task_sublist in combinations(target_task_list, num_tasks):
        print(f"Loading data for tasks {target_task_sublist}...")
        # Pool the recorded actions of all the tasks in the combination
        actions_list = [
            np.vstack(df_dict[target_task].action)
            for target_task in target_task_sublist
        ]
        actions = np.vstack(actions_list)
        # Take the action dimensionality from the data the PCA is fitted on
        # rather than hard-coding it.
        action_dim = actions.shape[1]

        print(f"Computing pca for tasks {target_task_sublist}...")
        pca = PCA(n_components=n_comp).fit(actions)

        if save_results:
            out_path = os.path.join(ROOT_DIR, "data", "pca", f"pca_muscle_act_{'_'.join(target_task_sublist)}.joblib")
            os.makedirs(os.path.dirname(out_path), exist_ok=True)
            with open(out_path, "wb") as f:
                joblib.dump(pca, f)

        # A fresh environment, observation normalizer and model per combination
        env = EnvironmentFactory.create(**env_config)
        vecnormalize = load_vecnormalize(env_path, env)
        vecnormalize.training = False  # do not update the running statistics
        vecnormalize.norm_reward = False  # keep raw rewards
        model = load_model(model_path)

        print(f"Running episodes for tasks {target_task_sublist}...")
        performance = []
        for k in range(n_comp):
            print("Environment: ", target_task_sublist, "component ", k)
            # Step k keeps n_comp - k components, dropped from the chosen end
            if sorting == "best_to_worst":
                components = pca.components_[:n_comp - k]
            elif sorting == "worst_to_best":
                components = pca.components_[k:]
            else:
                raise ValueError(f"Unknown sorting: {sorting}")

            performance_ep = []
            for n in range(num_ep):
                cum_reward = 0
                lstm_states = None
                obs = env.reset()
                episode_starts = np.ones((1,), dtype=bool)
                done = False
                while not done:
                    action, lstm_states = model.predict(
                        vecnormalize.normalize_obs(obs),
                        state=lstm_states,
                        episode_start=episode_starts,
                        deterministic=True,
                    )

                    # Project the centered action onto the retained components,
                    # then map it back to the full action space.
                    action_proj = np.dot(action.reshape(-1, action_dim) - pca.mean_, components.T)
                    action_backproj = np.dot(action_proj, components) + pca.mean_
                    obs, rewards, done, info = env.step(action_backproj.reshape(action_dim))
                    episode_starts = done
                    cum_reward += rewards
                performance_ep.append(cum_reward)
                print(f"Episode {n}, reward: {cum_reward}")
            performance_ep = np.array(performance_ep) / 1000  # Transform the reward into the solved fraction
            data_point = {
                'components': components,
                'solved_frac_mean': np.mean(performance_ep),
                # Standard error of the mean (np.std default, i.e. ddof=0)
                'solved_frac_sem': np.std(performance_ep) / np.sqrt(len(performance_ep)),
            }
            performance.append(data_point)
        if save_results:
            target_task_str = "_".join(target_task_sublist)
            fp = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"performance_{target_task_str}_{sorting}.pkl")
            os.makedirs(os.path.dirname(fp), exist_ok=True)
            # The context manager closes the file even if pickling fails
            with open(fp, 'wb') as fp_acts_pcs:
                pickle.dump(performance, fp_acts_pcs)

In [None]:
# Plot the performance curve for the different combinations of tasks
def plot_explained_variance_ratio(exp_var, label, color, ax=None, fig=None):
    """Draw an explained-variance curve as a step plot.

    Args:
        exp_var: Sequence of explained-variance values; entry ``j`` is drawn
            at x position ``j + 1``.
        label: Label attached to the plotted line.
        color: Color of the line.
        ax: Axes to draw on. A new figure and axes are created when omitted.
        fig: Figure owning ``ax``. When ``ax`` is given without ``fig``, the
            figure is taken from ``ax.figure``.

    Returns:
        The ``(fig, ax)`` pair that was drawn on.
    """
    if ax is None:
        # Nothing to draw on: start a fresh figure (a `fig` passed without
        # axes is replaced, as before).
        fig, ax = plt.subplots()
    elif fig is None:
        # Honor the caller's axes instead of discarding them for a new figure
        fig = ax.figure
    ax.step(range(1, len(exp_var) + 1), exp_var, where='mid', linewidth=3, color=color, label=label)
    ax.set_ylabel('Cum. explained variance', fontsize=16)
    ax.set_ylim([0, 1.05])
    ax.tick_params(axis='both', labelsize=14)
    return fig, ax

def ev(X, X_approx, model_mean):
    """Return the fraction of the variance of X explained by X_approx.

    Computed as ``1 - SS_res / SS_tot``, where ``SS_res`` is the summed
    squared difference between ``X`` and ``X_approx`` and ``SS_tot`` is the
    summed squared difference between ``X`` and ``model_mean``.
    """
    residual_ss = np.sum(np.square(X - X_approx))
    total_ss = np.sum(np.square(X - model_mean))
    unexplained_fraction = residual_ss / total_ss
    return 1 - unexplained_fraction

# For every combination of two or more tasks, overlay the variance of the
# Baoding muscle activations explained by the first PCs of the pooled task
# actions with the precomputed solved fraction obtained with the same number
# of PCs.
save_results = False
n_comp = NUM_MUSCLES
sorting = "best_to_worst"
target_task_list = ["hand_pose", "hand_reach", "pen", "reorient"]
task_to_print_dict = {
    "baoding": "Baoding",
    "early_baoding": "Baoding step 12",
    "hand_pose": "Hand Pose",
    "hand_reach": "Hand Reach",
    "pen": "Pen",
    "reorient": "Reorient"
}
muscle_act = np.vstack(df_dict["baoding"].muscle_act)
for num_tasks in range(2, len(target_task_list) + 1):
    for target_task_sublist in combinations(target_task_list, num_tasks):
        print(f"Loading data for tasks {target_task_sublist}...")
        # Pool the recorded actions of all the tasks in the combination
        actions_list = [
            np.vstack(df_dict[target_task].action)
            for target_task in target_task_sublist
        ]
        actions = np.vstack(actions_list)

        print(f"Computing pca for tasks {target_task_sublist}...")
        pca = PCA(n_components=n_comp).fit(actions)

        print(f"Loading precomputed task performance decay for tasks {target_task_sublist}...")
        target_task_str = "_".join(target_task_sublist)
        # NOTE(review): the evaluation cell writes these files under
        # data/figures/panel_3/performance_decay, while they are read from
        # data/performance_decay here - confirm which location is intended.
        perf_path = os.path.join(ROOT_DIR, "data", "performance_decay", f"performance_{target_task_str}_{sorting}.pkl")
        # pickle must only be used on trusted, locally generated files
        with open(perf_path, "rb") as perf_file:
            task_perf = pickle.load(perf_file)
        # Reverse the stored order so that it matches the x axis of the plot;
        # presumably the entries go from all components down to a single one,
        # as produced by the evaluation cell - verify for other sortings.
        perfs_mean = np.array([d['solved_frac_mean'] for d in task_perf[::-1]])
        perfs_sem = np.array([d['solved_frac_sem'] for d in task_perf[::-1]])

        # Explained variance of the muscle activations, first with all the
        # components and then after zeroing the trailing components one more
        # at a time.
        muscle_act_projected = pca.transform(muscle_act)
        muscle_act_approx = pca.inverse_transform(muscle_act_projected)
        exp_var = ev(muscle_act, muscle_act_approx, pca.mean_)
        exp_var_ratio_list = [exp_var]
        for n_dropped in range(1, n_comp):
            muscle_act_projected[:, -n_dropped:] = 0
            muscle_act_approx = pca.inverse_transform(muscle_act_projected)
            exp_var = ev(muscle_act, muscle_act_approx, pca.mean_)
            exp_var_ratio_list.append(exp_var)
        # Order the values by increasing number of retained components
        exp_var_ratio_list.reverse()

        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()
        plot_explained_variance_ratio(exp_var_ratio_list, "Explained variance", 'black', ax=ax1, fig=fig)
        ax2.errorbar(np.arange(1, n_comp + 1), perfs_mean, yerr=perfs_sem, fmt='-', color="dodgerblue")
        ax1.set_xlabel('Number of PCs', fontsize=16, labelpad=5)
        ax2.set_ylabel('Solved fraction', fontsize=16, labelpad=5, color="dodgerblue")
        ax2.tick_params(axis='y', labelsize=14, colors="dodgerblue")
        ax1.set_title(", ".join([task_to_print_dict[task] for task in target_task_sublist]), fontsize=16)
        if save_results:
            fig_path = os.path.join(ROOT_DIR, "data", "figures", "panel_3", "performance_decay", f"perf_vs_comp_{target_task_str}.png")
            os.makedirs(os.path.dirname(fig_path), exist_ok=True)
            plt.savefig(fig_path, format="png", dpi=600, bbox_inches="tight")
        plt.show()