In [4]:
import os
import numpy as np
import pandas as pd
import pickle
from definitions import ROOT_DIR, NUM_MUSCLES
from sklearn.decomposition import PCA
from envs.environment_factory import EnvironmentFactory
from main_eval import load_vecnormalize, load_model

In [2]:
# Load the small variation datasets
df_name_dict = {
    "baoding": "baoding_sv_no_activity.h5",
    "early_baoding": "baoding_step_12_no_activity.h5",
    "hand_pose": "hand_pose.h5",
    "hand_reach": "hand_reach.h5",
    "pen": "pen.h5",
    "reorient": "reorient.h5",

}
df_dict = {
    key: pd.read_hdf(os.path.join(ROOT_DIR, "data", "datasets", value))
    for key, value in df_name_dict.items()
}

In [6]:
# Evaluate the final sds model when projecting the actions on a subset of the PCs
# best_to_worst removes low variance components first, worst to best remove the 
# high variance components first
num_ep = 100
target_task_list = ["baoding", "early_baoding", "hand_pose", "hand_reach", "pen", "reorient"]
sorting = "best_to_worst"  # "best_to_worst", "worst_to_best"
env_config = {
        "env_name": "MyoBaodingBallsP1",
        "weighted_reward_keys": {
            "pos_dist_1": 0,
            "pos_dist_2": 0,
            "act_reg": 0,
            "solved": 5,
            "done": 0,
            "sparse": 0,
        },
        # # "goal_time_period": [4, 6],  # phase 2: (4, 6)
        # # "goal_xrange": (0.020, 0.030),  # phase 2: (0.020, 0.030)
        # # "goal_yrange": (0.022, 0.032),  # phase 2: (0.022, 0.032)
        # # # Randomization in physical properties of the baoding balls
        "obj_size_range": (
            0.020,
            0.022,
        ),  # (0.018, 0.024)   # Object size range. Nominal 0.022
        "obj_mass_range": (
            0.14,
            0.16,
            # # "obj_friction_change": (0.2, 0.001, 0.00002),  # (0.2, 0.001, 0.00002)
        ),
        "task_choice": "fixed",
        "seed": 42
    }
env_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl",
)
model_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/model.zip",
)

for target_task in target_task_list:
    # First choose the pca to use to project the actions
    actions = np.vstack(df_dict[target_task].action)
    pca = PCA(n_components=NUM_MUSCLES).fit(actions)

    env = EnvironmentFactory.create(**env_config)
    vecnormalize = load_vecnormalize(env_path, env)
    vecnormalize.training = False
    vecnormalize.norm_reward = False
    model = load_model(model_path)
    
    performance = []
    for k in range(NUM_MUSCLES):
        print("Environment: ", target_task, "component ", k)
        if sorting == "best_to_worst":
            components = pca.components_[:NUM_MUSCLES-k]
        elif sorting == "worst_to_best":
            components = pca.components_[k:]
        else:
            raise ValueError("Unknown sorting: ", sorting)

        performance_ep = []
        for n in range(num_ep):
            acts = []
            cum_reward = 0
            lstm_states = None
            obs = env.reset()
            episode_starts = np.ones((1,), dtype=bool)
            done = False
            timestep = 0
            while not done: 
                timestep += 1
                action, lstm_states = model.predict(vecnormalize.normalize_obs(obs),
                                                        state=lstm_states,
                                                        episode_start=episode_starts,
                                                        deterministic=True,
                                                        )
                
                action_proj = np.dot(action.reshape(-1,39)-pca.mean_,components.T)
                action_backproj = np.dot(action_proj,components)+pca.mean_
                obs, rewards, done, info = env.step(action_backproj.reshape(39,))
                episode_starts = done
                cum_reward += rewards
            performance_ep.append(cum_reward)
            print(f"Episode {n}, reward: {cum_reward}")
        performance_ep = np.array(performance_ep) / 1000  # Transform the reward into the solved fraction
        data_point = {'components':components,'solved_frac_mean': np.mean(performance_ep), 'solved_frac_sem': np.std(performance_ep) / np.sqrt(len(performance_ep))}
        performance.append(data_point)
        print(data_point)

    fp = os.path.join(ROOT_DIR, "data", "performance_decay", f"performance_final_sds_{target_task}_{sorting}.pkl")
    fp_acts_pcs = open(fp, 'wb')
    pickle.dump(performance, fp_acts_pcs)
    fp_acts_pcs.close()

env path /Users/albertochiappa/Dev/rl/MyoChallengeAnalysis/trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl
Environment:  baoding component  0




Episode 0, reward: 720.0
Episode 1, reward: 705.0
Episode 2, reward: 685.0
Episode 3, reward: 725.0
Episode 4, reward: 725.0
Episode 5, reward: 725.0
Episode 6, reward: 540.0
Episode 7, reward: 700.0
Episode 8, reward: 725.0
Episode 9, reward: 720.0
Episode 10, reward: 715.0
Episode 11, reward: 720.0
Episode 12, reward: 715.0
Episode 13, reward: 730.0
Episode 14, reward: 720.0
Episode 15, reward: 725.0
Episode 16, reward: 720.0
Episode 17, reward: 725.0
Episode 18, reward: 600.0
Episode 19, reward: 720.0
Episode 20, reward: 725.0
Episode 21, reward: 715.0
Episode 22, reward: 735.0
Episode 23, reward: 720.0
Episode 24, reward: 715.0
Episode 25, reward: 720.0
Episode 26, reward: 725.0
Episode 27, reward: 735.0
Episode 28, reward: 730.0
Episode 29, reward: 730.0
Episode 30, reward: 730.0
Episode 31, reward: 695.0
Episode 32, reward: 715.0
Episode 33, reward: 700.0
Episode 34, reward: 725.0
Episode 35, reward: 730.0
Episode 36, reward: 720.0
Episode 37, reward: 720.0
Episode 38, reward: 71



Episode 0, reward: 720.0
Episode 1, reward: 325.0
Episode 2, reward: 740.0
Episode 3, reward: 735.0
Episode 4, reward: 725.0
Episode 5, reward: 720.0
Episode 6, reward: 710.0
Episode 7, reward: 735.0
Episode 8, reward: 735.0
Episode 9, reward: 720.0
Episode 10, reward: 730.0
Episode 11, reward: 735.0
Episode 12, reward: 720.0
Episode 13, reward: 730.0
Episode 14, reward: 730.0
Episode 15, reward: 725.0
Episode 16, reward: 725.0
Episode 17, reward: 725.0
Episode 18, reward: 680.0
Episode 19, reward: 720.0
Episode 20, reward: 690.0
Episode 21, reward: 700.0
Episode 22, reward: 735.0
Episode 23, reward: 725.0
Episode 24, reward: 700.0
Episode 25, reward: 725.0
Episode 26, reward: 610.0
Episode 27, reward: 740.0
Episode 28, reward: 730.0
Episode 29, reward: 730.0
Episode 30, reward: 685.0
Episode 31, reward: 725.0
Episode 32, reward: 720.0
Episode 33, reward: 720.0
Episode 34, reward: 730.0
Episode 35, reward: 675.0
Episode 36, reward: 720.0
Episode 37, reward: 725.0
Episode 38, reward: 72



Episode 0, reward: 720.0
Episode 1, reward: 725.0
Episode 2, reward: 700.0
Episode 3, reward: 725.0
Episode 4, reward: 725.0
Episode 5, reward: 720.0
Episode 6, reward: 650.0
Episode 7, reward: 730.0
Episode 8, reward: 735.0
Episode 9, reward: 715.0
Episode 10, reward: 725.0
Episode 11, reward: 635.0
Episode 12, reward: 710.0
Episode 13, reward: 730.0
Episode 14, reward: 720.0
Episode 15, reward: 725.0
Episode 16, reward: 725.0
Episode 17, reward: 725.0
Episode 18, reward: 725.0
Episode 19, reward: 640.0
Episode 20, reward: 590.0
Episode 21, reward: 720.0
Episode 22, reward: 735.0
Episode 23, reward: 690.0
Episode 24, reward: 715.0
Episode 25, reward: 280.0
Episode 26, reward: 730.0
Episode 27, reward: 735.0
Episode 28, reward: 700.0
Episode 29, reward: 730.0
Episode 30, reward: 730.0
Episode 31, reward: 725.0
Episode 32, reward: 725.0
Episode 33, reward: 695.0
Episode 34, reward: 730.0
Episode 35, reward: 735.0
Episode 36, reward: 715.0
Episode 37, reward: 720.0
Episode 38, reward: 73



Episode 0, reward: 720.0
Episode 1, reward: 725.0
Episode 2, reward: 735.0
Episode 3, reward: 725.0
Episode 4, reward: 700.0
Episode 5, reward: 720.0
Episode 6, reward: 665.0
Episode 7, reward: 735.0
Episode 8, reward: 740.0
Episode 9, reward: 720.0
Episode 10, reward: 725.0
Episode 11, reward: 740.0
Episode 12, reward: 720.0
Episode 13, reward: 730.0
Episode 14, reward: 725.0
Episode 15, reward: 725.0
Episode 16, reward: 720.0
Episode 17, reward: 725.0
Episode 18, reward: 725.0
Episode 19, reward: 725.0
Episode 20, reward: 725.0
Episode 21, reward: 715.0
Episode 22, reward: 735.0
Episode 23, reward: 725.0
Episode 24, reward: 720.0
Episode 25, reward: 725.0
Episode 26, reward: 720.0
Episode 27, reward: 735.0
Episode 28, reward: 730.0
Episode 29, reward: 735.0
Episode 30, reward: 635.0
Episode 31, reward: 730.0
Episode 32, reward: 725.0
Episode 33, reward: 690.0
Episode 34, reward: 710.0
Episode 35, reward: 730.0
Episode 36, reward: 720.0
Episode 37, reward: 715.0
Episode 38, reward: 72



Episode 0, reward: 630.0
Episode 1, reward: 725.0
Episode 2, reward: 685.0
Episode 3, reward: 730.0
Episode 4, reward: 725.0
Episode 5, reward: 725.0
Episode 6, reward: 730.0
Episode 7, reward: 730.0
Episode 8, reward: 735.0
Episode 9, reward: 720.0
Episode 10, reward: 725.0
Episode 11, reward: 740.0
Episode 12, reward: 715.0
Episode 13, reward: 730.0
Episode 14, reward: 725.0
Episode 15, reward: 690.0
Episode 16, reward: 730.0
Episode 17, reward: 730.0
Episode 18, reward: 730.0
Episode 19, reward: 725.0
Episode 20, reward: 725.0
Episode 21, reward: 715.0
Episode 22, reward: 695.0
Episode 23, reward: 690.0
Episode 24, reward: 700.0
Episode 25, reward: 725.0
Episode 26, reward: 725.0
Episode 27, reward: 735.0
Episode 28, reward: 735.0
Episode 29, reward: 730.0
Episode 30, reward: 705.0
Episode 31, reward: 725.0
Episode 32, reward: 695.0
Episode 33, reward: 720.0
Episode 34, reward: 700.0
Episode 35, reward: 725.0
Episode 36, reward: 670.0
Episode 37, reward: 725.0
Episode 38, reward: 61



Episode 0, reward: 725.0
Episode 1, reward: 715.0
Episode 2, reward: 735.0
Episode 3, reward: 725.0
Episode 4, reward: 725.0
Episode 5, reward: 605.0
Episode 6, reward: 730.0
Episode 7, reward: 735.0
Episode 8, reward: 740.0
Episode 9, reward: 700.0
Episode 10, reward: 725.0
Episode 11, reward: 675.0
Episode 12, reward: 720.0
Episode 13, reward: 725.0
Episode 14, reward: 725.0
Episode 15, reward: 725.0
Episode 16, reward: 725.0
Episode 17, reward: 725.0
Episode 18, reward: 725.0
Episode 19, reward: 720.0
Episode 20, reward: 730.0
Episode 21, reward: 720.0
Episode 22, reward: 730.0
Episode 23, reward: 720.0
Episode 24, reward: 720.0
Episode 25, reward: 550.0
Episode 26, reward: 725.0
Episode 27, reward: 730.0
Episode 28, reward: 725.0
Episode 29, reward: 730.0
Episode 30, reward: 730.0
Episode 31, reward: 715.0
Episode 32, reward: 725.0
Episode 33, reward: 725.0
Episode 34, reward: 730.0
Episode 35, reward: 735.0
Episode 36, reward: 715.0
Episode 37, reward: 710.0
Episode 38, reward: 72

In [7]:
# Same as before, but with the sds model step 12
n_comp = 39
num_ep = 10
target_task_list = ["baoding", "early_baoding"]  #, "early_baoding", "hand_pose", "hand_reach", "pen", "reorient"]
sorting = "best_to_worst"  # "best_to_worst", "worst_to_best"
env_config = {
        "env_name": "CustomMyoBaodingBallsP1",
        "weighted_reward_keys": {
            "pos_dist_1": 0,
            "pos_dist_2": 0,
            "act_reg": 0,
            "solved": 5,
            "done": 0,
            "sparse": 0,
        },
        "task_choice": "fixed",
        "goal_time_period": (5, 5),
        "seed": 42
    }
env_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/12_period_5/env.pkl",
)
model_path = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/12_period_5/model.zip",
)


for target_task in target_task_list:
    # First choose the pca to use to project the actions
    actions = np.vstack(df_dict[target_task].action)
    pca = PCA(n_components=NUM_MUSCLES).fit(actions)

    env = EnvironmentFactory.create(**env_config)
    vecnormalize = load_vecnormalize(env_path, env)
    vecnormalize.training = False
    vecnormalize.norm_reward = False
    model = load_model(model_path)

    performance = []
    for k in range(n_comp):
        print("Environment: ", target_task, "component ", k)
        if sorting == "best_to_worst":
            components = pca.components_[:n_comp-k]
        elif sorting == "worst_to_best":
            components = pca.components_[k:]
        else:
            raise ValueError("Unknown sorting: ", sorting)

        performance_ep = []
        for n in range(num_ep):
            acts = []
            cum_reward = 0
            lstm_states = None
            obs = env.reset(random_phase=0)
            episode_starts = np.ones((1,), dtype=bool)
            done = False
            timestep = 0
            while not done: 
                timestep += 1
                action, lstm_states = model.predict(vecnormalize.normalize_obs(obs),
                                                        state=lstm_states,
                                                        episode_start=episode_starts,
                                                        deterministic=True,
                                                        )
                
                action_proj = np.dot(action.reshape(-1,39)-pca.mean_,components.T)
                action_backproj = np.dot(action_proj,components)+pca.mean_
                obs, rewards, done, info = env.step(action_backproj.reshape(39,))
                episode_starts = done
                cum_reward += rewards
            performance_ep.append(cum_reward)
            print(f"Episode {n}, reward: {cum_reward}")
        performance_ep = np.array(performance_ep) / 1000  # Transform the reward into the solved fraction
        data_point = {'components':components,'solved_frac_mean': np.mean(performance_ep), 'solved_frac_sem': np.std(performance_ep) / np.sqrt(len(performance_ep))}
        performance.append(data_point)
        print(data_point)

    fp = os.path.join(ROOT_DIR, "data", "performance_decay", f"performance_sds_step_12_{target_task}_{sorting}.pkl")
    fp_acts_pcs = open(fp, 'wb')
    pickle.dump(performance, fp_acts_pcs)
    fp_acts_pcs.close()

env path /Users/albertochiappa/Dev/rl/MyoChallengeAnalysis/trained_models/curriculum_steps_complete_baoding_winner/12_period_5/env.pkl
Environment:  baoding component  0




Episode 0, reward: 1000.0
Episode 1, reward: 1000.0
Episode 2, reward: 1000.0
Episode 3, reward: 1000.0
Episode 4, reward: 1000.0
Episode 5, reward: 1000.0
Episode 6, reward: 1000.0
Episode 7, reward: 1000.0
Episode 8, reward: 1000.0
Episode 9, reward: 1000.0
{'components': array([[ 0.3   , -0.3232, -0.1994, ..., -0.0205, -0.1735,  0.1368],
       [ 0.3727, -0.1265, -0.0744, ..., -0.0557, -0.0053,  0.031 ],
       [ 0.0238, -0.0151,  0.1795, ..., -0.1212,  0.1978,  0.3616],
       ...,
       [ 0.0193, -0.1264,  0.0179, ...,  0.1711,  0.0043,  0.1389],
       [-0.0719,  0.0151,  0.1163, ..., -0.2257,  0.0585,  0.2153],
       [ 0.0717,  0.0424, -0.1236, ..., -0.0197,  0.2951, -0.1071]],
      dtype=float32), 'solved_frac_mean': 1.0, 'solved_frac_sem': 0.0}
Environment:  baoding component  1
Episode 0, reward: 895.0
Episode 1, reward: 895.0
Episode 2, reward: 895.0
Episode 3, reward: 895.0
Episode 4, reward: 895.0
Episode 5, reward: 895.0
Episode 6, reward: 895.0
Episode 7, reward: 895.



Episode 0, reward: 885.0
Episode 1, reward: 1000.0
Episode 2, reward: 1000.0
Episode 3, reward: 1000.0
Episode 4, reward: 1000.0
Episode 5, reward: 1000.0
Episode 6, reward: 1000.0
Episode 7, reward: 1000.0
Episode 8, reward: 1000.0
Episode 9, reward: 1000.0
{'components': array([[-0.4285,  0.2883, -0.0646, ...,  0.0357, -0.002 , -0.0285],
       [-0.0371, -0.2124, -0.0783, ..., -0.0019, -0.0548, -0.1632],
       [ 0.0111, -0.3203, -0.0061, ..., -0.1011, -0.3516, -0.1207],
       ...,
       [ 0.1188, -0.0369,  0.2122, ..., -0.1138, -0.049 ,  0.244 ],
       [-0.0818, -0.2414, -0.1809, ..., -0.0507, -0.1098, -0.0814],
       [-0.0929, -0.1294,  0.1509, ..., -0.074 ,  0.0729,  0.0089]],
      dtype=float32), 'solved_frac_mean': 0.9884999999999999, 'solved_frac_sem': 0.010909857927580906}
Environment:  early_baoding component  1
Episode 0, reward: 1000.0
Episode 1, reward: 1000.0
Episode 2, reward: 1000.0
Episode 3, reward: 1000.0
Episode 4, reward: 1000.0
Episode 5, reward: 1000.0
Episo