## Analysis of motor synergies, Todorov's paper (panel A)

In [None]:
from definitions import ROOT_DIR
import os
import numpy as np
import pandas as pd
import seaborn as sns
from functions_notebook import PCvsVar, plot_cumvar
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import json

# Final version - use the newly collected rollouts

In [None]:
# All rollout datasets live under <ROOT_DIR>/data/rollouts.
data_dir = os.path.join(ROOT_DIR, "data", "rollouts")

# Resolve both dataset locations first ...
control_data_path = os.path.join(
    data_dir, "control", "hand_pose_1000_episodes_lattice.h5"
)
baoding_data_path = os.path.join(
    data_dir,
    "final_model_500_episodes_activations_info_small_variations_ccw",
    "data.hdf",
)

# ... then load them: the control (hand pose) rollouts and the Baoding
# rollouts (the folder name indicates the CCW variant).
control_df = pd.read_hdf(control_data_path)
baoding_df = pd.read_hdf(baoding_data_path)

In [None]:
import math

# Binomial coefficient "39 choose 3", written out as 39! / (36! * 3!).
# True division is used, so the displayed result is a float.
math.factorial(39) / math.factorial(36) / math.factorial(3)

In [None]:
def get_episode_vel(episode_pos):
    """Return the per-step finite differences of one episode's positions.

    Row ``t`` of the result equals ``episode_pos[t] - episode_pos[t - 1]``;
    row 0 stays at zero because it has no predecessor. The output has the
    same shape and dtype as ``episode_pos``, which must be a 2-D array with
    time steps along axis 0.

    The differences are not divided by a time step, so the values are
    "change per simulation step" rather than a rate per second.
    """
    step_deltas = np.diff(episode_pos, axis=0)
    episode_vel = np.zeros_like(episode_pos)
    episode_vel[1:, :] = step_deltas  # 40 sim steps per second
    # episode_vel = signal.savgol_filter(episode_pos, window_length=3, polyorder=1, deriv=1, axis=0)
    return episode_vel

def get_pos_vel_act(df, num_joints=23):
    """Stack per-step positions, velocities and muscle activations of rollouts.

    Args:
        df: Rollout table with an "episode" column, an "observation" column
            holding one vector per row, a "muscle_act" column holding one
            vector per row, and optionally a "task" column.
        num_joints: Number of leading observation entries kept as positions.
            Defaults to 23, the value that used to be hard-coded here.
            Presumably these entries are the joint angles -- confirm against
            the environment's observation layout.

    Returns:
        Tuple ``(pos, vel, muscle_act)`` of vertically stacked arrays:
        ``pos`` holds the selected observation entries, ``vel`` their
        per-episode step-to-step differences (see ``get_episode_vel``) and
        ``muscle_act`` the stacked "muscle_act" column.
    """
    # Rows are grouped per episode (and per task when that column exists), so
    # that differences are only taken between rows of the same group.
    group_keys = ["episode", "task"] if "task" in df.keys() else ["episode"]
    pos_list = (
        df.groupby(group_keys)["observation"]
        .agg(lambda obs: np.vstack(obs)[:, :num_joints])
        .tolist()
    )
    vel_list = [get_episode_vel(episode_pos) for episode_pos in pos_list]
    pos = np.vstack(pos_list)
    vel = np.vstack(vel_list)
    # NOTE(review): pos/vel rows follow the (sorted) groupby key order, while
    # muscle_act follows the row order of df. The three arrays are only
    # row-aligned if df is already ordered by the grouping keys -- verify
    # before pairing rows across them.
    muscle_act = np.vstack(df.muscle_act)
    return pos, vel, muscle_act

# Stacked positions, step-wise velocities and muscle activations for the
# control rollouts and for the Baoding rollouts.
pos_control, vel_control, muscle_act_control = get_pos_vel_act(control_df)
pos_baoding, vel_baoding, muscle_act_baoding = get_pos_vel_act(baoding_df)

In [None]:
# Let's count how many muscles are above a certain activation per step
threshold_list = [0.001, 0.01, 0.05, 0.1, 0.2, 0.5, 0.9]
# Only the first `num_steps` rows are drawn in each time-series plot
# (loop-invariant, so it is defined once outside the loop).
num_steps = 1000
for threshold in threshold_list:
    # Per row: number of muscle activations strictly above the threshold.
    muscle_act_baoding_bin = (muscle_act_baoding > threshold).astype(int).sum(axis=1)
    muscle_act_control_bin = (muscle_act_control > threshold).astype(int).sum(axis=1)
    print(f"Avg number of muscles above {threshold} (baoding): {np.mean(muscle_act_baoding_bin)}")
    print(f"Avg number of muscles above {threshold} (control): {np.mean(muscle_act_control_bin)}")
    plt.figure(figsize=(15, 4))
    plt.plot(muscle_act_baoding_bin[:num_steps], label="baoding")
    plt.plot(muscle_act_control_bin[:num_steps], label="control")
    plt.title(f"Number of muscles more active than {threshold}")
    plt.legend()
    # Explicit ".png" suffix so the file name matches the other figures
    # saved by this notebook.
    plt.savefig(
        os.path.join(ROOT_DIR, "data", "figures", "rebuttal", f"activation_time_series_th_{threshold}.png"),
        format="png",
        dpi=600,
        bbox_inches="tight",
    )
    plt.show()
    


In [None]:
# Plot active muscles as a function of the activity threshold
# The sweep runs from just below the smallest activation to just below the
# largest one, taken over both datasets.
min_val = min(np.min(muscle_act_baoding), np.min(muscle_act_control))
max_val = max(np.max(muscle_act_baoding), np.max(muscle_act_control))
eps = 1e-6
threshold_vec = np.linspace(min_val - eps, max_val - eps, 1000)

# For every threshold: the mean over rows of the number of muscles whose
# activation is strictly above it.
active_baoding_list = [
    np.count_nonzero(muscle_act_baoding > level, axis=1).mean()
    for level in threshold_vec
]
active_control_list = [
    np.count_nonzero(muscle_act_control > level, axis=1).mean()
    for level in threshold_vec
]





In [None]:
# Average number of active muscles versus activation threshold, one curve
# per dataset.
fig, ax = plt.subplots(figsize=(4, 3))
threshold_vec_pct = threshold_vec * 100  # scaled by 100 for the "%" axis
for curve, curve_label, curve_color in (
    (active_baoding_list, "Baoding", "dodgerblue"),
    (active_control_list, "Control (Hand Pose)", "orangered"),
):
    ax.plot(threshold_vec_pct, curve, label=curve_label, color=curve_color)
ax.set_title("Average active muscles")
ax.set_xlabel("Activation threshold %", fontsize=12)
ax.set_ylabel("Number of muscles", fontsize=12)
ax.legend()
fig.savefig(
    os.path.join(ROOT_DIR, "data", "figures", "rebuttal", "activation_vs_threshold.png"),
    format="png",
    dpi=600,
    bbox_inches="tight",
)
fig.show()

In [None]:
# Number of muscles active at 5%
# idx counts the swept thresholds below 0.05, i.e. it is the position of the
# first threshold that is not below 0.05.
idx = np.count_nonzero(threshold_vec < 0.05)
for prefix, averages in (
    ("Control: ", active_control_list),
    ("Baoding: ", active_baoding_list),
):
    print(prefix, averages[idx])

In [None]:
# Display the dimensions of the stacked Baoding muscle-activation array.
muscle_act_baoding.shape

In [None]:
# Pairwise scatter plots (lower triangle, KDE on the diagonal) of the first
# `num_points` rows of the Baoding muscle activations, colored by the
# "step" column of the rollout table.
num_points = 10000
muscle_df = pd.DataFrame(muscle_act_baoding[:, :])
scatter_kws = dict(
    hue=baoding_df["step"][:num_points],
    palette="plasma",
    alpha=0.3,
    s=5,
)
ax = sns.pairplot(
    muscle_df[:num_points],
    diag_kind='kde',
    corner=True,
    plot_kws=scatter_kws,
)
plt.savefig(
    os.path.join(ROOT_DIR, "data", "figures", "rebuttal", "pairplots.png"),
    format="png",
    dpi=600,
    bbox_inches="tight",
)
plt.show()

In [None]:
# PCA of the hand poses for the control (hand pose) and for the task (baoding)
# PCvsVar comes from functions_notebook; its results are consumed further
# down as per-component explained-variance fractions (they are cumulatively
# summed and compared against 0.85 / 0.95).
num_joints = 23
num_muscles = 39

# Nested summary: feature -> dataset -> PCvsVar result. Each entry is filled
# in as soon as the matching result is computed.
exp_var_dict = {"pos": {}, "vel": {}, "muscle_act": {}}

# Joint positions
exp_var_pos_control = exp_var_dict["pos"]["control"] = PCvsVar(df=pos_control, n_comp=num_joints)
exp_var_pos_baoding = exp_var_dict["pos"]["baoding"] = PCvsVar(df=pos_baoding, n_comp=num_joints)

# Joint velocities
exp_var_vel_control = exp_var_dict["vel"]["control"] = PCvsVar(df=vel_control, n_comp=num_joints)
exp_var_vel_baoding = exp_var_dict["vel"]["baoding"] = PCvsVar(df=vel_baoding, n_comp=num_joints)

# Muscle activations
exp_var_muscle_control = exp_var_dict["muscle_act"]["control"] = PCvsVar(df=muscle_act_control, n_comp=num_muscles)
exp_var_muscle_baoding = exp_var_dict["muscle_act"]["baoding"] = PCvsVar(df=muscle_act_baoding, n_comp=num_muscles)

In [None]:
def get_dof_count(exp_var, threshold=0.85):
    """Count the leading entries needed for their running sum to pass a threshold.

    Args:
        exp_var: Sequence of per-component values (e.g. explained-variance
            ratios), in component order.
        threshold: Value the cumulative sum has to strictly exceed.

    Returns:
        The smallest 1-based count ``k`` (a plain ``int``) such that the sum
        of the first ``k`` entries is strictly greater than ``threshold``,
        or ``None`` if the cumulative sum never exceeds it (this includes
        empty input).
    """
    exceeds = np.cumsum(exp_var) > threshold
    if not exceeds.any():
        return None
    # argmax on a boolean array gives the position of the first True.
    return int(np.argmax(exceeds)) + 1

In [None]:
# Component counts needed to pass each cumulative-variance level, for every
# data type and task, plus the average over the levels.
levels = [0.85, 0.95]
dof_count_dict = {}
for data_type, task_var_dict in exp_var_dict.items():
    dof_count_dict[data_type] = {}
    for task, exp_var in task_var_dict.items():
        per_level = {level: get_dof_count(exp_var, level) for level in levels}
        # The average is taken over the per-level counts only; "avg" is
        # added to the dict afterwards.
        per_level["avg"] = np.mean(list(per_level.values()))
        dof_count_dict[data_type][task] = per_level

print(json.dumps(dof_count_dict, indent=4))

In [None]:
# Extracted from Todorov's paper 
# Layout: feature ("pos" / "vel") -> condition ("control" / "baoding") ->
# number of components per cumulative-variance level, plus "avg", which is
# the mean of the two level entries.
# NOTE(review): the level keys here are strings ("0.85"), whereas the counts
# computed in this notebook are keyed by floats (0.85) until they are dumped
# to JSON -- keep that in mind when comparing the two structures directly.
experimental_dof = {
    "pos": {
        "control": {
            "0.85": 7,
            "0.95": 10,
            "avg": 8.5
        },
        "baoding": {
            "0.85": 3,
            "0.95": 7,
            "avg": 5
        }
    },
    "vel": {
        "control": {
            "0.85": 8,
            "0.95": 12,
            "avg": 10
        },
        "baoding": {
            "0.85": 4,
            "0.95": 8,
            "avg": 6
        }
    }
}

In [None]:
def plot_compare_explained_variance(exp_var_1, exp_var_2, exp_var_1_label=None, exp_var_2_label=None):
    """Draw the cumulative sums of two explained-variance sequences as step curves.

    Args:
        exp_var_1: Per-component values of the first curve (drawn in blue).
        exp_var_2: Per-component values of the second curve (drawn in
            orange-red); must have the same length as ``exp_var_1``.
        exp_var_1_label: Legend label of the first curve.
        exp_var_2_label: Legend label of the second curve.

    Returns:
        The ``(fig, ax)`` pair of the newly created figure.

    Raises:
        AssertionError: If the two sequences differ in length.
    """
    assert len(exp_var_1) == len(exp_var_2)
    pc_numbers = range(1, len(exp_var_1) + 1)  # components are numbered from 1
    fig, ax = plt.subplots()
    for exp_var, curve_label, curve_color in (
        (exp_var_1, exp_var_1_label, "dodgerblue"),
        (exp_var_2, exp_var_2_label, "orangered"),
    ):
        ax.step(pc_numbers, np.cumsum(exp_var), where='mid', label=curve_label, linewidth=3, color=curve_color)
    ax.set_xlabel('Number of PCs', fontsize=21)
    ax.set_ylabel('Cum. explained variance', fontsize=21)
    ax.legend(fontsize=21, loc='best')
    ax.tick_params(axis='both', labelsize=20)
    # Dashed reference lines at the 95% and 85% levels ...
    for level in (0.95, 0.85):
        ax.axhline(y=level, color='black', linestyle='--', alpha=0.5)
    # ... each annotated slightly below its line, at the fixed x position 18.
    for text_y, text in ((0.9, '95%'), (0.8, '85%')):
        ax.text(18, text_y, text, color='black', fontsize=18)
    return fig, ax
    
# Cumulative explained variance, Baoding vs. control, for joint positions.
# Only this first figure is shown explicitly via fig.show(); saving to disk is
# disabled for all three figures. `fig` and `ax` are rebound by each call.
fig, ax = plot_compare_explained_variance(exp_var_pos_baoding, exp_var_pos_control, "Baoding", "Control (hand pose)")
# fig.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_1", "pca_pos.png"), format="png", dpi=600, bbox_inches="tight")
fig.show()

# Same comparison for joint velocities.
fig, ax = plot_compare_explained_variance(exp_var_vel_baoding, exp_var_vel_control, "Baoding", "Control (hand pose)")
# fig.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_1", "pca_vel.png"), format="png", dpi=600, bbox_inches="tight")

# Same comparison for muscle activations.
fig, ax = plot_compare_explained_variance(exp_var_muscle_baoding, exp_var_muscle_control, "Baoding", "Control (hand pose)")
# fig.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_1", "pca_muscle_act.png"), format="png", dpi=600, bbox_inches="tight")


Interestingly, while the poses are embedded in a lower-dimensional space for the Baoding balls task, this is not the case for the muscle activations. In fact, we can hypothesize that the presence of objects and variable environment conditions forces the policy to be more robust, thus preventing the emergence of overly stereotypical muscle activations.

In [None]:
# Explained-variance plot for the Baoding joint positions, drawn by the
# project helper plot_cumvar (presumably on the current pyplot figure, since
# the figure is saved through plt.savefig right after -- verify in
# functions_notebook).
plot_cumvar(
    n_comp=num_joints,
    exp_var_ratio=exp_var_pos_baoding,
    title='Joint angular positions',
)
plt.savefig(
    os.path.join(ROOT_DIR, "data", "figures", "panel_1", "pca_pos_baoding_individual_variance.png"),
    format="png",
    dpi=600,
    bbox_inches="tight",
)

In [None]:
# Bar chart of the first `n_comp` entries of the Baoding joint-position
# explained-variance vector. Axis labels, legend and x ticks are not drawn
# in this panel.
n_comp = 15
exp_var_ratio = exp_var_pos_baoding[:n_comp]
title = 'Joint angles'

plt.figure(figsize=(4, 2))
plt.bar(
    range(1, n_comp + 1),
    exp_var_ratio,
    alpha=0.5,
    align='center',
    label='Individual variance',
    color="dodgerblue",
)
plt.title(title, fontsize=21)
plt.yticks(fontsize=21)
plt.xticks([])  # an empty list removes all x ticks
plt.subplots_adjust(left=0.15, bottom=0.15)
plt.savefig(
    os.path.join(ROOT_DIR, "data", "figures", "panel_1", "cum_var_barplot_sds.png"),
    format="png",
    dpi=800,
    bbox_inches="tight",
)


In [None]:
# Reference per-component values given in percent and converted to fractions
# (presumably the human data from the Todorov paper named in the notebook
# title -- confirm the source). This panel has an x label and x ticks but no
# title.
exp_var_ratio_todorov = np.array([
    35.88, 21.80, 11.31, 7.60, 5.24,
    4.21, 3.29, 2.47, 2.06, 1.34,
    0.92, 0.72, 0.51, 0.41, 0.30,
]) * 1e-2
n_comp = 15
exp_var_ratio = exp_var_ratio_todorov

plt.figure(figsize=(4, 2))
plt.bar(
    range(1, n_comp + 1),
    exp_var_ratio,
    alpha=0.5,
    align='center',
    label='Individual variance',
    color="dodgerblue",
)
plt.xlabel('Number of PCs', fontsize=21)
plt.yticks(fontsize=21)
plt.xticks([0, 5, 10, 15], fontsize=21)
plt.subplots_adjust(left=0.15, bottom=0.15)
plt.savefig(
    os.path.join(ROOT_DIR, "data", "figures", "panel_1", "cum_var_barplot_human.png"),
    format="png",
    dpi=800,
    bbox_inches="tight",
)


In [None]:
# Compute the DOFs for all datasets
# Locate the clockwise (cw) and counter-clockwise (ccw) Baoding rollouts ...
baoding_cw_path = os.path.join(
    data_dir,
    "final_model_500_episodes_activations_info_small_variations_cw",
    "data.hdf",
)
baoding_ccw_path = os.path.join(
    data_dir,
    "final_model_500_episodes_activations_info_small_variations_ccw",
    "data.hdf",
)

# ... and load them.
cw_df = pd.read_hdf(baoding_cw_path)
ccw_df = pd.read_hdf(baoding_ccw_path)

# Combined dataset: cw rows first, then ccw rows (the index is not reset).
# NOTE(review): get_pos_vel_act groups rows by "episode" (plus "task" when
# that column exists). If both files reuse the same episode ids and have no
# distinguishing "task" value, a cw and a ccw episode would be merged into
# one group and a velocity would be computed across their boundary -- verify
# against the data.
both_df = pd.concat([cw_df, ccw_df], axis=0)

pos_cw, vel_cw, muscle_act_cw = get_pos_vel_act(cw_df)
pos_ccw, vel_ccw, muscle_act_ccw = get_pos_vel_act(ccw_df)
pos_both, vel_both, muscle_act_both = get_pos_vel_act(both_df)

In [None]:
# Datasets to analyse: variation -> feature -> rotation direction -> stacked array.
data_dict = {
    "small": {
        "pos": {
            "cw": pos_cw,
            "ccw": pos_ccw,
            "both": pos_both
        },
        "vel": {
            "cw": vel_cw,
            "ccw": vel_ccw,
            "both": vel_both
        },
        "muscle_act": {
            "cw": muscle_act_cw,
            "ccw": muscle_act_ccw,
            "both": muscle_act_both
        }
    }
}

# Same nesting as data_dict, with every array replaced by the
# explained-variance ratios of a PCA fitted on it.
variation_feature_pca_dict = {}
for variation, feature_dict in data_dict.items():
    feature_pca_dict = {}
    for feature, direction_dict in feature_dict.items():
        # Joint features keep `num_joints` components, muscle activations
        # `num_muscles`.
        if feature in ["pos", "vel"]:
            n_components = num_joints
        elif feature == "muscle_act":
            n_components = num_muscles
        else:
            # Fail loudly instead of silently reusing the ratios computed
            # for a previous feature.
            raise NotImplementedError(feature)
        feature_pca_dict[feature] = {
            direction: PCA(n_components=n_components).fit(dataset).explained_variance_ratio_
            for direction, dataset in direction_dict.items()
        }
    variation_feature_pca_dict[variation] = feature_pca_dict

In [None]:
# Component counts needed to pass each cumulative-variance level, for every
# feature and rotation direction, printed once per variation.
levels = [0.85, 0.95]
for variation, feature_pca_dict in variation_feature_pca_dict.items():
    # Rebuilt for every variation; after the loop it holds the counts of the
    # last variation processed, which is what the table below is built from.
    dof_count_dict = {}
    for feature, direction_dict in feature_pca_dict.items():
        direction_dof_dict = {}
        for direction, exp_var in direction_dict.items():
            dof_per_level_dict = {level: get_dof_count(exp_var, level) for level in levels}
            # The average covers the per-level counts only; "avg" is added
            # to the dict afterwards.
            dof_per_level_dict["avg"] = np.mean(list(dof_per_level_dict.values()))
            direction_dof_dict[direction] = dof_per_level_dict
        dof_count_dict[feature] = direction_dof_dict

    # json is already imported at the top of the notebook.
    print(json.dumps(dof_count_dict, indent=4))

In [None]:
# One small DataFrame per (feature, direction) pair, with one row per entry
# of its threshold dict (the two levels plus "avg").
dfs = [
    pd.DataFrame(
        [(feature, direction, threshold, value) for threshold, value in thresholds.items()],
        columns=['features', 'direction', 'threshold', 'value'],
    )
    for feature, directions in dof_count_dict.items()
    for direction, thresholds in directions.items()
]

# Long table: one row per (feature, direction, threshold).
df = pd.concat(dfs, ignore_index=True)

# Wide table: thresholds as rows, (feature, direction) pairs as columns.
df_pivot = df.pivot_table(index='threshold', columns=['features', 'direction'], values='value')

# Human-readable names for the column labels of the LaTeX table.
pretty_print_dict = {
    "cw": "CW",
    "ccw": "CCW",
    "both": "Both",
    "pos": "Position",
    "vel": "Velocity",
    "muscle_act": "Muscle activation",
}

# Column order of the printed table: feature-major, direction-minor.
indices = [
    (pretty_print_dict[feature], pretty_print_dict[direction])
    for feature in ["pos", "vel", "muscle_act"]
    for direction in ["cw", "ccw", "both"]
]

df_pivot = df_pivot.rename(columns=pretty_print_dict)
print(df_pivot[indices].to_latex(float_format="%.1f"))


In [None]:
# Validate the size of the dataset
num_joints = 23
num_muscles = 39
levels = [0.85, 0.95]

# Fractions of each dataset to analyse, in percent: 0.1, 0.8, 2.7, ..., 100.
percentages = (np.arange(10) + 1) ** 3 / 10

# feature -> dataset name -> stacked array
data_dict = {
    "pos": {
        "cw": pos_cw,
        "ccw": pos_ccw,
        "both": pos_both,
        "control": pos_control
    },
    "vel": {
        "cw": vel_cw,
        "ccw": vel_ccw,
        "both": vel_both,
        "control": vel_control
    },
    "muscle_act": {
        "cw": muscle_act_cw,
        "ccw": muscle_act_ccw,
        "both": muscle_act_both,
        "control": muscle_act_control
    }
}

# feature -> dataset name -> list with one dimensionality estimate per
# entry of `percentages`.
feature_dir_dof_count_dict = {}
for feature, dir_dict in data_dict.items():
    # Joint features keep `num_joints` components, muscle activations
    # `num_muscles`.
    if feature in ["pos", "vel"]:
        n_components = num_joints
    elif feature == "muscle_act":
        n_components = num_muscles
    else:
        raise NotImplementedError(feature)
    dir_dof_count_dict = {}
    # `direction` rather than `dir`, so the builtin dir() is not shadowed in
    # the notebook namespace.
    for direction, data in dir_dict.items():
        dof_count_list = []
        for pct in percentages:
            # NOTE(review): the subset is the leading `pct` percent of the
            # rows of the stacked array, not a number of whole episodes.
            data_pct = data[:int(len(data) * pct // 100)]
            exp_var = PCvsVar(df=data_pct, n_comp=n_components)
            # Dimensionality estimate: mean of the component counts needed
            # to pass each level.
            dof_count = np.mean([get_dof_count(exp_var, level) for level in levels])
            dof_count_list.append(dof_count)
        dir_dof_count_dict[direction] = dof_count_list
    feature_dir_dof_count_dict[feature] = dir_dof_count_dict

In [None]:
# Display names for features (curve labels) and datasets (figure titles).
legend_dict = {
    "pos": "Joint angles",
    "vel": "Joint angular velocities",
    "muscle_act": "Muscle activations",
    "ccw": "Baoding CCW",
    "cw": "Baoding CW",
    "both": "Baoding CW and CCW",
    "control": "Control",
}
# Episode count per dataset, used to turn percentages into x-axis values.
max_episodes_dict = {
    "ccw": 500,
    "cw": 500,
    "both": 1000,
    "control": 1000,
}

# One figure per dataset, one curve per feature. No legend is drawn.
for dataset in ["cw", "ccw", "both", "control"]:
    fig, ax = plt.subplots(figsize=(4, 3))
    # The x values depend only on the dataset, so they are computed once.
    x = percentages * max_episodes_dict[dataset] / 100
    for feature in ["pos", "vel", "muscle_act"]:
        ax.plot(x, feature_dir_dof_count_dict[feature][dataset], ".-", label=legend_dict[feature])
    ax.set_title(legend_dict[dataset])
    ax.set_xlabel("Number of episodes", fontsize=12)
    ax.set_ylabel("Estimated dimensionality", fontsize=12)
    fig.savefig(
        os.path.join(ROOT_DIR, "data", "figures", "panel_1", f"dataset_size_validation_{dataset}.png"),
        format="png",
        dpi=600,
        bbox_inches="tight",
    )
    fig.show()