## Analysis of high- and low-variance PCs in velocity space during rotation (panel B)

In [None]:
from definitions import ROOT_DIR
import os
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import matplotlib
from functions_notebook import make_parallel_envs,set_config,cross_project_kin,plot_cross_projection,mean_ratio
import pickle
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from stable_baselines3.common.vec_env import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.environment_factory import EnvironmentFactory
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score

## Load the rollouts

In [None]:
# The clockwise and counter-clockwise rollouts are stored in sibling folders
# whose names differ only in the "_cw" / "_ccw" suffix.
cw_path = os.path.join(
    ROOT_DIR,
    "data",
    "rollouts",
    "final_model_500_episodes_activations_info_small_variations_cw",
    "data.hdf",
)
ccw_path = os.path.join(
    ROOT_DIR,
    "data",
    "rollouts",
    "final_model_500_episodes_activations_info_small_variations_ccw",
    "data.hdf",
)

rollouts_cw = pd.read_hdf(cw_path)
rollouts_ccw = pd.read_hdf(ccw_path)

# Stack both directions (clockwise rows first) and renumber the rows.
rollouts_df = pd.concat([rollouts_cw, rollouts_ccw]).reset_index()

In [None]:
def average_by_timestep(vec, timesteps):
    """Average the rows of ``vec`` that share the same timestep.

    Parameters
    ----------
    vec : np.ndarray
        Samples along the first axis.
    timesteps : array-like
        Timestep label of every row of ``vec`` (same length as ``vec``).

    Returns
    -------
    np.ndarray
        One row per distinct timestep, ordered by ascending timestep value,
        holding the mean of the matching rows of ``vec``.
    """
    # np.unique already returns its values in ascending order.
    step_values = np.unique(timesteps)
    per_step_means = [
        np.mean(vec[timesteps == step], axis=0) for step in step_values
    ]
    return np.vstack(per_step_means)

In [None]:
def measure_tangling(data, sample_freq=40, epsilon=1e-10):
    """Return the mean tangling of a trajectory.

    For every time point ``t`` the tangling is

        Q(t) = max_t' ||x'(t) - x'(t')||^2 / (epsilon + ||x(t) - x(t')||^2)

    where ``x'`` is the time derivative of the trajectory, estimated with
    ``np.gradient`` and scaled by ``sample_freq``. The function returns the
    average of ``Q(t)`` over all time points.

    Parameters
    ----------
    data : array-like, shape (n_timesteps, n_features)
        Trajectory, one row per time point. ``np.gradient`` needs at least
        two rows.
    sample_freq : float, optional
        Factor that converts per-sample differences into a time derivative.
        Defaults to 40, the value previously hard-coded here.
    epsilon : float, optional
        Constant added to the denominator so that coinciding states (including
        each point compared with itself) do not divide by zero. Defaults to
        1e-10, the value previously hard-coded here.

    Returns
    -------
    float
        Mean over time of the per-time-point tangling.
    """
    data = np.asarray(data)
    derivative = np.gradient(data, axis=0) * sample_freq

    tangling_per_step = []
    for t in range(derivative.shape[0]):
        derivative_dist = np.linalg.norm(derivative[t] - derivative, axis=1) ** 2
        state_dist = np.linalg.norm(data[t] - data, axis=1) ** 2
        # Worst-case ratio against every other time point.
        tangling_per_step.append(np.max(derivative_dist / (epsilon + state_dist)))

    return np.mean(tangling_per_step)  # as per definition

In [None]:
# PCA plots of different component ranges
# For each data set a full-rank PCA is fitted on all rollout steps. For every
# selected triplet of PCs, the per-timestep mean trajectory of each rotation
# direction is drawn in 3D and the tangling, averaged over the two
# directions, is printed.
num_muscles = 39
num_joints = 23
muscle_act = np.vstack(rollouts_df.muscle_act)
# NOTE(review): presumably the first num_joints observation entries are the
# joint positions -- confirm against the environment's observation layout.
pos = np.vstack(rollouts_df.observation)[:, :num_joints]

pos_pc_range_list = [(0, 3), (5, 8), (12, 15), (20, 23)]
muscle_act_pc_range_list = [(0, 3), (23, 26), (36, 39)]
cmap_list = ["Reds", "Blues"]
dir_list = ["cw", "ccw"]
label_list = ["Clockwise", "Counter-clockwise"]
data_name_list = ["joint_pos", "muscle_act"]

figure_dir = os.path.join(ROOT_DIR, "data", "figures", "panel_2")
os.makedirs(figure_dir, exist_ok=True)  # savefig does not create missing folders

for data, pc_range_list, data_name in zip([pos, muscle_act], [pos_pc_range_list, muscle_act_pc_range_list], data_name_list):
    pca = PCA(n_components=data.shape[1])
    out = pca.fit_transform(data)

    for pc_range in pc_range_list:
        fig = plt.figure(figsize=(4, 4))
        ax = fig.add_subplot(projection="3d")

        tangling_list = []
        for cmap_name, direction, label in zip(cmap_list, dir_list, label_list):
            # Select the rows of this rotation direction once and reuse the mask.
            direction_mask = (rollouts_df.task == direction).to_numpy()
            steps = rollouts_df.step.to_numpy()[direction_mask]
            plot_mat = out[direction_mask, pc_range[0]:pc_range[1]]

            mean_traj = average_by_timestep(plot_mat, steps)
            tangling_list.append(measure_tangling(mean_traj))

            # One colour per averaged time point, running over the upper half
            # of the colormap as the step increases. Sizing the list from the
            # trajectory keeps scatter working for any number of steps
            # (identical to the former fixed 200 colours when there are 200).
            cmap = matplotlib.colormaps[cmap_name]
            color_list = [cmap(i) for i in np.linspace(0.5, 1, len(mean_traj))]
            ax.scatter(mean_traj[:, 0], mean_traj[:, 1], mean_traj[:, 2], c=color_list, label=label)
        print(data_name, "PCs:", pc_range, "Tangling:", np.mean(tangling_list))

        # Transparent panes and grid lines give a clean white background.
        ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
        ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
        ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
        ax.xaxis._axinfo["grid"]['color'] =  (1,1,1,0)
        ax.yaxis._axinfo["grid"]['color'] =  (1,1,1,0)
        ax.zaxis._axinfo["grid"]['color'] =  (1,1,1,0)
        ax.view_init(30, 45)
        # PC indices are 0-based in pc_range but shown 1-based on the axes.
        ax.set_xlabel(f"\n\nPC {pc_range[0] + 1}", fontsize=12)
        ax.set_ylabel(f"\n\nPC {pc_range[0] + 2}", fontsize=12)
        ax.set_zlabel(f"\n\nPC {pc_range[0] + 3}", fontsize=12)
        ax.set_box_aspect(aspect=None, zoom=0.7)
        ax.ticklabel_format(style="sci", scilimits=(-2, 2))
        ax.locator_params(axis='both', nbins=4)
        ax.tick_params(axis='both', which='major', labelsize=12)
        ax.tick_params(axis='both', which='minor', labelsize=10)
        # ax.legend()
        out_name = f"pca_{data_name}_components_{'_'.join(str(el) for el in pc_range)}.png"
        fig.savefig(os.path.join(figure_dir, out_name), format="png", dpi=800, bbox_inches="tight")
        fig.show()

In [None]:
# Task decoding from the pca trajectories
# For each data set and PC range, every episode becomes one feature vector
# (its PC trajectory flattened over time) and a logistic regression is
# cross-validated on predicting the rotation direction from it.
num_muscles = 39
num_joints = 23
num_episodes_per_direction = 500
max_episode_len = 200
muscle_act = np.vstack(rollouts_df.muscle_act)
# NOTE(review): presumably the first num_joints observation entries are the
# joint positions -- confirm against the environment's observation layout.
pos = np.vstack(rollouts_df.observation)[:, :num_joints]

pos_pc_range_list = [(0, 3), (5, 8), (12, 15), (20, 23)]
pos_pc_range_span = 3  # kept for any other cell that still reads it
muscle_act_pc_range_list = [(0, 3), (23, 26), (36, 39)]
dir_list = ["cw", "ccw"]
data_name_list = ["joint_pos", "muscle_act"]

# Pull the lookup columns out of pandas once instead of once per episode.
episode_ids = rollouts_df.episode.to_numpy()
tasks = rollouts_df.task.to_numpy()
num_samples = num_episodes_per_direction * len(dir_list)

for data, pc_range_list, data_name in zip([pos, muscle_act], [pos_pc_range_list, muscle_act_pc_range_list], data_name_list):
    pca = PCA(n_components=data.shape[1])
    out = pca.fit_transform(data)

    for pc_range in pc_range_list:
        pc_span = pc_range[1] - pc_range[0]
        # Zero-initialised on purpose: episodes shorter than max_episode_len
        # only fill the start of their row, and the remainder must be a
        # well-defined padding value rather than uninitialised memory.
        X = np.zeros((num_samples, max_episode_len * pc_span))
        y = np.zeros(num_samples)
        for dir_idx, direction in enumerate(dir_list):
            task_mask = tasks == direction
            for ep_id in range(num_episodes_per_direction):
                # Rows are laid out direction by direction, episode by episode.
                sample_idx = ep_id + dir_idx * num_episodes_per_direction
                step_idx_mask = task_mask & (episode_ids == ep_id)
                row = out[step_idx_mask, pc_range[0]:pc_range[1]].flatten()
                X[sample_idx, :len(row)] = row
                y[sample_idx] = dir_idx
        X = X[:, ~np.all(X[1:] == X[:-1], axis=0)]  # drop constant columns

        classification = LogisticRegression()
        cv = KFold(n_splits=5, shuffle=True, random_state=42)
        cv_score = cross_val_score(classification, X, y, cv=cv)

        print(data_name, ", PC range:", pc_range, ", score:", cv_score, ", avg score:", np.mean(cv_score))