## Linear correlation of non-explicitly encoded variables with observations and LSTM output (panel D)

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import sklearn.linear_model
import sklearn.model_selection
import torch
from matplotlib.cm import get_cmap
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.vec_env import VecNormalize

from definitions import ROOT_DIR
from envs.environment_factory import EnvironmentFactory
from functions_notebook import make_parallel_envs


# Load the dataset and run linear regressions

In [None]:
# Load the two recorded rollout datasets (presumably counter-clockwise and
# clockwise rotations, judging by the "ccw" / "cw" folder suffixes) and stack
# them into a single dataframe.
rollouts_dir = os.path.join(ROOT_DIR, "data", "rollouts")
df1 = pd.read_hdf(
    os.path.join(rollouts_dir, "final_model_500_episodes_activations_info_ccw", "data.hdf")
)
df2 = pd.read_hdf(
    os.path.join(rollouts_dir, "final_model_500_episodes_activations_info_cw", "data.hdf")
)

# reset_index() is called without drop=True, so the previous per-file row
# index is kept as an extra column of the concatenated dataframe.
df = pd.concat([df1, df2]).reset_index()
df.keys()

In [None]:
# Cross-validated linear decoding of each scalar physical parameter from every
# recorded signal (network input, recurrent states, hidden layers and action).
# Every entry appended to `results_list` stores the array of per-fold scores
# returned by cross_val_score (default scorer of the estimator, i.e. R^2 for
# LinearRegression).
results_list = []
regression = sklearn.linear_model.LinearRegression()
# The splitter is seeded with an integer, so a single instance produces the
# same folds on every call and can be shared by all (target, key) pairs.
cv = sklearn.model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
for target in ["mass_1", "mass_2", "size_1", "size_2", "friction_0", "friction_1", "friction_2", "x_radius", "y_radius"]:
    # The regression target only depends on the outer loop variable.
    y = df[target].to_numpy()
    for key in ["observation", "lstm_state_0", "lstm_state_1", "lstm_out", "layer_1_out", "layer_2_out", "action"]:
        # Each cell of df[key] holds one vector; stack them into a 2D design matrix.
        X = np.array(df[key].to_list())
        cv_score = sklearn.model_selection.cross_val_score(regression, X, y, cv=cv)
        print("Key:", key, " target:", target, "score:", cv_score)
        results_list.append({"input": key, "target": target, "score": cv_score})


In [None]:
# Decode the categorical "task" label from every recorded signal with a
# cross-validated logistic regression. Scores are the per-fold values of the
# classifier's default scorer (mean accuracy) and are appended to the same
# `results_list` as the regression results.
classification = sklearn.linear_model.LogisticRegression(max_iter=10_000)
target = "task"
# Label vector and splitter do not depend on the input signal: build them once.
# The integer seed makes the stratified folds identical for every key.
y = df[target].to_numpy()
cv = sklearn.model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for key in ["observation", "lstm_state_0", "lstm_state_1", "lstm_out", "layer_1_out", "layer_2_out", "action"]:
    # Each cell of df[key] holds one vector; stack them into a 2D design matrix.
    X = np.array(df[key].to_list())
    cv_score = sklearn.model_selection.cross_val_score(classification, X, y, cv=cv)
    print("Key:", key, " target:", target, "score:", cv_score)
    results_list.append({"input": key, "target": target, "score": cv_score})

In [None]:
# Cross-validated linear decoding of the vector-valued targets (hand position
# and velocity) from every recorded signal. The target is a 2D array here, so
# LinearRegression is fitted as a multi-output regression. Results go to the
# same `results_list` as the scalar targets.
regression = sklearn.linear_model.LinearRegression()
# Integer-seeded splitter: identical folds on every call, so it is shared by
# all (target, key) pairs instead of being rebuilt in the inner loop.
cv = sklearn.model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
for target in ["hand_pos", "hand_vel"]:
    # Stack the per-row target vectors once per target (independent of `key`).
    y = np.array(df[target].to_list())
    for key in ["observation", "lstm_state_0", "lstm_state_1", "lstm_out", "layer_1_out", "layer_2_out", "action"]:
        X = np.array(df[key].to_list())
        cv_score = sklearn.model_selection.cross_val_score(regression, X, y, cv=cv)

        print("Key:", key, " target:", target, "score:", cv_score)
        results_list.append({"input": key, "target": target, "score": cv_score})

In [None]:
# Collapse the per-fold scores of every (input, target) pair into their mean
# and standard deviation across folds.
# NOTE: despite the "_sem" suffix of the list name, the spread stored in
# "score_std" is np.std of the fold scores, not a standard error of the mean.
results_list_mean_sem = []
for el in results_list:
    fold_scores = el["score"]
    results_list_mean_sem.append(
        {
            "input": el["input"],
            "target": el["target"],
            "score_mean": np.mean(fold_scores),
            "score_std": np.std(fold_scores),
        }
    )
results_df = pd.DataFrame(results_list_mean_sem)
results_df

In [None]:
# Line plot of the mean decoding score of every target across the network
# layers; each curve is divided by its own maximum so that all targets share
# the same vertical scale.
layers_list = ["observation", "lstm_state_1", "lstm_out", "layer_1_out", "layer_2_out", "action"]
layers_name_list = ["Observation", "LSTM state", "LSTM out", "Layer 1 out", "Layer 2 out", "Action"]
targets_list = ["mass_1", "mass_2", "size_1", "size_2", "friction_0", "friction_1", "friction_2", "x_radius", "y_radius", "task", "hand_pos", "hand_vel"]
targets_name_list = ["Mass 1", "Mass 2", "Size 1", "Size 2", "Friction 0", "Friction 1", "Friction 2", "Radius x", "Radius y", "Task", "Joint pos", "Joint vel"]

# One distinct colour per target. 'brg' is a continuous colormap: calling it
# with small integers indexes adjacent entries of its lookup table, which are
# almost the same colour, so it is sampled with evenly spaced floats in [0, 1].
cmap = plt.get_cmap('brg')
colors = [cmap(v) for v in np.linspace(0, 1, len(targets_list))]

fig, ax = plt.subplots(1)
for target, target_name, c in zip(targets_list, targets_name_list, colors):
    score_list = []
    for layer in layers_list:
        # .item() requires exactly one matching row in results_df.
        score = results_df[(results_df.input == layer) & (results_df.target == target)].score_mean.item()
        score_list.append(score)
    score_vec = np.array(score_list) / max(score_list)
    ax.plot(score_vec, label=target_name, color=c)
ax.set_xticks(range(len(layers_list)), labels=layers_name_list, rotation=30)
ax.set_ylabel("Rescaled score")
# Anchor the legend at the top-right corner of the axes.
ax.legend(bbox_to_anchor=(1, 1))
# NOTE: the 'tab20' variant of this figure in the following cell saves to this
# same path and therefore overwrites this file.
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_4", "layer_encoding.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()
    

In [None]:
# Line plot of the mean decoding score of every target across the network
# layers (qualitative 'tab20' palette); each curve is divided by its own
# maximum so that all targets share the same vertical scale.
layers_list = ["observation", "lstm_state_1", "lstm_out", "layer_1_out", "layer_2_out", "action"]
layers_name_list = ["Observation", "LSTM state", "LSTM out", "Layer 1 out", "Layer 2 out", "Action"]
targets_list = ["mass_1", "mass_2", "size_1", "size_2", "friction_0", "friction_1", "friction_2", "x_radius", "y_radius", "task", "hand_pos", "hand_vel"]
targets_name_list = ["Mass 1", "Mass 2", "Size 1", "Size 2", "Friction 0", "Friction 1", "Friction 2", "Radius x", "Radius y", "Task", "Joint pos", "Joint vel"]

# One distinct colour per target: integer arguments index the colour table of
# the colormap, so consecutive integers give consecutive palette entries.
# The modulo only guards against having more targets than palette entries.
cmap = plt.get_cmap('tab20')
colors = [cmap(i % cmap.N) for i in range(len(targets_list))]

fig, ax = plt.subplots(1)
for target, target_name, c in zip(targets_list, targets_name_list, colors):
    score_list = []
    for layer in layers_list:
        # .item() requires exactly one matching row in results_df.
        score = results_df[(results_df.input == layer) & (results_df.target == target)].score_mean.item()
        score_list.append(score)
    score_vec = np.array(score_list) / max(score_list)
    ax.plot(score_vec, label=target_name, color=c)
ax.set_xticks(range(len(layers_list)), labels=layers_name_list, rotation=30)
ax.set_ylabel("Rescaled score")
# Anchor the legend at the top-right corner of the axes.
ax.legend(bbox_to_anchor=(1, 1))
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_4", "layer_encoding.png"), format="png", dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# Grouped bar plot: for every decoded quantity, one bar per network layer
# showing the mean cross-validated score, with the standard deviation across
# folds as error bar.
layers_list = ["observation", "lstm_state_1", "lstm_out", "layer_1_out", "layer_2_out", "action"]
layers_name_list = ["Observation", "Memory", "LSTM output", "Layer 1", "Layer 2", "Action"]
targets_list = ["mass_1", "mass_2", "size_1", "size_2", "friction_0", "friction_1", "friction_2", "x_radius", "y_radius", "task", "hand_pos", "hand_vel"]
targets_name_list = ["Mass 1", "Mass 2", "Size 1", "Size 2", "Friction 0", "Friction 1", "Friction 2", "Radius x", "Radius y", "Task", "Joint pos", "Joint vel"]

# Reshape to (target x input) tables, with rows ordered as in targets_list.
# pivot() raises if results_df holds duplicate (target, input) pairs.
pivot_data = results_df.pivot(index="target", columns="input", values="score_mean").loc[targets_list]
pivot_std = results_df.pivot(index="target", columns="input", values="score_std").loc[targets_list]

# Width of each bar
bar_width = 0.15

# Left-most bar position of every group
x = np.arange(len(targets_list))

fig, ax = plt.subplots(figsize=(5, 3.5))
# plt.get_cmap is used as in the other plotting cells.
cmap = plt.get_cmap("coolwarm")

for i, layer in enumerate(layers_list):
    ax.bar(
        x + i * bar_width,
        pivot_data[layer],
        bar_width,
        yerr=pivot_std[layer],
        label=layers_name_list[i],
        color=cmap(i / len(layers_list)),
        alpha=0.9,
    )

# Put each tick at the centre of its group. Bars are centre-aligned at
# x + i * bar_width for i = 0 .. len(layers_list) - 1, so the group centre is
# the midpoint of the first and last bar positions. Only the plotted layers
# count here, not every column of the pivot table.
ax.set_xticks(x + (len(layers_list) - 1) / 2 * bar_width)
ax.set_xticklabels(targets_name_list, rotation=45, ha='right')

ax.set_ylabel('Encoding score', fontsize=12)

ax.legend()

plt.tight_layout()
plt.savefig(os.path.join(ROOT_DIR, "data", "figures", "panel_4", "layer_encoding_barplot.png"), format="png", dpi=600, bbox_inches="tight")

plt.show()

1. Record the observations and LSTM outputs together with hand's velocity and acceleration, ball mass, size and friction, and trajectory radius. __Go to 2. to directly load the previously-obtained data__

In [None]:
# Saved normalisation wrapper and trained policy of the same training run.
PATH_TO_NORMALIZED_ENV = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl",
)
PATH_TO_PRETRAINED_NET = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/model.zip",
)

# Keyword arguments forwarded to the environment constructors.
config = {
    # Reward weights: only "solved" has a non-zero weight.
    "weighted_reward_keys": {
        "pos_dist_1": 0,
        "pos_dist_2": 0,
        "act_reg": 0,
        "alive": 0,
        "solved": 5,
        "done": 0,
        "sparse": 0,
    },
    "enable_rsi": False,
    "rsi_probability": 0,
    "balls_overlap": False,
    "overlap_probability": 0,
    "noise_fingers": 0,
    "limit_init_angle": 0,
    # Two-element lists: presumably [low, high] sampling ranges - TODO confirm
    # against the environment implementation.
    "goal_time_period": [4, 6],
    "goal_xrange": [0.02, 0.03],
    "goal_yrange": [0.022, 0.032],
    "obj_size_range": [0.018, 0.021],
    "obj_mass_range": [0.03, 0.3],
    "obj_friction_change": [0.2, 0.001, 2e-05],
    "task_choice": "random",
}

env_name = 'CustomMyoBaodingBallsP2'
render = False

# Wrap a single environment with the saved VecNormalize object, then switch
# off its training flag and its reward normalisation. In the rollout cell
# this wrapper is only used through envs.normalize_obs().
envs = VecNormalize.load(
    PATH_TO_NORMALIZED_ENV,
    make_parallel_envs(env_name, config, num_env=1),
)
envs.training = False
envs.norm_reward = False

# Every schedule-like object is replaced by a constant-zero function on load.
custom_objects = {
    name: (lambda _: 0)
    for name in ("learning_rate", "lr_schedule", "clip_range")
}
model = RecurrentPPO.load(
    PATH_TO_PRETRAINED_NET,
    env=envs,
    device="cpu",
    custom_objects=custom_objects,
)

# EVALUATE
# The raw environment built here is the one that gets reset and stepped.
eval_model = model
eval_env = EnvironmentFactory.create(env_name, **config)

num_episodes = 500

# Roll out the policy for `num_episodes` episodes on the raw environment and
# record, at every step, the normalised observation, the recurrent state, the
# activations of the actor layers, the action and the physical parameters of
# the episode. The resulting dataframe is saved to an HDF file.
data_list = []
for n in range(num_episodes):
    # Fresh zero recurrent state at the start of every episode.
    lstm_states = (np.zeros((1, 1, 256)), np.zeros((1, 1, 256)))
    cum_rew = 0
    step = 0
    obs = eval_env.reset()
    episode_starts = torch.ones((1,))
    done = False
    while not done:
        # Normalise once per step so that predict(), the manual forward pass
        # and the stored record all use the very same array.
        normalized_obs = envs.normalize_obs(obs)
        # Recurrent state *before* this step, needed to replay the LSTM by hand
        # (lstm_states is overwritten by predict() just below).
        lstm_states_tensor = tuple(
            torch.tensor(state, dtype=torch.float32).reshape(1, -1)
            for state in lstm_states
        )
        action, lstm_states = eval_model.predict(
            normalized_obs,
            state=lstm_states,
            episode_start=episode_starts,
            deterministic=True,
        )
        # Manual forward pass through the actor to expose the intermediate
        # activations that predict() does not return.
        with torch.no_grad():
            policy = eval_model.policy
            features = policy.extract_features(torch.tensor(normalized_obs).reshape(1, -1))
            # Zero the incoming state on the first step of an episode.
            keep_state = 1 - episode_starts
            lstm_out, _ = policy.lstm_actor(
                features,
                (lstm_states_tensor[0] * keep_state, lstm_states_tensor[1] * keep_state),
            )
            policy_net = policy.mlp_extractor.policy_net
            # Modules 0/1 and 2/3 are applied in pairs (presumably linear layer
            # followed by its activation - TODO confirm).
            layer_1_out = policy_net[1](policy_net[0](lstm_out))
            layer_2_out = policy_net[3](policy_net[2](layer_1_out))
            action_pred = policy._get_action_dist_from_latent(layer_2_out).mode().clip(-1, 1)

        # Sanity check: the manual pass must reproduce the action of predict().
        assert np.allclose(action_pred, action), (
            f"Manual forward pass disagrees with predict(): {action_pred} vs {action}"
        )
        next_obs, rewards, done, info = eval_env.step(action)
        episode_starts = done
        cum_rew += rewards
        step += 1

        # First 23 observation entries (presumably the joint positions - TODO
        # confirm) and their finite difference between consecutive
        # observations; 0.0025 is presumably the elapsed time - TODO confirm.
        hand_pos = obs[0:23]
        hand_vel = (next_obs[0:23] - hand_pos) / 0.0025

        data_point = {
            'episode': n,
            # `step` has already been incremented, so time steps start at 1.
            'time step': step,
            'observation': normalized_obs,
            # Recurrent state returned by predict(), i.e. after this step.
            "lstm_state_0": np.squeeze(lstm_states[0]),
            "lstm_state_1": np.squeeze(lstm_states[1]),
            "lstm_out": np.squeeze(lstm_out.numpy()),
            "layer_1_out": np.squeeze(layer_1_out.numpy()),
            "layer_2_out": np.squeeze(layer_2_out.numpy()),
            "action": action,
            "mass_1": eval_env.sim.model.body_mass[eval_env.object1_bid],
            "size_1": eval_env.sim.model.geom_size[eval_env.object1_gid][0],
            "mass_2": eval_env.sim.model.body_mass[eval_env.object2_bid],
            "size_2": eval_env.sim.model.geom_size[eval_env.object2_gid][0],
            # All three friction coefficients are read from object 1 only.
            "friction_0": eval_env.sim.model.geom_friction[eval_env.object1_gid][0],
            "friction_1": eval_env.sim.model.geom_friction[eval_env.object1_gid][1],
            "friction_2": eval_env.sim.model.geom_friction[eval_env.object1_gid][2],
            "x_radius": eval_env.x_radius,
            "y_radius": eval_env.y_radius,
            "task": eval_env.which_task.value,
            "hand_pos": hand_pos,
            "hand_vel": hand_vel,
        }
        data_list.append(data_point)
        obs = next_obs
    print("Reward:", cum_rew, "length:", step)

df = pd.DataFrame(data_list)
df.to_hdf(os.path.join(ROOT_DIR, "data", "basecamp", "activation_df.hdf"), key="activations")


2. Load the data

In [None]:
# # Load the file from Basecamp : 'lin_corr'
# params = pickle.load(open(os.path.join(ROOT_DIR, "data", "basecamp", "lin_corr"),'rb'))

# M=params['Mass']
# Ra=params['Radius']
# Fr=params['Friction']
# S=params['Size']
# VEL=params['Velocity']
# ACC=params['Acceleration']
# OBS=params['Observations']
# LSTM=params['LSTM']

3. __Go to 4. to directly access the previously-obtained data.__\
a. Compute the linear regression and the associated coefficient of determination\
b. Save the data 

In [None]:
# regression = sklearn.linear_model.LinearRegression()

# R_lstm = {'hand velocity':None,'hand acceleration':None,'mass':None,'size':None,'friction':None,'radius':None}
# R_obs = {'hand velocity':None,'hand acceleration':None,'mass':None,'size':None,'friction':None,'radius':None}

# layers = [OBS,LSTM]
# R = [R_obs,R_lstm]

# for i in range(len(R)):

#     lin_model = regression.fit(y=VEL,X=layers[i])
#     R[i]['hand velocity'] = np.round(lin_model.score(y=VEL,X=layers[i]),5)

#     lin_model = regression.fit(y=ACC,X=layers[i])
#     R[i]['hand acceleration'] = np.round(lin_model.score(y=ACC,X=layers[i]),5)

#     lin_model = regression.fit(y=M,X=layers[i])
#     R[i]['mass'] = np.round(lin_model.score(X=layers[i],y=M),5)

#     lin_model = regression.fit(y=S,X=layers[i])
#     R[i]['size'] = np.round(lin_model.score(y=S,X=layers[i]),5)

#     lin_model = regression.fit(y=Fr,X=layers[i])
#     R[i]['friction'] = np.round(lin_model.score(y=Fr,X=layers[i]),5)

#     lin_model = regression.fit(y=Ra,X=layers[i])
#     R[i]['radius'] = np.round(lin_model.score(y=Ra,X=layers[i]),5)

# for R_layer in R : 
#     print(R_layer)

# fp = ".csv"
# pd.DataFrame(R).to_csv(os.path.join(ROOT_DIR,fp))

4. Load the previously-obtained R squared

In [None]:
# # Load the file from Basecamp : 'v2_32_phase_2_smaller_rate_resume.csv'
# R = pd.read_csv(os.path.join(ROOT_DIR,"SIL-Results/Linear-correlation/v2_32_phase_2_smaller_rate_resume.csv"))

In [None]:
# Reload the activations dataframe from the path the rollout-recording cell
# writes to, and list its columns.
activation_path = os.path.join(ROOT_DIR, "data", "basecamp", "activation_df.hdf")
df = pd.read_hdf(activation_path)
df.keys()