# MyoChallenge Policy Analysis


In [7]:
from definitions import ROOT_DIR
import os
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from functions import make_parallel_envs,set_config
import pickle
from stable_baselines3.common.vec_env import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.environment_factory import EnvironmentFactory
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler


ImportError: cannot import name 'set_config' from 'functions' (/home/ingster/Bureau/MyoChallengeAnalysis/src/functions.py)

## 1. Analysis of principal actions

### a. Performance vs. number of dimensions removed in the action space

1. Generate and save the data

In [None]:
# Roll out the pretrained recurrent policy for `num_ep` episodes and pickle,
# for every episode, its cumulative reward and the sequence of actions taken.
num_ep = 20
n_comp = 39

PATH_TO_NORMALIZED_ENV = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl",
)
PATH_TO_PRETRAINED_NET = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/model.zip",
)

env_name = "CustomMyoBaodingBallsP2"
render = True

config = set_config(period=5,rot_dir="cw")
rollouts = []

# `envs` restores the saved VecNormalize wrapper; it is passed to the model
# loader and used below for `normalize_obs`. Episodes themselves are stepped
# in the separate, non-vectorized `eval_env`.
envs = make_parallel_envs(env_name, config, num_env=1)
envs = VecNormalize.load(PATH_TO_NORMALIZED_ENV, envs)
envs.training = False
envs.norm_reward = False
# Replace the stored schedules with constant-zero callables at load time
# (presumably to avoid deserializing the training schedules -- TODO confirm).
custom_objects = {
    "learning_rate": lambda _: 0,
    "lr_schedule": lambda _: 0,
    "clip_range": lambda _: 0,
}
model = RecurrentPPO.load(
    PATH_TO_PRETRAINED_NET, env=envs, device="cpu", custom_objects=custom_objects
)

eval_model = model
eval_env = EnvironmentFactory.create(env_name,**config)

for n in range(num_ep):
    acts = []
    cum_reward = 0
    # Start every episode with a fresh recurrent state.
    lstm_states = None
    obs = eval_env.reset()
    print(eval_env.which_task)
    episode_starts = np.ones((1,), dtype=bool)
    done = False
    while not done:
        if render:
            eval_env.sim.render(mode="window")

        action, lstm_states = eval_model.predict(
            envs.normalize_obs(obs),
            state=lstm_states,
            episode_start=episode_starts,
            deterministic=True,
        )

        obs, rewards, done, info = eval_env.step(action)
        episode_starts = done
        cum_reward += rewards
        acts.append(action)
    print('episode %s : '%n,cum_reward)
    rollouts.append({'reward':cum_reward,'action':np.array(acts)})

# TODO: set the output file path before running this cell.
your_path = ""
# The context manager closes the file even if pickling fails.
with open(your_path, 'wb') as fp_rollouts:
    pickle.dump(rollouts, fp_rollouts)

2. Load the data

In [None]:
# TODO(ABI): indicate the path of the pickled rollouts file to load.
your_path = ""
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(your_path, 'rb') as fh:
    rollouts = pickle.load(fh)

3. a. Compute the principal actions\
b. Compute the performance when the actions are projected on a progressively lower-dimensional action subspace\
c. Save the data

In [None]:
# Fit a PCA on all recorded actions, then re-run the policy while projecting
# every action onto the first (n_comp - k) principal components, for
# k = 0 .. n_comp-1, and record the mean cumulative reward for each k.
# Relies on `rollouts`, `n_comp`, `num_ep`, `render`, `envs`, `eval_env` and
# `eval_model` being defined by earlier cells.
actions = np.concatenate([rollout['action'] for rollout in rollouts])
performance = []
pca = PCA(n_components=n_comp).fit(actions)

# Take the action dimensionality from the fitted PCA instead of hard-coding it.
act_mean = pca.mean_
act_dim = act_mean.shape[0]

for k in range(n_comp):
    print(k)
    # Keep the leading components; k is the number of dimensions removed.
    components = pca.components_[:n_comp-k]
    components_t = components.T
    performance_ep = []
    for n in range(num_ep):
        cum_reward = 0
        # Start every episode with a fresh recurrent state.
        lstm_states = None
        obs = eval_env.reset()
        episode_starts = np.ones((1,), dtype=bool)
        done = False
        while not done:
            if render:
                eval_env.sim.render(mode="window")

            action, lstm_states = eval_model.predict(
                envs.normalize_obs(obs),
                state=lstm_states,
                episode_start=episode_starts,
                deterministic=True,
            )

            # Project onto the retained subspace, then map back to action space.
            action_proj = np.dot(action.reshape(-1, act_dim) - act_mean, components_t)
            action_backproj = np.dot(action_proj, components) + act_mean
            obs, rewards, done, info = eval_env.step(action_backproj.reshape(act_dim,))
            episode_starts = done
            cum_reward += rewards
        performance_ep.append(cum_reward)
    performance.append({'components':components,'reward':np.mean(np.array(performance_ep))})

# TODO: set the output file path before running this cell.
path = ""
# The context manager closes the file even if pickling fails.
with open(path, 'wb') as fp_acts_pcs:
    pickle.dump(performance, fp_acts_pcs)

4. Load the data

In [None]:
# TODO(ABI): indicate the path of the pickled performance/components file to load.
your_path = ""
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(your_path, 'rb') as fh:
    performance_components = pickle.load(fh)

5. Plot the performance vs. number of dimensions removed in the action space

In [None]:
# Plot the mean cumulative reward against the number of removed dimensions.
# Entry k of `performance_components` holds the result with k dimensions
# removed, so the x-axis is derived from the loaded data itself rather than
# from `n_comp`, which only exists if the generation cell was run.
perfs = [d['reward'] for d in performance_components]
comps = [d['components'] for d in performance_components]
plt.plot(range(len(perfs)), perfs, linewidth=1)
plt.xlabel('Number of dimensions \nremoved in the action space',fontsize=21,labelpad=10)
plt.ylabel('Cumulative reward',fontsize=21,labelpad=10)
plt.yticks(fontsize=21)
plt.xticks(fontsize=21)
plt.subplots_adjust(left=0.2,bottom=0.23)
plt.show()

### b. Heatmap of principal actions

1. Load the principal actions (data generated in point a.3.) 

In [None]:
# TODO(ABI): indicate the path of the pickled performance/components file to load.
fp = ""
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(fp, 'rb') as fh:
    performance_components = pickle.load(fh)
# The first entry carries the components saved for the first sweep step.
principal_actions = performance_components[0]['components']

2. Plot the heatmap

In [None]:
# Heatmap of the first 13 principal actions (rows) over the action
# dimensions (columns).
fig = sns.heatmap(pd.DataFrame(principal_actions[:13]),cmap="coolwarm").get_figure()
plt.xlabel('Action dimensions',fontsize=21)
plt.ylabel('Principal actions',fontsize=21)
plt.yticks(rotation=0,fontsize=17)
# Heatmap cell i spans [i, i+1), so put each tick at the cell centre (i + 0.5)
# and label every third column with its 1-based action dimension.
col_idx = np.arange(0, np.shape(principal_actions)[1], 3)
plt.xticks(ticks=col_idx + 0.5, labels=col_idx + 1, rotation=45, fontsize=17)
plt.subplots_adjust(left=0.15,bottom=0.2)
# The original ended with a bare `plt.plot`, which does nothing.
plt.show()

### c. Principal action vs. phase of rotation (time step)

1. Generate and save the data

In [None]:
# Roll out the pretrained recurrent policy for `num_ep` episodes and pickle a
# list holding one (max_steps, act_dim) action array per episode.
num_ep = 100
n_comp = 39
max_steps = 200  # episodes shorter than this are zero-padded to this length
act_dim = 39     # width of the padded per-episode action array

PATH_TO_NORMALIZED_ENV = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/env.pkl",
)
PATH_TO_PRETRAINED_NET = os.path.join(
    ROOT_DIR,
    "trained_models/curriculum_steps_complete_baoding_winner/32_phase_2_smaller_rate_resume/model.zip",
)

env_name = "CustomMyoBaodingBallsP2"
render = False

config = set_config(period=5,rot_dir="cw")
rollouts = []

# `envs` restores the saved VecNormalize wrapper; it is passed to the model
# loader and used below for `normalize_obs`. Episodes themselves are stepped
# in the separate, non-vectorized `eval_env`.
envs = make_parallel_envs(env_name, config, num_env=1)
envs = VecNormalize.load(PATH_TO_NORMALIZED_ENV, envs)
envs.training = False
envs.norm_reward = False
# Replace the stored schedules with constant-zero callables at load time
# (presumably to avoid deserializing the training schedules -- TODO confirm).
custom_objects = {
    "learning_rate": lambda _: 0,
    "lr_schedule": lambda _: 0,
    "clip_range": lambda _: 0,
}
model = RecurrentPPO.load(
    PATH_TO_PRETRAINED_NET, env=envs, device="cpu", custom_objects=custom_objects
)

eval_model = model
eval_env = EnvironmentFactory.create(env_name,**config)
actions = []
for n in range(num_ep):
    print(n)
    acts_1ep = []
    # Accumulated for the episode but not included in the saved data.
    cum_reward = 0
    # Start every episode with a fresh recurrent state.
    lstm_states = None
    obs = eval_env.reset()
    episode_starts = np.ones((1,), dtype=bool)
    done = False
    while not done:
        if render:
            eval_env.sim.render(mode="window")

        action, lstm_states = eval_model.predict(
            envs.normalize_obs(obs),
            state=lstm_states,
            episode_start=episode_starts,
            deterministic=True,
        )

        obs, rewards, done, info = eval_env.step(action)
        episode_starts = done
        cum_reward += rewards
        acts_1ep.append(action)
    if len(acts_1ep) < max_steps:
        # Zero-pad early-terminated episodes so all arrays share one shape.
        # NOTE: the padded steps count as all-zero actions in any later average.
        temp = np.zeros((max_steps, act_dim))
        temp[:len(acts_1ep)] += acts_1ep
        acts_1ep = temp
    actions.append(np.array(acts_1ep))

# TODO: set the output file path before running this cell.
your_path = ""
# The context manager closes the file even if pickling fails.
with open(your_path, 'wb') as fp_rollouts:
    pickle.dump(actions, fp_rollouts)

1. Load the data

In [None]:
# TODO(ABI): insert the path of the pickled per-episode actions file to load.
fp = '/home/ingster/Bureau/SIL-BigResults/rollout_100ep'
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(fp, 'rb') as fh:
    actions = pickle.load(fh)

2. a. Compute the average principal actions \
b. Plot the principal-action (PA) weights vs. time step for the rotation phase

In [None]:
# Fit a PCA on the episode-averaged action trajectory and plot, for the first
# `n_pa` principal actions, their weights over the rotation phase, each
# rescaled to [-1, 1].
pca = PCA(n_components=n_comp)
mean_actions = sum(actions)/len(actions)
mean_weights = pca.fit_transform(mean_actions)

minmax = MinMaxScaler(feature_range=(-1,1))
weights = []
# Rotation phase (transient phase from 0 to 13 time steps)
t_min = 13
t_max = 200
n_pa = 15  # number of leading principal actions shown
for j in range(n_pa):
    # Bound the slice by t_max so it always matches the reshape below.
    phase_weights = mean_weights[t_min:t_max, j].reshape(t_max - t_min, 1)
    # The scaler is refit for every principal action.
    norm_weights = minmax.fit_transform(phase_weights)
    weights.append(norm_weights)

fig = sns.heatmap(pd.DataFrame(np.squeeze(weights)),cmap="coolwarm").get_figure()
# Heatmap cell i spans [i, i+1), so ticks go at the cell centres (i + 0.5).
plt.yticks(ticks=np.arange(n_pa) + 0.5, labels=np.arange(1, n_pa + 1), rotation=0, fontsize=17)
col_idx = np.arange(0, t_max - t_min, 21)
plt.xticks(rotation=45, ticks=col_idx + 0.5, labels=col_idx + t_min, fontsize=16)
plt.xlabel('Time step',fontsize=21)
plt.ylabel('Principal actions',fontsize=21)
plt.subplots_adjust(left=0.15,bottom=0.2)
plt.show()

## 2. Analysis of principal actions