In [None]:
# Must run this, the one installed is missing mpi
!pip install stable-baselines[mpi]==2.10.

In [None]:
import numpy as np

from stable_baselines import PPO2
from stable_baselines.common.cmd_util import make_vec_env
from stable_baselines.common.policies import MlpPolicy

from stable_baselines.common.vec_env import VecNormalize

import sys; sys.path.insert(0,'..')
from differential_drive_env_v2_wrappers import DifferentialDriveEnvV2Unscaled, RLAgentUnscalingWrapper
import baseline_integration as bi

from stable_baselines.gail import generate_expert_traj
from stable_baselines.gail import ExpertDataset
from stable_baselines.common.evaluation import evaluate_policy 

import yaml

from datetime import datetime

In [None]:
def find_closest_model(models_dict,value):
  """Return the entry of ``models_dict`` whose value is nearest to ``value``.

  Args:
    models_dict: non-empty mapping from a model name to a numeric value.
    value: target number the stored values are compared against.

  Returns:
    A tuple ``(key, model_value)``: the key with the smallest absolute
    difference to ``value`` and the value stored under it, as a Python float.
    When several entries are equally close, the first one in the mapping's
    iteration order is returned.

  Raises:
    ValueError: if ``models_dict`` is empty.
  """
  if not models_dict:
    raise ValueError("models_dict must contain at least one model")
  keys = list(models_dict.keys())
  values = np.array(list(models_dict.values()), dtype=float)
  # argmin always yields a single index (the first minimum), so equally close
  # candidates no longer break the conversion to a plain int.
  closest_index = int(np.argmin(np.abs(value - values)))
  # Builtin float replaces the np.float alias, which recent NumPy no longer provides.
  return keys[closest_index], float(values[closest_index])

In [None]:
# YAML log to read (path is relative to the working directory)
yaml_path = './2021-06-25_it1_online_multi_track_probs.yaml'

# NOTE(review): yaml.load with FullLoader is used here; confirm the file is trusted,
# yaml.safe_load is the stricter alternative if it only holds plain data.
with open(yaml_path) as yaml_stream:
    parsed_yaml_file = yaml.load(yaml_stream, Loader=yaml.FullLoader)

# Keep only the last entry stored under 'cma_hist'
cma_history = parsed_yaml_file['cma_hist']
new_params_set = cma_history[-1]

In [None]:
# Available expert models: model name -> value compared against the new radii
models_dict = dict(
    ppo2_gaussian_012=0.12,
    ppo2_gaussian_016=0.16,
    ppo2_gaussian_017=0.17,
)

In [None]:
# First new radius: entry 0 of the updated parameter set
new_r_1 = new_params_set[0]
# The existing model whose value is nearest to that radius serves as the expert
closest_expert_1 = find_closest_model(models_dict, new_r_1)
model_name_expert_1, r_expert_1 = closest_expert_1

In [None]:
# Environment used to roll out the expert: it is built with the expert's radius r_expert_1
init_pose = [0.3, 0.3, np.pi]
env = DifferentialDriveEnvV2Unscaled(
    init_position=init_pose,
    goal_threshold=0.1,
    L=0.5,
    r=r_expert_1,
    max_duration=500,
)
# Wrap the single environment as a vectorized env, then add VecNormalize on top
env = make_vec_env(lambda: env, n_envs=1)
env = VecNormalize(env)

# Name of the expert dataset, derived from the expert model's name
dataset_filename_1 = model_name_expert_1 + '_expert_dataset'

# Load the expert model and attach the environment to it
model = PPO2.load(model_name_expert_1)
model.set_env(env)

# generate_expert_traj(model, file_name, n_episodes): record 50 expert episodes
generate_expert_traj(model, dataset_filename_1, n_episodes=50)

# Neither the expert model nor its environment is needed after this point
del model
del env

In [None]:
# Create the training environments with the first updated radius value.
# The factory builds a NEW environment on every call: handing make_vec_env a lambda
# that returns one pre-built instance would make all 4 sub-environments share the
# same object (and therefore the same state).
env = make_vec_env(
    lambda: DifferentialDriveEnvV2Unscaled(
        init_position=init_pose,
        goal_threshold=0.1,
        L=0.5,
        r=new_r_1,
        max_duration=500,
    ),
    n_envs=4,
)
env = VecNormalize(env)

In [None]:
# Expert demonstrations recorded for the first radius, read in batches of 32
expert_npz_path_1 = dataset_filename_1 +'.npz'
dataset = ExpertDataset(expert_path=expert_npz_path_1, batch_size=32)

# New PPO2 model with an MLP policy on the current environment
model = PPO2(MlpPolicy, env, learning_rate=5e-6, verbose=1)

# Pretrain the model on the expert dataset for 500 epochs
model.pretrain(dataset, n_epochs=500)

In [None]:
# Continue training the pretrained model with PPO2 itself
total_training_timesteps = 500000
model.learn(total_timesteps=total_training_timesteps)

In [None]:
# Name the new model after the fractional digits of its radius
# (e.g. 0.12 -> "ppo2_gaussian_012") and save it under that name
fractional_digits_1 = str(new_r_1 % 1).replace('0.','')
pretrained_model_name_1 = "ppo2_gaussian_0" + fractional_digits_1
model.save(pretrained_model_name_1)

In [None]:
# Visualize the trajectory obtained with the first new model.

# Model to replay plus the environment / wrapper classes handed to the runner
ppo2_model_name = pretrained_model_name_1 # Change this
env_class_name = DifferentialDriveEnvV2Unscaled
rl_agent_wrapper_class = RLAgentUnscalingWrapper
rl_agent_wrapper_params = {
    "state_scaling_factors": [1.0, 1.0, np.pi],
    "action_scaling_factors": [3.0, 3.0],
}
# Initial robot pose (a theta of 0 is the alternative that was tried)
init_robot_pose = {'x': 0.3, 'y': 0.3, 'theta': np.pi}

# NOTE(review): 500 and 0.50 presumably mirror max_duration and L used for the
# environments above — confirm against baseline_integration.
obss, actions = bi.load_and_run_model(
    ppo2_model_name,
    500,
    0.50,
    new_r_1,
    env_class_name,
    list(init_robot_pose.values()),
    rl_agent_wrapper_class,
    rl_agent_wrapper_params,
)
bi.show_rl_trajectory(obss, actions, 0.50, new_r_1)

In [None]:
# Second new radius, read from entry 2 of the updated parameter set
# NOTE(review): the three radii are read from indices 0, 2 and 3 — confirm that
# skipping index 1 is intentional.
new_r_2 = new_params_set[2]
# The existing model whose value is nearest to that radius serves as the expert
closest_expert_2 = find_closest_model(models_dict, new_r_2)
model_name_expert_2, r_expert_2 = closest_expert_2

In [None]:
# Define the environment with r_expert_2 (init_pose is reused from the first expert rollout)
env = DifferentialDriveEnvV2Unscaled(init_position=init_pose,goal_threshold = 0.1, L=0.5, r=r_expert_2, max_duration = 500)
env = make_vec_env(lambda: env, n_envs=1)
env = VecNormalize(env)

# Load the expert selected for the SECOND radius. The model must match the
# environment radius and the dataset name used below, hence model_name_expert_2
# (loading model_name_expert_1 here was a copy-paste slip).
model = PPO2.load(model_name_expert_2)
model.set_env(env)
# generate_expert_traj(model,file_name,n_episodes): record 50 expert episodes
dataset_filename_2 = model_name_expert_2 + '_expert_dataset'
generate_expert_traj(model, dataset_filename_2, n_episodes=50)

del model
del env # (you can delete them now, not useful anymore)

In [None]:
# Create the training environments with the second updated radius value.
# The factory builds a NEW environment on every call: handing make_vec_env a lambda
# that returns one pre-built instance would make all 4 sub-environments share the
# same object (and therefore the same state).
env = make_vec_env(
    lambda: DifferentialDriveEnvV2Unscaled(
        init_position=init_pose,
        goal_threshold=0.1,
        L=0.5,
        r=new_r_2,
        max_duration=500,
    ),
    n_envs=4,
)
env = VecNormalize(env)

In [None]:
# Expert demonstrations recorded for the second radius, read in batches of 32
expert_npz_path_2 = dataset_filename_2 +'.npz'
dataset = ExpertDataset(expert_path=expert_npz_path_2, batch_size=32)

# New PPO2 model with an MLP policy on the current environment
model = PPO2(MlpPolicy, env, learning_rate=5e-6, verbose=1)

# Pretrain the model on the expert dataset for 500 epochs
model.pretrain(dataset, n_epochs=500)

In [None]:
# Continue training the pretrained model with PPO2 itself
total_training_timesteps = 500000
model.learn(total_timesteps=total_training_timesteps)

In [None]:
# Name the new model after the fractional digits of its radius
# (e.g. 0.12 -> "ppo2_gaussian_012") and save it under that name
fractional_digits_2 = str(new_r_2 % 1).replace('0.','')
pretrained_model_name_2 = "ppo2_gaussian_0" + fractional_digits_2
model.save(pretrained_model_name_2)

In [None]:
# Visualize the trajectory obtained with the second new model.

# Replay the model trained for new_r_2; pointing at pretrained_model_name_1 here
# was a copy-paste slip that showed the first model running with the second radius.
ppo2_model_name = pretrained_model_name_2 # Change this
env_class_name = DifferentialDriveEnvV2Unscaled
rl_agent_wrapper_class = RLAgentUnscalingWrapper
rl_agent_wrapper_params = {"state_scaling_factors": [1.0, 1.0, np.pi], "action_scaling_factors": [3.0, 3.0]}
# Initial robot pose (a theta of 0 is the alternative that was tried)
init_robot_pose = {'x': 0.3, 'y': 0.3, 'theta': np.pi}
# NOTE(review): 500 and 0.50 presumably mirror max_duration and L used for the
# environments above — confirm against baseline_integration.
obss, actions = bi.load_and_run_model(ppo2_model_name, 500, 0.50, new_r_2, env_class_name, list(init_robot_pose.values()), rl_agent_wrapper_class, rl_agent_wrapper_params)
bi.show_rl_trajectory(obss,actions,0.50,new_r_2)

In [None]:
# Third new radius, read from entry 3 of the updated parameter set
new_r_3 = new_params_set[3]
# The existing model whose value is nearest to that radius serves as the expert
closest_expert_3 = find_closest_model(models_dict, new_r_3)
model_name_expert_3, r_expert_3 = closest_expert_3

In [None]:
# Environment used to roll out the expert: it is built with the expert's radius r_expert_3
# (init_pose is reused from the first expert rollout)
env = DifferentialDriveEnvV2Unscaled(
    init_position=init_pose,
    goal_threshold=0.1,
    L=0.5,
    r=r_expert_3,
    max_duration=500,
)
# Wrap the single environment as a vectorized env, then add VecNormalize on top
env = make_vec_env(lambda: env, n_envs=1)
env = VecNormalize(env)

# Name of the expert dataset, derived from the expert model's name
dataset_filename_3 = model_name_expert_3 + '_expert_dataset'

# Load the expert model and attach the environment to it
model = PPO2.load(model_name_expert_3)
model.set_env(env)

# generate_expert_traj(model, file_name, n_episodes): record 50 expert episodes
generate_expert_traj(model, dataset_filename_3, n_episodes=50)

# Neither the expert model nor its environment is needed after this point
del model
del env

In [None]:
# Create the training environments with the third updated radius value.
# The factory builds a NEW environment on every call: handing make_vec_env a lambda
# that returns one pre-built instance would make all 4 sub-environments share the
# same object (and therefore the same state).
env = make_vec_env(
    lambda: DifferentialDriveEnvV2Unscaled(
        init_position=init_pose,
        goal_threshold=0.1,
        L=0.5,
        r=new_r_3,
        max_duration=500,
    ),
    n_envs=4,
)
env = VecNormalize(env)

In [None]:
# Expert demonstrations recorded for the third radius, read in batches of 32
expert_npz_path_3 = dataset_filename_3 +'.npz'
dataset = ExpertDataset(expert_path=expert_npz_path_3, batch_size=32)

# New PPO2 model with an MLP policy on the current environment
model = PPO2(MlpPolicy, env, learning_rate=5e-6, verbose=1)

# Pretrain the model on the expert dataset for 500 epochs
model.pretrain(dataset, n_epochs=500)

In [None]:
# Continue training the pretrained model with PPO2 itself
total_training_timesteps = 500000
model.learn(total_timesteps=total_training_timesteps)

In [None]:
# Name the new model after the fractional digits of its radius
# (e.g. 0.12 -> "ppo2_gaussian_012") and save it under that name
fractional_digits_3 = str(new_r_3 % 1).replace('0.','')
pretrained_model_name_3 = "ppo2_gaussian_0" + fractional_digits_3
model.save(pretrained_model_name_3)

In [None]:
# Visualize the trajectory obtained with the third new model.

# Model to replay plus the environment / wrapper classes handed to the runner
ppo2_model_name = pretrained_model_name_3 # Change this
env_class_name = DifferentialDriveEnvV2Unscaled
rl_agent_wrapper_class = RLAgentUnscalingWrapper
rl_agent_wrapper_params = {
    "state_scaling_factors": [1.0, 1.0, np.pi],
    "action_scaling_factors": [3.0, 3.0],
}
# Initial robot pose (a theta of 0 is the alternative that was tried)
init_robot_pose = {'x': 0.3, 'y': 0.3, 'theta': np.pi}

# NOTE(review): 500 and 0.50 presumably mirror max_duration and L used for the
# environments above — confirm against baseline_integration.
obss, actions = bi.load_and_run_model(
    ppo2_model_name,
    500,
    0.50,
    new_r_3,
    env_class_name,
    list(init_robot_pose.values()),
    rl_agent_wrapper_class,
    rl_agent_wrapper_params,
)
bi.show_rl_trajectory(obss, actions, 0.50, new_r_3)

In [None]:
# Pair every newly saved model name with the radius it was trained with
models_info = dict([
    (pretrained_model_name_1, new_r_1),
    (pretrained_model_name_2, new_r_2),
    (pretrained_model_name_3, new_r_3),
])

In [None]:
# Save the names of the new models in a yaml file.
# Each model name is a key; its value is the corresponding radius.

# The file is named "<YYYY-MM-DD>_updated_parameters_ppo_models.yaml" using today's date
date_prefix = datetime.today().strftime('%Y-%m-%d')
name_prefix = 'updated_parameters_ppo_models'
output_file_prefix = date_prefix +'_'+name_prefix
log_file = output_file_prefix+'.yaml'

# Opening with 'w' replaces any file of the same name written earlier.
# yaml.dump returns None when it is given a stream, so its result is not stored.
with open(log_file, 'w') as log_stream:
    yaml.dump(models_info, log_stream)
    