## 0. Import Dependencies and Initialize the ROS Node

In [None]:
# %config IPCompleter.use_jedi=False
# https://github.com/PierreExeter/custom_gym_envs

In [None]:
import gym
import gym_envs
import random
import numpy as np 
import rospy

In [None]:
# Prerequisite: the Jaco arm must already be running in Gazebo. Start it from
# a separate terminal with ONE of the following launch commands:
#   roslaunch kinova_gazebo robot_launch_noRender_noSphere.launch kinova_robotType:=j2n6s300
#   roslaunch kinova_gazebo robot_launch_render.launch kinova_robotType:=j2n6s300

# Register this notebook process as a ROS node named "kinova_client".
rospy.init_node(
    "kinova_client",
    anonymous=True,
    log_level=rospy.INFO,
)

## 1. Test Random Environment

In [None]:
# Build the environment from its id string; the id is presumably registered
# when gym_envs is imported -- TODO confirm.
env = gym.make('JacoReachGazebo-v1')

In [None]:
# Summarise the environment: its repr, then each space followed by its upper
# and lower bounds.
print('Environment:', env)
for label, space in (
    ('Action space:', env.action_space),
    ('State space:', env.observation_space),
):
    print(label, space)
    print(space.high)
    print(space.low)

In [None]:
# Smoke-test the environment: run 3 short episodes (at most 5 steps each) with
# randomly sampled actions, printing every transition and the per-episode
# reward total.
for episode in range(3):

    obs = env.reset()
    rewards = []

    for t in range(5):

        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        # Record the reward of EVERY step. Appending only on `done` would make
        # the total 0 whenever the 5-step cap ends the episode, and otherwise
        # just the terminal step's reward.
        rewards.append(reward)

        print("timestep:", t)
        print("action: ", action)
        print("observation: ", obs)
        print("reward: ", reward)
        print("done: ", done)
        print("info: ", info)

        if done:
            break

    cumulative_reward = sum(rewards)
    print("episode {} | cumulative reward : {}".format(episode, cumulative_reward))

# Release the environment once all test episodes have run.
env.close()

## 2. Build and Train the SB3-Model

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

In [None]:
env_name = 'JacoReachGazebo-v1'
# DummyVecEnv takes a list of zero-argument factories, one per sub-environment.
# The factory builds the environment itself instead of closing over the global
# name `env`, which this very statement rebinds to the vectorized wrapper.
env = DummyVecEnv([lambda: gym.make(env_name)])

In [None]:
import os

# Directory handed to PPO as its tensorboard_log location.
log_path = os.path.join('Training', 'Logs')

# PPO agent with the "MlpPolicy" policy on the vectorized environment.
model = PPO(
    policy="MlpPolicy",
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

In [None]:
# Train the PPO model. Shorter runs are kept below for reference:
# model.learn(total_timesteps=1000)
# model.learn(total_timesteps=10000)  # Segmentation fault (core dumped)
# NOTE(review): the 10000- and 20000-step runs are annotated as ending in a
# segmentation fault; the cause is not visible in this notebook.
model.learn(total_timesteps=20000)  # Segmentation fault (core dumped)

## 3. Save and Reload Model

In [None]:
# Relative path used below for both saving and reloading the model.
PPO_path = os.path.join('Training', 'Saved Models', 'PPO_model')

In [None]:
# Persist the trained model to PPO_path.
model.save(PPO_path)

In [None]:
# Drop the in-memory model so the next cell demonstrates a reload from disk.
del model

In [None]:
# Restore the saved model from PPO_path and attach the current environment.
model = PPO.load(PPO_path, env=env)

## 5. Showing Trained Model

In [None]:
# Roll out the trained policy until the episode ends, rendering every step.
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()
    # `env` is a DummyVecEnv, so `done` holds one flag per sub-environment.
    # np.any() gives a well-defined truth value for a plain bool as well as
    # for an array of any length, whereas `if done:` raises ValueError for
    # arrays with more than one element.
    if np.any(done):
        print('info', info)
        break

In [None]:
# Shut the environment down once the demonstration episode is finished.
env.close()

## 6. Viewing Logs in Tensorboard

In [None]:
# Folder of the training run to inspect, located under log_path.
# NOTE(review): 'PPO_1' is hard-coded; repeated training runs presumably write
# to PPO_2, PPO_3, ... -- confirm which run folder should be opened.
training_log_path = os.path.join(log_path, 'PPO_1')

In [None]:
# IPython shell escape: runs the tensorboard CLI with {training_log_path}
# substituted from the notebook variable of the same name.
!tensorboard --logdir={training_log_path}