In [1]:
#@title Check if installation was successful

#from google.colab import files

import distutils.util
import os
import subprocess
if subprocess.run('nvidia-smi').returncode:
  raise RuntimeError(
      'Cannot communicate with GPU. '
      'Make sure you are using a GPU Colab runtime. '
      'Go to the Runtime menu and select Choose runtime type.')

# Add an ICD config so that glvnd can pick up the Nvidia EGL driver.
# This is usually installed as part of an Nvidia driver package, but the Colab
# kernel doesn't install its driver via APT, and as a result the ICD is missing.
# (https://github.com/NVIDIA/libglvnd/blob/master/src/EGL/icd_enumeration.md)
NVIDIA_ICD_CONFIG_PATH = '/usr/share/glvnd/egl_vendor.d/10_nvidia.json'
if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
  with open(NVIDIA_ICD_CONFIG_PATH, 'w') as f:
    f.write("""{
    "file_format_version" : "1.0.0",
    "ICD" : {
        "library_path" : "libEGL_nvidia.so.0"
    }
}
""")

# Configure MuJoCo to use the EGL rendering backend (requires GPU)
print('Setting environment variable to use GPU rendering:')
%env MUJOCO_GL=egl

try:
  print('Checking that the installation succeeded:')
  import mujoco
  mujoco.MjModel.from_xml_string('<mujoco/>')
except Exception as e:
  raise e from RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.\n'
      'If using a hosted Colab runtime, make sure you enable GPU acceleration '
      'by going to the Runtime menu and selecting "Choose runtime type".')

print('Installation successful.')

#@title Import packages for plotting and creating graphics
import time
import itertools
import numpy as np
from typing import Callable, NamedTuple, Optional, Union, List

# Graphics and plotting.
print('Installing mediapy:')
!command -v ffmpeg >/dev/null || (apt update && apt install -y ffmpeg)
!pip install -q mediapy
import mediapy as media
import matplotlib.pyplot as plt

# More legible printing from numpy.
np.set_printoptions(precision=3, suppress=True, linewidth=100)

Tue Dec 12 08:38:22 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.147.05   Driver Version: 525.147.05   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
|  0%   50C    P0    54W / 175W |     94MiB /  8192MiB |      2%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

  import distutils.util


Installation successful.
Installing mediapy:


In [2]:
%env DISPLAY=897985abde1b:10.0

env: DISPLAY=897985abde1b:10.0


In [3]:
''''
import random
import gymnasium as gym
env = gym.make('Hopper', render_mode='rgb_array')
env.reset()
n_frames = 120
height = 480
width = 480
frames = []
#import mujoco
model = env.model
data = env.data

mujoco.mj_step(model, data)
renderer = mujoco.Renderer(model, height, width)
mujoco.mj_resetDataKeyframe(model, data,0)

joint_names = ['thigh_joint', 'leg_joint', 'foot_joint']
joint_ranges = [model.joint(joint).range for joint in joint_names]
for i in range(n_frames):
  while data.time < i/30.0:   
    new_action = []
        
    # ob, reward, terminated, False, {}
    obs, reward, terminated, _, _ = env.step(new_action)
    #mujoco.mj_step(model, data)
  renderer.update_scene(data)
  frame = renderer.render()
  frames.append(frame)
media.show_video(frames, fps=30)

'''



"'\nimport random\nimport gymnasium as gym\nenv = gym.make('Hopper', render_mode='rgb_array')\nenv.reset()\nn_frames = 120\nheight = 480\nwidth = 480\nframes = []\n#import mujoco\nmodel = env.model\ndata = env.data\n\nmujoco.mj_step(model, data)\nrenderer = mujoco.Renderer(model, height, width)\nmujoco.mj_resetDataKeyframe(model, data,0)\n\njoint_names = ['thigh_joint', 'leg_joint', 'foot_joint']\njoint_ranges = [model.joint(joint).range for joint in joint_names]\nfor i in range(n_frames):\n  while data.time < i/30.0:   \n    new_action = []\n        \n    # ob, reward, terminated, False, {}\n    obs, reward, terminated, _, _ = env.step(new_action)\n    #mujoco.mj_step(model, data)\n  renderer.update_scene(data)\n  frame = renderer.render()\n  frames.append(frame)\nmedia.show_video(frames, fps=30)\n\n"

In [7]:

import argparse
import importlib
import os
import sys

import numpy as np
import torch as th
import yaml
from huggingface_sb3 import EnvironmentName
from stable_baselines3.common.callbacks import tqdm
from stable_baselines3.common.utils import set_random_seed

import rl_zoo3.import_envs  # noqa: F401 pylint: disable=unused-import
from rl_zoo3 import ALGOS, create_test_env, get_saved_hyperparams
from rl_zoo3.exp_manager import ExperimentManager
from rl_zoo3.load_from_hub import download_from_hub
from rl_zoo3.utils import StoreDict, get_model_path
import cv2
from collections import OrderedDict
import numpy as np
import torch as th
import matplotlib.pyplot as plt

from jesnk.utils import enjoy


parser = argparse.ArgumentParser()
parser.add_argument("--env", help="environment ID", type=EnvironmentName, default="CartPole-v1")
parser.add_argument("-f", "--folder", help="Log folder", type=str, default="rl-trained-agents")
parser.add_argument("--algo", help="RL Algorithm", default="ppo", type=str, required=False, choices=list(ALGOS.keys()))
parser.add_argument("-n", "--n-timesteps", help="number of timesteps", default=1000, type=int)
parser.add_argument("--num-threads", help="Number of threads for PyTorch (-1 to use default)", default=-1, type=int)
parser.add_argument("--n-envs", help="number of environments", default=1, type=int)
parser.add_argument("--exp-id", help="Experiment ID (default: 0: latest, -1: no exp folder)", default=0, type=int)
parser.add_argument("--verbose", help="Verbose mode (0: no output, 1: INFO)", default=1, type=int)
parser.add_argument(
    "--no-render", action="store_true", default=False, help="Do not render the environment (useful for tests)"
)
parser.add_argument("--deterministic", action="store_true", default=False, help="Use deterministic actions")
parser.add_argument("--device", help="PyTorch device to be use (ex: cpu, cuda...)", default="auto", type=str)
parser.add_argument(
    "--load-best", action="store_true", default=False, help="Load best model instead of last model if available"
)
parser.add_argument(
    "--load-checkpoint",
    type=int,
    help="Load checkpoint instead of last model if available, "
    "you must pass the number of timesteps corresponding to it",
)
parser.add_argument(
    "--load-last-checkpoint",
    action="store_true",
    default=False,
    help="Load last checkpoint instead of last model if available",
)
parser.add_argument("--stochastic", action="store_true", default=False, help="Use stochastic actions")
parser.add_argument(
    "--norm-reward", action="store_true", default=False, help="Normalize reward if applicable (trained with VecNormalize)"
)
parser.add_argument("--seed", help="Random generator seed", type=int, default=0)
parser.add_argument("--reward-log", help="Where to log reward", default="", type=str)
parser.add_argument(
    "--gym-packages",
    type=str,
    nargs="+",
    default=[],
    help="Additional external Gym environment package modules to import",
)
parser.add_argument(
    "--env-kwargs", type=str, nargs="+", action=StoreDict, help="Optional keyword argument to pass to the env constructor"
)
parser.add_argument(
    "--custom-objects", action="store_true", default=False, help="Use custom objects to solve loading issues"
)
parser.add_argument(
    "-P",
    "--progress",
    action="store_true",
    default=False,
    help="if toggled, display a progress bar using tqdm and rich",
)
parser.add_argument(
    "--render_rgb", action='store_true', default=False, help="if toggled, render rgb array"
)
#args = parser.parse_args()

args = parser.parse_args(args=[])
args.env = EnvironmentName('FetchPickAndPlace-v2')
args.algo = 'tqc'
args.load_last_checkpoint = False
args.no_render = True
args.render_rgb = True
args.folder = './logs/'
args.exp_id = 8
args.env_kwargs = {'render_mode': 'rgb_array'}#,'continuing_task':False}
args.n_timesteps = 1000
args.device = 'cuda'
enjoy(args)


Loading ./logs/tqc/FetchPickAndPlace-v2_8/FetchPickAndPlace-v2.zip
HER REPLAY BUFFER
Episode Reward: -12.00
Episode Length 50
initial_agent [[1.204 0.604 0.425]], goal:[[1.436 0.873 0.425]]
Episode Reward: -13.00
Episode Length 50
initial_agent [[1.355 0.88  0.425]], goal:[[1.437 0.6   0.753]]
Episode Reward: -7.00
Episode Length 50
initial_agent [[1.245 0.858 0.425]], goal:[[1.354 0.689 0.481]]
Episode Reward: -7.00
Episode Length 50
initial_agent [[1.491 0.893 0.425]], goal:[[1.398 0.794 0.485]]
Episode Reward: -8.00
Episode Length 50
initial_agent [[1.459 0.879 0.425]], goal:[[1.299 0.771 0.425]]
Episode Reward: -5.00
Episode Length 50
initial_agent [[1.459 0.667 0.425]], goal:[[1.379 0.624 0.425]]
Episode Reward: -14.00
Episode Length 50
initial_agent [[1.264 0.862 0.425]], goal:[[1.21  0.7   0.783]]
Episode Reward: -4.00
Episode Length 50
initial_agent [[1.261 0.615 0.425]], goal:[[1.313 0.659 0.425]]
Episode Reward: -10.00
Episode Length 50
initial_agent [[1.252 0.882 0.425]], go

0
This browser does not support the video tag.


Success rate: 100.00%
20 Episodes
Mean reward: -8.35 +/- 3.04
Mean episode length: 50.00 +/- 0.00


In [5]:
from jesnk.utils import rollout_trajectory

args = parser.parse_args(args=[])
args.env = EnvironmentName('PointMaze_Large-v3')
args.algo = 'tqc'
args.load_last_checkpoint = False
args.no_render = True
args.render_rgb = True
args.folder = './logs/'
args.exp_id = 16
args.env_kwargs = {'render_mode': 'rgb_array','continuing_task':False}
args.n_timesteps = 4000
args.device = 'cuda'
data = rollout_trajectory(args)


Loading ./logs/tqc/PointMaze_Large-v3_16/PointMaze_Large-v3.zip
HER REPLAY BUFFER


AttributeError: 'MultiInputPolicy' object has no attribute 'replay_buffer'

In [None]:
# load pkl file
import pickle
with open('./logs/tqc/PointMaze_Large-v3_2/replay_buffer.pkl', 'rb') as f:
    data = pickle.load(f)

In [None]:
print(data.ep_start[800])
print(data.observations['observation'][0])

[800]
[[-1.68   1.118  0.     0.   ]]


In [None]:
print(len(data[0]['obs']))
print(len(data[0]['action']))

TypeError: 'NoneType' object is not subscriptable

In [None]:
data[0]['action']

[array([[0.506, 0.982]], dtype=float32),
 array([[0.432, 0.978]], dtype=float32),
 array([[0.339, 0.968]], dtype=float32),
 array([[0.238, 0.947]], dtype=float32),
 array([[0.114, 0.944]], dtype=float32),
 array([[-0.002,  0.941]], dtype=float32),
 array([[-0.034,  0.938]], dtype=float32),
 array([[-0.065,  0.937]], dtype=float32),
 array([[-0.102,  0.937]], dtype=float32),
 array([[-0.135,  0.939]], dtype=float32),
 array([[-0.195,  0.943]], dtype=float32),
 array([[-0.267,  0.946]], dtype=float32),
 array([[-0.5  ,  0.931]], dtype=float32),
 array([[-0.69 ,  0.906]], dtype=float32),
 array([[-0.854,  0.851]], dtype=float32),
 array([[-0.91 ,  0.788]], dtype=float32),
 array([[-0.918,  0.706]], dtype=float32),
 array([[-0.925,  0.608]], dtype=float32),
 array([[-0.93 ,  0.515]], dtype=float32),
 array([[-0.934,  0.419]], dtype=float32),
 array([[-0.936,  0.139]], dtype=float32),
 array([[-0.937, -0.269]], dtype=float32),
 array([[-0.938, -0.548]], dtype=float32),
 array([[-0.938, -0.7

In [None]:
data[1]['reward']

[array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], dtype=float32),
 array([0.], d

In [None]:
# load replay buffer
from rl_zoo3.utils import ReplayBuffer

args = parser.parse_args(args=[])
args.env = EnvironmentName('PointMaze_Large-v3')
args.algo = 'tqc'
args.load_last_checkpoint = False
args.no_render = True
args.render_rgb = True
args.folder = './logs/'
args.exp_id = 16
args.env_kwargs = {'render_mode': 'rgb_array','continuing_task':False}


ImportError: cannot import name 'ReplayBuffer' from 'rl_zoo3.utils' (/research/rl_zoo_jesnk/rl_zoo3/utils.py)