In [None]:
# Install the MuJoCo Python bindings and Gymnasium into the Colab runtime.
# NOTE(review): neither package is version-pinned, so pip installs whatever
# release is current when the cell runs.
!pip install mujoco
!pip install gymnasium

# Set up GPU rendering.
from google.colab import files
import distutils.util
import os
import subprocess

# `nvidia-smi` exits non-zero when it cannot talk to the driver. The binary may
# also be absent altogether, in which case subprocess.run raises
# FileNotFoundError; treat both as "no usable GPU" so the same guidance is shown.
try:
  gpu_unavailable = subprocess.run('nvidia-smi').returncode != 0
except FileNotFoundError:
  gpu_unavailable = True

if gpu_unavailable:
  raise RuntimeError(
      'Cannot communicate with GPU. '
      'Make sure you are using a GPU Colab runtime. '
      'Go to the Runtime menu and select Choose runtime type.')

# Add an ICD config so that glvnd can pick up the Nvidia EGL driver.
# This is usually installed as part of an Nvidia driver package, but the Colab
# kernel doesn't install its driver via APT, and as a result the ICD is missing.
# (https://github.com/NVIDIA/libglvnd/blob/master/src/EGL/icd_enumeration.md)
NVIDIA_ICD_CONFIG_PATH = '/usr/share/glvnd/egl_vendor.d/10_nvidia.json'
# Only written when the file is absent, so an existing config is never
# overwritten and re-running the cell is harmless.
if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
  with open(NVIDIA_ICD_CONFIG_PATH, 'w') as f:
    f.write("""{
    "file_format_version" : "1.0.0",
    "ICD" : {
        "library_path" : "libEGL_nvidia.so.0"
    }
}
""")

# Configure MuJoCo to use the EGL rendering backend (requires GPU)
# The variable is set here, ahead of the first `import mujoco` in this cell.
print('Setting environment variable to use GPU rendering:')
%env MUJOCO_GL=egl

# Check if installation was successful.
# Importing mujoco and parsing a minimal model verifies that the package works.
try:
  print('Checking that the installation succeeded:')
  import mujoco
  mujoco.MjModel.from_xml_string('<mujoco/>')
except Exception as e:
  # Raise the guidance message and chain the real failure as its cause
  # (`raise NEW from ORIGINAL`), so the traceback shows the underlying error
  # first and ends with the actionable hint.
  raise RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.\n'
      'If using a hosted Colab runtime, make sure you enable GPU acceleration '
      'by going to the Runtime menu and selecting "Choose runtime type".') from e

print('Installation successful.')

# Other imports and helper functions
import time
import itertools
import numpy as np

# Graphics and plotting.
print('Installing mediapy:')
# Install ffmpeg through apt only when it is not already on PATH.
!command -v ffmpeg >/dev/null || (apt update && apt install -y ffmpeg)
!pip install -q mediapy
import mediapy as media
import matplotlib.pyplot as plt

# More legible printing from numpy.
np.set_printoptions(precision=3, suppress=True, linewidth=100)

# Wipe the installation logs this cell has printed so far.
from IPython.display import clear_output
clear_output()


In [None]:
# Absolute path of the project folder; later cells `%cd` into it and append it
# to sys.path.
# NOTE(review): no cell in view mounts Google Drive — presumably it is mounted
# beforehand; confirm before relying on Restart & Run All.
PROJECT_PATH = '/content/drive/MyDrive/CS 8803 DRL/dp'

In [None]:
# Make the project folder the working directory and list its contents.
# Relative paths used by later cells (e.g. "./videos_new3") resolve against it.
%cd $PROJECT_PATH
%ls

In [None]:
# Extra dependency; presumably required by the project modules imported
# below — TODO confirm. Unpinned, like the other installs.
!pip install pyquaternion
# !pip install stable_baselines3[extra]



In [None]:
import sys
# Put the project folder on the import path so its modules can be imported by
# name. Each re-run of this cell appends another (duplicate) entry.
sys.path.append(PROJECT_PATH)

In [None]:
import os
# Sanity check: print the entries of the current working directory.
print(os.listdir())

['transformations.py', 'mocap', 'TrajData.py', '__pycache__', '.ipynb_checkpoints', 'mocap_data', 'CustomEnv_old.py', 'videos', 'humanoid_dm_old.xml', 'humanoid_dm_old_new.xml', 'videos_new', 'end_effector.json', 'env_register.py', 'env_creator.py', 'no_gear_humanoid_dm.xml', '__init__.py', 'pd_control.py', 'imitationn learning new.ipynb', 'MUJOCO_LOG.TXT', 'lasthumanoid_dm.xml', 'humanoid_dm.xml', 'videos_new2', 'DRL.py', 'PPOAgent.py', 'CustomEnv.py', 'DRL_DeepMimic.ipynb', 'test.xml']


In [None]:
import importlib
# Importing mocap.mocap prints a few lines as a side effect (see the recorded
# cell output).
import mocap.mocap
# importlib.reload(mocap.mocap)

1
2
importing mocap.py


In [None]:
# Import the PPOAgent module, then re-execute it so that edits made to
# PPOAgent.py since the first import take effect without a kernel restart.
import PPOAgent
importlib.reload(PPOAgent)

<module 'PPOAgent' from '/content/drive/MyDrive/CS 8803 DRL/dp/PPOAgent.py'>

In [None]:
# Project-local modules.
import pd_control
import CustomEnv
import DRL
# NOTE: from here on the name `PPOAgent` refers to the attribute imported from
# the module (presumably the agent class), no longer to the module itself.
from PPOAgent import PPOAgent
from TrajData import TrajData
from mocap.mocap import MocapDM

In [None]:
# Re-execute CustomEnv so recent edits to CustomEnv.py are picked up.
importlib.reload(CustomEnv)

<module 'CustomEnv' from '/content/drive/MyDrive/CS 8803 DRL/dp/CustomEnv.py'>

In [None]:
# Re-execute DRL after CustomEnv was reloaded in the previous cell.
importlib.reload(DRL)
# Repeats the MocapDM import already done in an earlier cell.
from mocap.mocap import MocapDM
# from DRL import DRL

In [None]:
# Test Training
# Build the training driver used by the following cells. In the recorded run
# the constructor printed the tuple (77, 44) sixteen times.
drl = DRL.DRL()

(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)
(77, 44)


In [None]:
import tqdm
# Ten training iterations: each calls drl.rollout(i) and then drl.update().
# The recorded run took roughly 40 s per iteration.
for i in tqdm.tqdm(range(10)):
  drl.rollout(i)
  drl.update()

  0%|          | 0/10 [00:00<?, ?it/s]

avg reward:  tensor(0.3541)
tensor(669.0806, grad_fn=<AddBackward0>)
tensor(662.3640, grad_fn=<AddBackward0>)
tensor(655.3445, grad_fn=<AddBackward0>)
tensor(648.6002, grad_fn=<AddBackward0>)
tensor(641.7789, grad_fn=<AddBackward0>)
tensor(634.9235, grad_fn=<AddBackward0>)
tensor(628.1495, grad_fn=<AddBackward0>)
tensor(621.3666, grad_fn=<AddBackward0>)
tensor(614.5225, grad_fn=<AddBackward0>)
tensor(607.6651, grad_fn=<AddBackward0>)


 10%|█         | 1/10 [00:40<06:03, 40.43s/it]

avg reward:  tensor(0.3559)
tensor(520.0762, grad_fn=<AddBackward0>)
tensor(513.6759, grad_fn=<AddBackward0>)
tensor(507.2775, grad_fn=<AddBackward0>)
tensor(500.8316, grad_fn=<AddBackward0>)
tensor(494.3057, grad_fn=<AddBackward0>)
tensor(487.7461, grad_fn=<AddBackward0>)
tensor(481.1586, grad_fn=<AddBackward0>)
tensor(474.5076, grad_fn=<AddBackward0>)
tensor(467.7771, grad_fn=<AddBackward0>)
tensor(460.9654, grad_fn=<AddBackward0>)


 20%|██        | 2/10 [01:21<05:26, 40.77s/it]

avg reward:  tensor(0.3693)
tensor(498.3614, grad_fn=<AddBackward0>)
tensor(491.0880, grad_fn=<AddBackward0>)
tensor(483.6962, grad_fn=<AddBackward0>)
tensor(476.2146, grad_fn=<AddBackward0>)
tensor(468.6056, grad_fn=<AddBackward0>)
tensor(460.8780, grad_fn=<AddBackward0>)
tensor(453.0607, grad_fn=<AddBackward0>)
tensor(445.1572, grad_fn=<AddBackward0>)
tensor(437.1585, grad_fn=<AddBackward0>)
tensor(429.0602, grad_fn=<AddBackward0>)


 30%|███       | 3/10 [02:01<04:44, 40.64s/it]

avg reward:  tensor(0.3562)
tensor(399.8111, grad_fn=<AddBackward0>)
tensor(392.1152, grad_fn=<AddBackward0>)
tensor(384.3667, grad_fn=<AddBackward0>)
tensor(376.5967, grad_fn=<AddBackward0>)
tensor(368.7813, grad_fn=<AddBackward0>)
tensor(360.9242, grad_fn=<AddBackward0>)
tensor(353.0573, grad_fn=<AddBackward0>)
tensor(345.1864, grad_fn=<AddBackward0>)
tensor(337.3055, grad_fn=<AddBackward0>)
tensor(329.4198, grad_fn=<AddBackward0>)


 40%|████      | 4/10 [02:42<04:04, 40.74s/it]

avg reward:  tensor(0.3583)
tensor(365.0688, grad_fn=<AddBackward0>)
tensor(356.3199, grad_fn=<AddBackward0>)
tensor(347.5132, grad_fn=<AddBackward0>)
tensor(338.6908, grad_fn=<AddBackward0>)
tensor(329.8430, grad_fn=<AddBackward0>)
tensor(320.9921, grad_fn=<AddBackward0>)
tensor(312.1829, grad_fn=<AddBackward0>)
tensor(303.4290, grad_fn=<AddBackward0>)
tensor(294.7325, grad_fn=<AddBackward0>)
tensor(286.1171, grad_fn=<AddBackward0>)


 50%|█████     | 5/10 [03:22<03:22, 40.41s/it]

avg reward:  tensor(0.3463)
tensor(231.1293, grad_fn=<AddBackward0>)
tensor(223.8868, grad_fn=<AddBackward0>)
tensor(216.8368, grad_fn=<AddBackward0>)
tensor(210.0072, grad_fn=<AddBackward0>)
tensor(203.3885, grad_fn=<AddBackward0>)
tensor(196.9919, grad_fn=<AddBackward0>)
tensor(190.8439, grad_fn=<AddBackward0>)
tensor(184.9475, grad_fn=<AddBackward0>)
tensor(179.3031, grad_fn=<AddBackward0>)
tensor(173.9198, grad_fn=<AddBackward0>)


 60%|██████    | 6/10 [04:03<02:42, 40.66s/it]

avg reward:  tensor(0.3619)
tensor(164.8991, grad_fn=<AddBackward0>)
tensor(159.5288, grad_fn=<AddBackward0>)
tensor(154.3880, grad_fn=<AddBackward0>)
tensor(149.5108, grad_fn=<AddBackward0>)
tensor(144.8947, grad_fn=<AddBackward0>)
tensor(140.5372, grad_fn=<AddBackward0>)
tensor(136.4620, grad_fn=<AddBackward0>)
tensor(132.6742, grad_fn=<AddBackward0>)
tensor(129.1644, grad_fn=<AddBackward0>)
tensor(125.9299, grad_fn=<AddBackward0>)


 70%|███████   | 7/10 [04:45<02:02, 40.90s/it]

avg reward:  tensor(0.3648)
tensor(131.6860, grad_fn=<AddBackward0>)
tensor(128.9586, grad_fn=<AddBackward0>)
tensor(126.4534, grad_fn=<AddBackward0>)
tensor(124.1916, grad_fn=<AddBackward0>)
tensor(122.1444, grad_fn=<AddBackward0>)
tensor(120.2965, grad_fn=<AddBackward0>)
tensor(118.6628, grad_fn=<AddBackward0>)
tensor(117.2194, grad_fn=<AddBackward0>)
tensor(115.9362, grad_fn=<AddBackward0>)
tensor(114.8102, grad_fn=<AddBackward0>)


 80%|████████  | 8/10 [05:25<01:21, 40.83s/it]

avg reward:  tensor(0.3728)
tensor(121.6075, grad_fn=<AddBackward0>)
tensor(120.8144, grad_fn=<AddBackward0>)
tensor(120.1183, grad_fn=<AddBackward0>)
tensor(119.5307, grad_fn=<AddBackward0>)
tensor(119.0061, grad_fn=<AddBackward0>)
tensor(118.5359, grad_fn=<AddBackward0>)
tensor(118.1251, grad_fn=<AddBackward0>)
tensor(117.7572, grad_fn=<AddBackward0>)
tensor(117.4057, grad_fn=<AddBackward0>)
tensor(117.0627, grad_fn=<AddBackward0>)


 90%|█████████ | 9/10 [06:06<00:40, 40.80s/it]

avg reward:  tensor(0.3609)
tensor(115.9081, grad_fn=<AddBackward0>)
tensor(115.4718, grad_fn=<AddBackward0>)
tensor(115.0255, grad_fn=<AddBackward0>)
tensor(114.5896, grad_fn=<AddBackward0>)
tensor(114.1438, grad_fn=<AddBackward0>)
tensor(113.6890, grad_fn=<AddBackward0>)
tensor(113.2416, grad_fn=<AddBackward0>)
tensor(112.7984, grad_fn=<AddBackward0>)
tensor(112.3480, grad_fn=<AddBackward0>)
tensor(111.8954, grad_fn=<AddBackward0>)


100%|██████████| 10/10 [06:47<00:00, 40.76s/it]


In [None]:
# test agent:
import cv2
from PIL import Image
video_dir = "./videos_new3"  # Directory to save videos
# Create the directory up front; exist_ok makes re-running the cell harmless.
os.makedirs(video_dir, exist_ok=True)

In [None]:
def save_render_image(image, ind, out_dir="./videos_new3"):
    """Save one rendered frame as ``<out_dir>/<ind>.png``.

    Args:
        image: Pixel array accepted by ``PIL.Image.fromarray``.
        ind: Frame index, used as the file name stem.
        out_dir: Directory the frame is written to. Defaults to the directory
            that used to be hard-coded here, so existing two-argument calls
            behave exactly as before.
    """
    # Cheap no-op when the directory already exists; lets callers pass a new
    # directory without having to create it first.
    os.makedirs(out_dir, exist_ok=True)
    outpath = os.path.join(out_dir, f"{ind}.png")
    Image.fromarray(image).save(outpath)

In [None]:
import torch

In [None]:

# Roll out the trained policy for up to 400 steps and save one PNG per step.

# Create environment with proper render_mode
env = CustomEnv.MyEnv("")

# Apply video recording wrapper
# env = RecordVideo(env, video_folder=video_dir, episode_trigger=lambda x: True)

try:
  obs, _ = env.reset()

  with mujoco.Renderer(env.model) as renderer:

    for t in tqdm.tqdm(range(400)):

      # Inference only: no gradients are needed while evaluating the policy.
      with torch.no_grad():
        actions, _ = drl.agent.get_action(torch.Tensor(obs))  # Get action from policy
      next_obs, rewards, done, truncated, infos = env.step(actions)

      # Under the Gymnasium step API an episode is over when it is either
      # terminated or truncated; stop in both cases instead of stepping a
      # finished episode.
      if done or truncated:
        # self.writer.add_scalar("Duration", t, i)
        break
      # Recompute derived quantities before drawing the current state.
      mujoco.mj_forward(env.model, env.data)
      renderer.update_scene(env.data)
      pixels = renderer.render()
      save_render_image(pixels, t)
      obs = torch.Tensor(next_obs)
finally:
  # Release the environment even if stepping or rendering raised.
  env.close()

(77, 44)


100%|██████████| 400/400 [00:14<00:00, 26.86it/s]


In [None]:
def write_video(folder_path, output_name="0324-9-early_ter.mp4", fps=100,
                num_frames=None):
  """Assemble the numbered PNG frames in ``folder_path`` into an MP4 video.

  Frames are expected to be named ``<index>.png`` and are written in
  increasing index order to ``<folder_path>/<output_name>``.

  Args:
    folder_path: Directory that holds the frames; the video is written here too.
    output_name: File name of the resulting video.
    fps: Frame rate passed to the video writer.
    num_frames: If given, frames ``0 .. num_frames - 1`` are used. If None,
      every ``<index>.png`` found in ``folder_path`` is used, so remove frames
      left over from earlier runs if they should not appear in the video.

  Raises:
    ValueError: If no frames are found, or a frame's size differs from the
      first frame's (the writer needs one fixed size).
    FileNotFoundError: If a frame file is missing or cannot be decoded.
  """
  output = os.path.join(folder_path, output_name)

  if num_frames is None:
    # Sort numerically: a plain string sort would place "10.png" before "2.png".
    numbered = sorted(
        (int(stem), name)
        for name in os.listdir(folder_path)
        for stem, ext in [os.path.splitext(name)]
        if ext == ".png" and stem.isascii() and stem.isdigit())
    frame_paths = [os.path.join(folder_path, name) for _, name in numbered]
  else:
    frame_paths = [os.path.join(folder_path, f"{idx}.png")
                   for idx in range(num_frames)]
  if not frame_paths:
    raise ValueError(f"No '<index>.png' frames found in {folder_path!r}")

  fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Use "XVID" for AVI
  out = None
  try:
    for img_path in frame_paths:
      frame = cv2.imread(img_path)
      # cv2.imread signals failure by returning None instead of raising.
      if frame is None:
        raise FileNotFoundError(f"Could not read frame {img_path!r}")
      if out is None:
        # Take the video size from the first frame rather than hard-coding it.
        frame_height, frame_width = frame.shape[:2]
        out = cv2.VideoWriter(output, fourcc, fps, (frame_width, frame_height))
      elif frame.shape[:2] != (frame_height, frame_width):
        raise ValueError(
            f"Frame {img_path!r} has size {frame.shape[1]}x{frame.shape[0]}, "
            f"expected {frame_width}x{frame_height}")
      out.write(frame)  # Write frame to video
  finally:
    # Release writer, also on failure, so the output file is closed.
    if out is not None:
      out.release()

In [None]:
# Encode the frames saved by the rollout cell into a video inside the same folder.
write_video("./videos_new3")